diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc
index a9131c886..2d5562bdd 100644
--- a/engine/services/model_service.cc
+++ b/engine/services/model_service.cc
@@ -946,6 +946,11 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
 
   json_helper::MergeJson(json_data, params_override);
 
+  // Set default cpu_threads if it is not configured
+  if (!json_data.isMember("cpu_threads")) {
+    json_data["cpu_threads"] = GetCpuThreads();
+  }
+
   // Set the latest ctx_len
   if (ctx_len) {
     json_data["ctx_len"] =
@@ -1330,6 +1335,10 @@ ModelService::MayFallbackToCpu(const std::string& model_path, int ngl,
   return warning;
 }
 
+int ModelService::GetCpuThreads() const {
+  return std::max(std::thread::hardware_concurrency() / 2, 1u);
+}
+
 cpp::result<std::shared_ptr<ModelMetadata>, std::string>
 ModelService::GetModelMetadata(const std::string& model_id) const {
   if (model_id.empty()) {
diff --git a/engine/services/model_service.h b/engine/services/model_service.h
index 17f2c0ddb..dcf99430f 100644
--- a/engine/services/model_service.h
+++ b/engine/services/model_service.h
@@ -112,6 +112,8 @@ class ModelService {
       const std::string& model_path, int ngl, int ctx_len, int n_batch = 2048,
       int n_ubatch = 2048, const std::string& kv_cache_type = "f16");
 
+  int GetCpuThreads() const;
+
   std::shared_ptr<DatabaseService> db_service_;
   std::shared_ptr<HardwareService> hw_service_;
   std::shared_ptr<DownloadService> download_service_;
diff --git a/engine/utils/hardware/gguf/gguf_file.h b/engine/utils/hardware/gguf/gguf_file.h
index 361668242..640c1b49f 100644
--- a/engine/utils/hardware/gguf/gguf_file.h
+++ b/engine/utils/hardware/gguf/gguf_file.h
@@ -7,11 +7,11 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
-#include
 
 #ifdef _WIN32
 #include
 #include
 #else
 #include
 #include
 #endif
@@ -23,8 +23,8 @@
 #endif
 
 #include "ggml.h"
-#include "utils/string_utils.h"
 #include "utils/logging_utils.h"
+#include "utils/string_utils.h"
 
 // #define GGUF_LOG(msg)                                                  \
 //   do {                                                                 \
@@ -246,11 +246,15 @@ struct GGUFHelper {
     file_size = std::filesystem::file_size(file_path);
 
     int fd = open(file_path.c_str(), O_RDONLY);
+    if (fd == -1) {
+      CTL_INF("Failed to open file: " << file_path << ", error: " << errno);
+      return false;
+    }
     // Memory-map the file
     data = static_cast<uint8_t*>(
         mmap(nullptr, file_size, PROT_READ, MAP_PRIVATE, fd, 0));
     if (data == MAP_FAILED) {
-      perror("Error mapping file");
+      CTL_INF("Error mapping file");
       close(fd);
       return false;
     }
@@ -482,7 +486,7 @@ struct GGUFFile {
 inline std::optional<GGUFFile> ParseGgufFile(const std::string& path) {
   GGUFFile gf;
   GGUFHelper h;
-  if(!h.OpenAndMMap(path)) {
+  if (!h.OpenAndMMap(path)) {
     return std::nullopt;
   }
 
diff --git a/engine/utils/hardware/gguf/gguf_file_estimate.h b/engine/utils/hardware/gguf/gguf_file_estimate.h
index 12a7e72e1..402a70958 100644
--- a/engine/utils/hardware/gguf/gguf_file_estimate.h
+++ b/engine/utils/hardware/gguf/gguf_file_estimate.h
@@ -64,7 +64,6 @@ inline float GetQuantBit(const std::string& kv_cache_t) {
 
 inline std::optional<Estimation> EstimateLLaMACppRun(
     const std::string& file_path, const RunConfig& rc) {
-  Estimation res;
   // token_embeddings_size = n_vocab * embedding_length * 2 * quant_bit/16 bytes
 
   //RAM = token_embeddings_size + ((total_ngl-ngl) >=1 ? Output_layer_size + (total_ngl - ngl - 1 ) / (total_ngl-1) * (total_file_size - token_embeddings_size - Output_layer_size) : 0 ) (bytes)
@@ -72,6 +71,7 @@ inline std::optional<Estimation> EstimateLLaMACppRun(
   auto gf = ParseGgufFile(file_path);
   if (!gf) return std::nullopt;
 
+  Estimation res;
   int32_t embedding_length = 0;
   int64_t n_vocab = 0;
   int32_t num_block = 0;