Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
58 commits
Select commit Hold shift + click to select a range
421627a
Serialize generated OV model from GGUF model for faster pipe initiali…
May 15, 2025
7d1c9de
Add try-catch to handle expecption raise by serialize, continue with …
May 15, 2025
cb883ba
Minior refactor to handle different gguf model in same directory
May 16, 2025
67b2fd7
Explicit save model based on ov::cache_dir properties, add time measu…
May 16, 2025
20c24b4
use ov:save model to compress OV model
May 19, 2025
14bc5f5
Merge branch 'master' into gguf_model_cache
May 19, 2025
ff05f51
Merge branch 'master' into gguf_model_cache
May 20, 2025
63fd0ee
Implict cache generated ov model constructed from gguf
May 20, 2025
89671e8
apply review comments
May 20, 2025
f96a014
Merge branch 'gguf_model_cache' of https://github.com/sammysun0711/op…
May 20, 2025
83db989
Remove unused header file
May 20, 2025
e7d5552
Add special property ENALBE_SAVE_OV_MODEL to control whether save gen…
May 20, 2025
20effa2
Merge branch 'master' into gguf_model_cache
May 20, 2025
538cd86
Merge branch 'master' into gguf_model_cache
May 21, 2025
5f23a9c
Simplify logic based on #2129 and #2240
May 21, 2025
11eaf5d
Add documents and update error message
May 21, 2025
dc6dd2b
Add test case
May 21, 2025
e7b1b23
Add ov::genai::enable_save_ov_model property
May 22, 2025
e7cc549
Control GGUF reader related debug info with OPENVINO_LOG_LEVEL
May 22, 2025
fc2d433
update test case
May 22, 2025
495c1d4
Merge branch 'master' into gguf_model_cache
May 22, 2025
9c808ec
Merge branch 'master' into gguf_model_cache
May 22, 2025
6ac37fb
minior fix for test
May 22, 2025
80c26b8
Merge branch 'master' into gguf_model_cache
May 22, 2025
f2c5080
Update src/cpp/src/gguf_utils/gguf_modeling.cpp
May 22, 2025
c568ddd
Merge branch 'master' into gguf_model_cache
May 23, 2025
55e9468
Merge branch 'master' into gguf_model_cache
May 26, 2025
fa6df48
Merge branch 'master' into gguf_model_cache
May 27, 2025
32b7427
Fix merge conflict
May 28, 2025
5e73520
Merge branch 'master' into gguf_model_cache
May 28, 2025
8488486
Fix merge conflict
May 30, 2025
8fd892d
move save_openvino_model to utils for re-use
May 30, 2025
1fd26c2
Save generated ov_tokenizer & ov_detokenzier model for re-use
May 30, 2025
51f87fa
Update test
May 30, 2025
f0991a3
Fix review comments
May 30, 2025
2b0b29a
minnor test fix
May 30, 2025
807d0f9
Update test
May 30, 2025
5bf0e69
remove unused import
May 30, 2025
f8c84f5
Move extract_draft_model_from_config, extract_prompt_lookup_from_conf…
May 30, 2025
030dcde
Test only: pass no properties to tokenizer/detokenizer
May 30, 2025
617c1dc
Revert "Move extract_draft_model_from_config, extract_prompt_lookup_f…
May 30, 2025
01c9eb5
Simplify unused properties handling for tokenizer
May 30, 2025
76466d1
Set enable_save_ov_model as None by default
May 30, 2025
afa9bbb
Merge branch 'master' into gguf_model_cache
May 30, 2025
4788fd3
Merge branch 'master' into gguf_model_cache
Jun 3, 2025
13ab0df
test enable_save_ov_model=False only
Jun 4, 2025
fb4fb17
enable save_ov_model test
Jun 4, 2025
3ef08bf
[Debug only] try use macos-13-large to check if core dump cause by li…
Jun 4, 2025
a05b435
Merge branch 'master' into gguf_model_cache
Jun 6, 2025
cad5068
Revert "[Debug only] try use macos-13-large to check if core dump cau…
Jun 6, 2025
c4a8ce1
release unused pipeline with gc to save memory
Jun 6, 2025
62448b6
try to further reduce test memory usage
Jun 6, 2025
d3147b5
reduce memory usage for test_pipelines_with_gguf_generate
Jun 6, 2025
3c232ac
Split separate test for gguf enable_save_ov_model to save memory usage
Jun 6, 2025
1283de6
Fix merge conflict
Jun 10, 2025
b6a8384
Refactor test
Jun 10, 2025
e354e4e
Fix merge conflict
Jun 10, 2025
e618f5c
Merge branch 'master' into gguf_model_cache
Jun 12, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 27 additions & 3 deletions src/cpp/src/gguf_utils/gguf_modeling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <openvino/openvino.hpp>
#include "openvino/runtime/core.hpp"
#include "openvino/opsets/opset13.hpp"
#include "openvino/pass/serialize.hpp"

#include "gguf_utils/building_blocks.hpp"
#include "gguf_utils/gguf_modeling.hpp"
Expand Down Expand Up @@ -152,24 +153,47 @@ std::shared_ptr<ov::Model> create_language_model(

} // namespace

std::shared_ptr<ov::Model> create_from_gguf(const std::string& model_path) {
/// Best-effort serialization of a generated OpenVINO model.
///
/// Calls ov::save_model(model, save_path, compress_to_fp16) and prints the elapsed
/// wall-clock time to stdout. An ov::Exception raised while saving is reported on
/// stderr as a warning and is not rethrown, so the caller continues without a saved copy.
///
/// @param model             Model to write.
/// @param save_path         Destination path handed to ov::save_model.
/// @param compress_to_fp16  Forwarded unchanged to ov::save_model.
void save_openvino_model(const std::shared_ptr<ov::Model>& model, const std::string& save_path, bool compress_to_fp16) {
    using clock = std::chrono::high_resolution_clock;
    try {
        const auto began = clock::now();
        ov::save_model(model, save_path, compress_to_fp16);
        const auto ended = clock::now();
        const auto elapsed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(ended - began).count();
        std::cout << "Save generated OpenVINO model to: " << save_path
                  << " done. Time: " << elapsed_ms << " ms\n";
    } catch (const ov::Exception& err) {
        // Saving is an optimization only; a failure must not abort model creation.
        std::cerr << "[Warning] Exception during model serialization: " << err.what() << std::endl;
    }
}

/// Builds an OpenVINO model from a GGUF file and, when requested, saves it for reuse.
///
/// Progress and timing information is printed to stdout.
///
/// @param model_path  Path of the GGUF file; passed to load_gguf().
/// @param properties  When it contains a non-empty ov::cache_dir entry, the generated
///                    model is saved via save_openvino_model() to
///                    "<cache_dir>/<GGUF file stem>_openvino_model.xml" with
///                    compress_to_fp16 = true.
/// @return The generated model.
/// @throws Raises through OPENVINO_THROW when the GGUF "architecture" value is neither
///         "llama" nor "qwen2".
std::shared_ptr<ov::Model> create_from_gguf(const std::string& model_path, const ov::AnyMap& properties) {
    auto start_time = std::chrono::high_resolution_clock::now();
    std::cout << "Loading and unpacking model from: " << model_path << std::endl;
    auto [config, consts, qtypes] = load_gguf(model_path);
    auto load_finish_time = std::chrono::high_resolution_clock::now();
    std::cout << "Loading and unpacking model done. Time: " << std::chrono::duration_cast<std::chrono::milliseconds>(load_finish_time - start_time).count() << "ms" << std::endl;
    // Announce the generation phase once (the superseded "OV model" wording is dropped).
    std::cout << "Start generating OpenVINO model..." << std::endl;

    const std::string model_arch = std::get<std::string>(config.at("architecture"));
    // Reject unsupported architectures up front so no model is built or saved for them.
    if (model_arch != "llama" && model_arch != "qwen2") {
        OPENVINO_THROW("Unsupported model architecture '", model_arch, "'");
    }

    std::shared_ptr<ov::Model> model = create_language_model(config, consts, qtypes);

    // Single map lookup: save the generated model only when a non-empty cache dir was given.
    const auto cache_dir_it = properties.find(ov::cache_dir.name());
    if (cache_dir_it != properties.end()) {
        const std::string cache_dir = cache_dir_it->second.as<std::string>();
        if (!cache_dir.empty()) {
            const std::filesystem::path model_cache_dir(cache_dir);
            const std::filesystem::path gguf_model_path(model_path);
            // The file name is derived from the GGUF file stem, so different GGUF files
            // map to different saved models inside the same cache directory.
            const std::filesystem::path save_path = model_cache_dir / (gguf_model_path.stem().string() + "_openvino_model.xml");
            save_openvino_model(model, save_path.string(), true);
        }
    }

    // Measured from the end of loading, so it also covers the optional save above.
    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(
        std::chrono::high_resolution_clock::now() - load_finish_time).count();
    std::cout << "Model generation done. Time: " << duration << "ms" << std::endl;

    return model;
}
2 changes: 1 addition & 1 deletion src/cpp/src/gguf_utils/gguf_modeling.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@

#include "openvino/openvino.hpp"

std::shared_ptr<ov::Model> create_from_gguf(const std::string& model_path);
/// Builds an OpenVINO model from the GGUF file at `model_path`.
///
/// @param model_path  Path of the GGUF file to load.
/// @param properties  If it contains a non-empty ov::cache_dir entry, the generated model
///                    is also saved to "<cache_dir>/<GGUF file stem>_openvino_model.xml".
/// @return The generated model.
/// @throws Raises through OPENVINO_THROW for GGUF architectures other than "llama" and "qwen2".
std::shared_ptr<ov::Model> create_from_gguf(const std::string& model_path, const ov::AnyMap& properties);
23 changes: 20 additions & 3 deletions src/cpp/src/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -292,10 +292,27 @@ bool is_gguf_model(const std::filesystem::path& file_path) {

} // namespace

std::shared_ptr<ov::Model> read_model(const std::filesystem::path& model_dir, const ov::AnyMap& config) {
std::shared_ptr<ov::Model> read_model(const std::filesystem::path& model_dir, const ov::AnyMap& properties) {
if (is_gguf_model(model_dir)) {
#ifdef ENABLE_GGUF
return create_from_gguf(model_dir.string());
if (properties.find(ov::cache_dir.name()) != properties.end()) {
std::string cache_dir = properties.at(ov::cache_dir.name()).as<std::string>();
if (!cache_dir.empty()) {
std::filesystem::path model_cache_dir(cache_dir);
std::filesystem::path model_path = model_cache_dir / (model_dir.stem().string() + "_openvino_model.xml");
if (std::filesystem::exists(model_path)) {
std::cout << "Found generated OpenVINO model: " << model_path.string() << ", skip creating from GGUF model.\n";
auto start_time = std::chrono::high_resolution_clock::now();
auto model = singleton_core().read_model(model_path, {}, properties);
auto load_finish_time = std::chrono::high_resolution_clock::now();
std::cout << "Loading OpenVINO model done. Time: " << std::chrono::duration_cast<std::chrono::milliseconds>(load_finish_time - start_time).count() << "ms" << std::endl;
return model;
}
}
return create_from_gguf(model_dir.string(), properties);
} else {
return create_from_gguf(model_dir.string(), properties);
}
#else
OPENVINO_ASSERT("GGUF support is switched off. Please, recompile with 'cmake -DENABLE_GGUF=ON'");
#endif
Expand All @@ -310,7 +327,7 @@ std::shared_ptr<ov::Model> read_model(const std::filesystem::path& model_dir, c
OPENVINO_THROW("Could not find a model in the directory '", model_dir, "'");
}

return singleton_core().read_model(model_path, {}, config);
return singleton_core().read_model(model_path, {}, properties);
}
}

Expand Down
Loading