
Commit bbb9de3

Update some comments.
Signed-off-by: xipingya <[email protected]>
1 parent 4a8901c commit bbb9de3

File tree

6 files changed: +12 -13 lines changed


src/cpp/src/continuous_batching/pipeline.cpp

Lines changed: 0 additions & 1 deletion
@@ -68,7 +68,6 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( const std::filesystem::p
 
     if (is_prompt_lookup_enabled) {
         OPENVINO_ASSERT(draft_model_desr.model == nullptr, "Speculative decoding and prompt lookup decoding are mutually exclusive");
-        // OPENVINO_ASSERT(embedder == nullptr, "Prompt lookup decoding is not supported for models with embeddings");
         m_impl = std::make_shared<PromptLookupImpl>(model, embedder, tokenizer, scheduler_config, device, properties_without_draft_model_without_gguf, generation_config);
     } else if (draft_model_desr.model != nullptr) {
         OPENVINO_ASSERT(embedder == nullptr, "Speculative decoding is not supported for models with embeddings");

src/cpp/src/continuous_batching/pipeline_base.cpp

Lines changed: 1 addition & 0 deletions
@@ -154,6 +154,7 @@ ContinuousBatchingPipeline::IContinuousBatchingPipeline::generate(
     const StreamerVariant& streamer) {
     auto generate_start_time = std::chrono::steady_clock::now();
     OPENVINO_ASSERT(m_model_input_type == ModelInputType::EMBEDDINGS);
+
     OPENVINO_ASSERT(prompts.size() == sampling_params.size(), "Number of prompts should be equal to the number of generation configs.");
     OPENVINO_ASSERT(prompts.size() == rgbs_vector.size(), "Number of prompts should be equal to the number of images vectors.");
 

src/cpp/src/prompt_lookup/continuous_batching_for_prompt_lookup.cpp

Lines changed: 8 additions & 4 deletions
@@ -82,11 +82,15 @@ void ContinuousBatchingPipeline::ContinuousBatchingForPromptLookupImpl::generate
     }
     TokenIds candidates = generate_candidates(full_input_ids, min_num_assistant_tokens, sampling_params.max_ngram_size);
 
+    // Pad the candidate tokens to a fixed length to avoid shape checks
+    // and extra computation when the candidate shape changes.
     if (candidates.size() < sampling_params.num_assistant_tokens) {
-        auto token_sz = candidates.size();
-        for (int ci = 0; ci < sampling_params.num_assistant_tokens - token_sz; ci ++) {
-            // last token?
-            candidates.push_back(15000);
+        if (full_input_ids.size() > 0) {
+            auto token_sz = candidates.size();
+            for (int ci = 0; ci < sampling_params.num_assistant_tokens - token_sz; ci++) {
+                // Pad with the last prompt token.
+                candidates.push_back(full_input_ids.back());
+            }
         }
     }
 
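This hunk is the one functional change in the commit: when prompt lookup yields fewer than num_assistant_tokens candidates, the list is now padded with the last prompt token instead of the hard-coded id 15000, so the candidate tensor keeps a fixed shape across steps. A minimal standalone sketch of that step, assuming TokenIds is std::vector<int64_t> as in openvino.genai; pad_candidates is a hypothetical helper name, not part of the commit:

#include <cstdint>
#include <vector>

using TokenIds = std::vector<int64_t>;  // assumed alias, matching openvino.genai

// Pad `candidates` up to `num_assistant_tokens` entries using the last
// prompt token, keeping the candidate shape fixed across decoding steps.
void pad_candidates(TokenIds& candidates,
                    const TokenIds& full_input_ids,
                    size_t num_assistant_tokens) {
    if (candidates.size() >= num_assistant_tokens || full_input_ids.empty()) {
        return;  // already long enough, or no prompt token to pad with
    }
    candidates.resize(num_assistant_tokens, full_input_ids.back());
}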

src/cpp/src/prompt_lookup/prompt_lookup_impl.cpp

Lines changed: 0 additions & 5 deletions
@@ -39,11 +39,6 @@ void ContinuousBatchingPipeline::PromptLookupImpl::step() {
     ManualTimer step_timer("prompt_lookup_decoding: step()");
     step_timer.start();
 
-    // ManualTimer candidates_timer("prompt_lookup_decoding: generate_candidates()");
-    // candidates_timer.start();
-    // m_pipeline->generate_candidates();
-    // candidates_timer.end();
-    // m_sd_metrics.draft_duration += candidates_timer.get_duration();
     auto generated_len_before = m_pipeline->get_generated_request_len();
 
     ManualTimer main_timer("prompt_lookup_decoding: pipeline: step()");

src/cpp/src/visual_language/gemma3/classes.hpp

Lines changed: 2 additions & 2 deletions
@@ -37,12 +37,12 @@ class InputsEmbedderGemma3 : public InputsEmbedder::IInputsEmbedder {
 
     ov::Tensor get_inputs_embeds(const std::string& prompt, const std::vector<ov::genai::EncodedImage>& images, ov::genai::VLMPerfMetrics& metrics, bool recalculate_merged_embeddings = true, const std::vector<size_t>& image_sequence = {}) override;
 
+    std::pair<ov::Tensor, ov::Tensor> get_inputs_embeds_with_token_type_ids(const std::string& prompt, const std::vector<EncodedImage>& images, VLMPerfMetrics& metrics, bool recalculate_merged_embeddings = true, const std::vector<size_t>& image_sequence = {}) override;
+
     bool has_token_type_ids() const override;
 
     std::vector<ov::genai::EncodedImage> encode_images(const std::vector<ov::Tensor>& images) override;
 
-    std::pair<ov::Tensor, ov::Tensor> get_inputs_embeds_with_token_type_ids(const std::string& prompt, const std::vector<ov::genai::EncodedImage>& images, ov::genai::VLMPerfMetrics& metrics, bool recalculate_merged_embeddings = true, const std::vector<size_t>& image_sequence = {}) override;
-
     std::pair<std::string, std::vector<size_t>> normalize_prompt(const std::string& prompt, size_t base_id, const std::vector<EncodedImage>& images) const override;
 
     std::pair<ov::Tensor, std::optional<int64_t>> get_position_ids(const size_t inputs_embeds_size, const size_t history_size) override;

src/cpp/src/visual_language/inputs_embedder.hpp

Lines changed: 1 addition & 1 deletion
@@ -105,7 +105,7 @@ class InputsEmbedder {
     utils::KVCacheState m_kv_cache_state;
     // length of attention_mask/kv cache at the beginning of generation()
     size_t m_prev_hist_length = 0;
-    // When enable prompt lookup, prompt token type is need.
+    // When prompt lookup is enabled, prompt token ids are required to generate candidates.
    bool m_prompt_lookup = false;
     virtual ~IInputsEmbedder() = default;
 