# SPDX-License-Identifier: Apache-2.0

from typing import List, Tuple

import pytest
import torch
from transformers import AutoTokenizer

from tests.v1.engine.utils import (NUM_PROMPT_LOGPROBS_UNDER_TEST,
                                   NUM_SAMPLE_LOGPROBS_UNDER_TEST, PROMPT_LEN,
                                   TOKENIZER_NAME,
                                   DummyOutputProcessorTestVectors,
                                   generate_dummy_prompt_logprobs_tensors,
                                   generate_dummy_sample_logprobs)
from vllm.engine.arg_utils import EngineArgs
from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs

from tests.v1.engine.utils import FULL_STRINGS  # isort: skip

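# Type aliases describing the dummy logprobs payloads used in these tests.
# The layout below is an assumption read off the aliases themselves and the
# way the generator helpers from tests.v1.engine.utils are invoked further
# down: sample logprobs are a list with one (tensor, tensor) pair, presumably
# per sampled position, while prompt logprobs are a single pair of tensors
# covering the whole prompt; which tensor holds token ids and which holds
# logprob values is defined by those helpers.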
EngineCoreSampleLogprobsType = List[Tuple[torch.Tensor, torch.Tensor]]
EngineCorePromptLogprobsType = Tuple[torch.Tensor, torch.Tensor]


def _build_test_vectors_no_logprobs() -> DummyOutputProcessorTestVectors:
    """Generate output processor dummy test vectors, without logprobs

    Returns:
      DummyOutputProcessorTestVectors instance with no logprobs
    """

    tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)
    vllm_config = EngineArgs(model=TOKENIZER_NAME).create_engine_config()
    # Tokenize prompts under test & create dummy generated tokens
    prompt_tokens = [
        tokenizer(text).input_ids[:PROMPT_LEN] for text in FULL_STRINGS
    ]
    generation_tokens = [
        tokenizer(text).input_ids[PROMPT_LEN:] for text in FULL_STRINGS
    ]
    # Generate prompt strings
    prompt_strings = [
        tokenizer.decode(prompt_token_ids, skip_special_tokens=True)
        for prompt_token_ids in prompt_tokens
    ]
    prompt_strings_len = [
        len(prompt_string) for prompt_string in prompt_strings
    ]
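    # Pack everything into the dataclass consumed by the output processor
    # tests; the tokenizer group is initialized from the engine config's
    # model/scheduler/parallel/LoRA configs.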
    return DummyOutputProcessorTestVectors(
        tokenizer=tokenizer,
        tokenizer_group=init_tokenizer_from_configs(
            vllm_config.model_config, vllm_config.scheduler_config,
            vllm_config.parallel_config, vllm_config.lora_config),
        vllm_config=vllm_config,
        full_tokens=[tokenizer(text).input_ids for text in FULL_STRINGS],
        prompt_tokens=prompt_tokens,
        generation_tokens=generation_tokens,
        prompt_strings=prompt_strings,
        prompt_strings_len=prompt_strings_len,
        generation_strings=[
            text[prompt_len:]
            for text, prompt_len in zip(FULL_STRINGS, prompt_strings_len)
        ],
        prompt_logprobs=[],
        generation_logprobs=[])


@pytest.fixture
def dummy_test_vectors() -> DummyOutputProcessorTestVectors:
    """Generate output processor dummy test vectors, with logprobs

    Returns:
      DummyOutputProcessorTestVectors instance with logprobs
    """
    # Build dummy test vectors without logprobs
    dtv = _build_test_vectors_no_logprobs()
    # Inject logprobs into the dummy test vectors data structure
    dtv.generation_logprobs = [
        generate_dummy_sample_logprobs(
            sampled_tokens_list=tokens_list,
            num_logprobs=NUM_SAMPLE_LOGPROBS_UNDER_TEST,
            tokenizer=dtv.tokenizer) for tokens_list in dtv.generation_tokens
    ]
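    # Likewise, build dummy prompt logprobs tensors for each prompt's tokens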
    dtv.prompt_logprobs = [
        generate_dummy_prompt_logprobs_tensors(
            prompt_tokens_list=tokens_list,
            num_logprobs=NUM_PROMPT_LOGPROBS_UNDER_TEST,
            tokenizer=dtv.tokenizer) for tokens_list in dtv.prompt_tokens
    ]
    return dtv
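

# Illustrative usage (a hypothetical test, shown only as a sketch of how a
# test module in this package might consume the fixture; the test name and
# assertions below are not part of this conftest):
#
#     def test_dummy_vectors_align(dummy_test_vectors):
#         dtv = dummy_test_vectors
#         # One sample-logprobs entry per generated token sequence and one
#         # prompt-logprobs entry per prompt, mirroring the comprehensions
#         # in the fixture above.
#         assert len(dtv.generation_logprobs) == len(dtv.generation_tokens)
#         assert len(dtv.prompt_logprobs) == len(dtv.prompt_tokens)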