
Commit 14f651d

updated notebooks
Signed-off-by: Sharvari Medhe <[email protected]>
1 parent: d020b88

File tree

notebooks/QEfficientGPT2.ipynb
notebooks/QEfficientMPT.ipynb

2 files changed: +11 -13 lines


notebooks/QEfficientGPT2.ipynb

Lines changed: 6 additions & 7 deletions
@@ -33,6 +33,9 @@
 "outputs": [],
 "source": [
 "# Initiate the Original Transformer model\n",
+"# Initiate the tokenizer for transformers library\n",
+"from transformers import AutoTokenizer\n",
+"\n",
 "from QEfficient import QEFFAutoModelForCausalLM as AutoModelForCausalLM\n",
 "\n",
 "# Please uncomment and use appropriate Cache Directory for transformers, in case you don't want to use default ~/.cache dir.\n",
@@ -92,11 +95,7 @@
 "# Compile the model for provided compilation arguments\n",
 "# Please use platform SDK to Check num_cores for your card.\n",
 "\n",
-"qeff_model.compile(\n",
-"    num_cores=14,\n",
-"    mxfp6=True,\n",
-"    device_group=[0],\n",
-")"
+"qeff_model.compile(num_cores=14, mxfp6_matmul=True)"
 ]
 },
 {
@@ -116,8 +115,8 @@
 "source": [
 "# post compilation, we can print the latency stats for the kv models, We provide API to print token and Latency stats on Cloud AI 100\n",
 "# We need the compiled prefill and decode qpc to compute the token generated, This is based on Greedy Sampling Approach\n",
-"\n",
-"qeff_model.generate(prompts=[\"My name is\"])"
+"tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
+"qeff_model.generate(prompts=[\"My name is\"], tokenizer=tokenizer)"
 ]
 }
 ],
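Taken together, the changed cells give the GPT2 notebook the flow sketched below. This is a minimal sketch, not the full notebook: the from_pretrained loader and the "gpt2" value for model_name are assumptions inferred from the API's transformers-style naming, since both sit outside this diff.

# Minimal sketch of the updated notebook flow; see assumptions above.
from transformers import AutoTokenizer

from QEfficient import QEFFAutoModelForCausalLM as AutoModelForCausalLM

model_name = "gpt2"  # assumed; the notebook defines model_name outside this diff

# Load the model through the QEfficient wrapper (loader name assumed).
qeff_model = AutoModelForCausalLM.from_pretrained(model_name)

# Compile for Cloud AI 100: mxfp6_matmul replaces the old mxfp6 flag,
# and device_group is no longer passed at compile time.
qeff_model.compile(num_cores=14, mxfp6_matmul=True)

# generate() now takes the tokenizer explicitly instead of resolving it internally.
tokenizer = AutoTokenizer.from_pretrained(model_name)
qeff_model.generate(prompts=["My name is"], tokenizer=tokenizer)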

notebooks/QEfficientMPT.ipynb

Lines changed: 5 additions & 6 deletions
@@ -32,6 +32,8 @@
 "outputs": [],
 "source": [
 "# Initiate the Original Transformer model\n",
+"# Initiate the tokenizer for transformers library\n",
+"from transformers import AutoTokenizer\n",
 "\n",
 "from QEfficient import QEFFAutoModelForCausalLM as AutoModelForCausalLM\n",
 "\n",
@@ -91,11 +93,7 @@
 "# Compile the model for provided compilation arguments\n",
 "# Please use platform SDK to Check num_cores for your card.\n",
 "\n",
-"qeff_model.compile(\n",
-"    num_cores=14,\n",
-"    mxfp6=True,\n",
-"    device_group=[0],\n",
-")"
+"qeff_model.compile(num_cores=14, mxfp6_matmul=True)"
 ]
 },
 {
@@ -116,7 +114,8 @@
 "# post compilation, we can print the latency stats for the kv models, We provide API to print token and Latency stats on Cloud AI 100\n",
 "# We need the compiled prefill and decode qpc to compute the token generated, This is based on Greedy Sampling Approach\n",
 "\n",
-"qeff_model.generate(prompts=[\"My name is\"])"
+"tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
+"qeff_model.generate(prompts=[\"My name is\"], tokenizer=tokenizer)"
 ]
 }
 ],
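The MPT notebook picks up the same two API changes, so the sketch after the GPT2 diff applies here unchanged apart from the checkpoint assigned to model_name.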
