
Commit 14f651d

updated notebooks
Signed-off-by: Sharvari Medhe <[email protected]>
1 parent: d020b88

File tree

notebooks/QEfficientGPT2.ipynb
notebooks/QEfficientMPT.ipynb

2 files changed: +11 -13 lines


notebooks/QEfficientGPT2.ipynb

Lines changed: 6 additions & 7 deletions
@@ -33,6 +33,9 @@
 "outputs": [],
 "source": [
 "# Initiate the Original Transformer model\n",
+"# Initiate the tokenizer for transformers library\n",
+"from transformers import AutoTokenizer\n",
+"\n",
 "from QEfficient import QEFFAutoModelForCausalLM as AutoModelForCausalLM\n",
 "\n",
 "# Please uncomment and use appropriate Cache Directory for transformers, in case you don't want to use default ~/.cache dir.\n",
@@ -92,11 +95,7 @@
 "# Compile the model for provided compilation arguments\n",
 "# Please use platform SDK to Check num_cores for your card.\n",
 "\n",
-"qeff_model.compile(\n",
-"    num_cores=14,\n",
-"    mxfp6=True,\n",
-"    device_group=[0],\n",
-")"
+"qeff_model.compile(num_cores=14, mxfp6_matmul=True)"
 ]
 },
 {
@@ -116,8 +115,8 @@
 "source": [
 "# post compilation, we can print the latency stats for the kv models, We provide API to print token and Latency stats on Cloud AI 100\n",
 "# We need the compiled prefill and decode qpc to compute the token generated, This is based on Greedy Sampling Approach\n",
-"\n",
-"qeff_model.generate(prompts=[\"My name is\"])"
+"tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
+"qeff_model.generate(prompts=[\"My name is\"], tokenizer=tokenizer)"
 ]
 }
 ],
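Taken together, the changed cells give the GPT2 notebook the flow sketched below. This is a minimal sketch, not the full notebook: the from_pretrained loader and the "gpt2" value for model_name are assumptions inferred from the API's transformers-style naming, since both sit outside this diff.

# Minimal sketch of the updated notebook flow; see assumptions above.
from transformers import AutoTokenizer

from QEfficient import QEFFAutoModelForCausalLM as AutoModelForCausalLM

model_name = "gpt2"  # assumed; the notebook defines model_name outside this diff

# Load the model through the QEfficient wrapper (loader name assumed).
qeff_model = AutoModelForCausalLM.from_pretrained(model_name)

# Compile for Cloud AI 100: mxfp6_matmul replaces the old mxfp6 flag,
# and device_group is no longer passed at compile time.
qeff_model.compile(num_cores=14, mxfp6_matmul=True)

# generate() now takes the tokenizer explicitly instead of resolving it internally.
tokenizer = AutoTokenizer.from_pretrained(model_name)
qeff_model.generate(prompts=["My name is"], tokenizer=tokenizer)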

notebooks/QEfficientMPT.ipynb

Lines changed: 5 additions & 6 deletions
@@ -32,6 +32,8 @@
 "outputs": [],
 "source": [
 "# Initiate the Original Transformer model\n",
+"# Initiate the tokenizer for transformers library\n",
+"from transformers import AutoTokenizer\n",
 "\n",
 "from QEfficient import QEFFAutoModelForCausalLM as AutoModelForCausalLM\n",
 "\n",
@@ -91,11 +93,7 @@
 "# Compile the model for provided compilation arguments\n",
 "# Please use platform SDK to Check num_cores for your card.\n",
 "\n",
-"qeff_model.compile(\n",
-"    num_cores=14,\n",
-"    mxfp6=True,\n",
-"    device_group=[0],\n",
-")"
+"qeff_model.compile(num_cores=14, mxfp6_matmul=True)"
 ]
 },
 {
@@ -116,7 +114,8 @@
 "# post compilation, we can print the latency stats for the kv models, We provide API to print token and Latency stats on Cloud AI 100\n",
 "# We need the compiled prefill and decode qpc to compute the token generated, This is based on Greedy Sampling Approach\n",
 "\n",
-"qeff_model.generate(prompts=[\"My name is\"])"
+"tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
+"qeff_model.generate(prompts=[\"My name is\"], tokenizer=tokenizer)"
 ]
 }
 ],
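The MPT notebook picks up the same two API changes, so the sketch after the GPT2 diff applies here unchanged apart from the checkpoint assigned to model_name.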
