From 5a5a71d7bdf70ac0d72711bcb6e1ae30d3ac5c7e Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Wed, 4 Oct 2023 17:31:15 +0100 Subject: [PATCH 01/34] Starting gpt4all --- examples/gpt4all.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/gpt4all.sh b/examples/gpt4all.sh index 5fd739e55c554..92f1e372b244a 100755 --- a/examples/gpt4all.sh +++ b/examples/gpt4all.sh @@ -7,7 +7,7 @@ cd `dirname $0` cd .. -./main --color --instruct --threads 4 \ +./build/bin/main --color --instruct --threads 4 \ --model ./models/gpt4all-7B/gpt4all-lora-quantized.bin \ --file ./prompts/alpaca.txt \ --batch_size 8 --ctx_size 2048 -n -1 \ From 1bb192fc2747f8693a0301cca5c36a09165ac1f9 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Thu, 5 Oct 2023 11:45:20 +0100 Subject: [PATCH 02/34] Add cmap_example.cpp --- scripts/cmap_example.cpp | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 scripts/cmap_example.cpp diff --git a/scripts/cmap_example.cpp b/scripts/cmap_example.cpp new file mode 100644 index 0000000000000..78b6330256f6f --- /dev/null +++ b/scripts/cmap_example.cpp @@ -0,0 +1,23 @@ +// example of a C/C++ equivalent data structure to the python dict +// there are two: std::map automatically sorts on key; std::unordered_map doesn't + +#include +#include + +int main() { + std::map dict; + + dict["apple"] = 5; + dict["banana"] = 2; + dict["orange"] = 7; + + + // Accessing elements in the map + std::cout << "Value of apple: " << dict["apple"] << std::endl; + + for (const auto& pair : dict) { + std::cout << "Key: " << pair.first << ", Value: " << pair.second << std::endl; + } + + return 0; +} \ No newline at end of file From aea055db42eadc44459188e8929adae43b38d9b6 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Thu, 5 Oct 2023 11:52:44 +0100 Subject: [PATCH 03/34] Update cmap_example --- examples/cmap_example/CMakeLists.txt | 5 +++++ {scripts => examples/cmap_example}/cmap_example.cpp | 0 2 files changed, 5 insertions(+) create mode 100644 examples/cmap_example/CMakeLists.txt rename {scripts => examples/cmap_example}/cmap_example.cpp (100%) diff --git a/examples/cmap_example/CMakeLists.txt b/examples/cmap_example/CMakeLists.txt new file mode 100644 index 0000000000000..48acc6f74034d --- /dev/null +++ b/examples/cmap_example/CMakeLists.txt @@ -0,0 +1,5 @@ +set(TARGET cmap_example) +add_executable(${TARGET} cmap_example.cpp) +install(TARGETS ${TARGET} RUNTIME) +target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_compile_features(${TARGET} PRIVATE cxx_std_11) diff --git a/scripts/cmap_example.cpp b/examples/cmap_example/cmap_example.cpp similarity index 100% rename from scripts/cmap_example.cpp rename to examples/cmap_example/cmap_example.cpp From eb939e0638ed30cda3c93dd1dbd4937ca56c8b40 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Thu, 5 Oct 2023 12:04:03 +0100 Subject: [PATCH 04/34] Update cmap-example --- examples/{cmap_example => cmap-example}/CMakeLists.txt | 4 ++-- .../cmap_example.cpp => cmap-example/cmap-example.cpp} | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename examples/{cmap_example => cmap-example}/CMakeLists.txt (71%) rename examples/{cmap_example/cmap_example.cpp => cmap-example/cmap-example.cpp} (100%) diff --git a/examples/cmap_example/CMakeLists.txt b/examples/cmap-example/CMakeLists.txt similarity index 71% rename from examples/cmap_example/CMakeLists.txt rename to examples/cmap-example/CMakeLists.txt index 48acc6f74034d..c5820f7b1bfcd 100644 --- a/examples/cmap_example/CMakeLists.txt +++ 
b/examples/cmap-example/CMakeLists.txt @@ -1,5 +1,5 @@ -set(TARGET cmap_example) -add_executable(${TARGET} cmap_example.cpp) +set(TARGET cmap-example) +add_executable(${TARGET} cmap-example.cpp) install(TARGETS ${TARGET} RUNTIME) target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_11) diff --git a/examples/cmap_example/cmap_example.cpp b/examples/cmap-example/cmap-example.cpp similarity index 100% rename from examples/cmap_example/cmap_example.cpp rename to examples/cmap-example/cmap-example.cpp From e3c8f7bd167e51a346909d6324dd6b8a6796496f Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Thu, 5 Oct 2023 12:13:30 +0100 Subject: [PATCH 05/34] Update cmap-example --- examples/CMakeLists.txt | 1 + examples/cmap-example/cmap-example.cpp | 2 ++ 2 files changed, 3 insertions(+) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index de4cf7a691768..9b39b3fa9aa13 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -30,6 +30,7 @@ else() add_subdirectory(embd-input) add_subdirectory(llama-bench) add_subdirectory(beam-search) + add_subdirectory(cmap-examples) if (LLAMA_METAL) add_subdirectory(metal) endif() diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index 78b6330256f6f..e3af5073fd754 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -1,6 +1,8 @@ // example of a C/C++ equivalent data structure to the python dict // there are two: std::map automatically sorts on key; std::unordered_map doesn't +#include "llama.h" + #include #include From 73fa2deee10e9c80b4598683fd5abe54cbc6d880 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Thu, 5 Oct 2023 12:18:21 +0100 Subject: [PATCH 06/34] Update cmap-example --- examples/cmap-example/cmap-example.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index e3af5073fd754..c6db1129b86d9 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -1,9 +1,6 @@ // example of a C/C++ equivalent data structure to the python dict -// there are two: std::map automatically sorts on key; std::unordered_map doesn't +// there are two: std::map automatically sorts on key; std::unordered_map does not -#include "llama.h" - -#include #include int main() { @@ -15,10 +12,10 @@ int main() { // Accessing elements in the map - std::cout << "Value of apple: " << dict["apple"] << std::endl; + printf("Value of apple: %d\n", dict["apple"]); for (const auto& pair : dict) { - std::cout << "Key: " << pair.first << ", Value: " << pair.second << std::endl; + printf("Key: %s, Value: $s\n", pair.first, pair.second); } return 0; From 9dad8b81e1219da954a97e5ddfd62127ffbb2e66 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Thu, 5 Oct 2023 12:23:49 +0100 Subject: [PATCH 07/34] Update cmap-example --- Makefile | 3 +++ examples/CMakeLists.txt | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 91198c555f5d0..8d37947a54f8a 100644 --- a/Makefile +++ b/Makefile @@ -564,6 +564,9 @@ perplexity: examples/perplexity/perplexity.cpp build-info.h ggml. 
embedding: examples/embedding/embedding.cpp build-info.h ggml.o llama.o common.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) +cmap-example: examples/cmap-example/cmap-example.cpp build-info.h ggml.o llama.o common.o $(OBJS) + $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.o llama.o common.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 9b39b3fa9aa13..b32706b0337e0 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -30,7 +30,7 @@ else() add_subdirectory(embd-input) add_subdirectory(llama-bench) add_subdirectory(beam-search) - add_subdirectory(cmap-examples) + add_subdirectory(cmap-example) if (LLAMA_METAL) add_subdirectory(metal) endif() From 7804fe0d687c42d2505d741f70ceb350ecce2942 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Thu, 5 Oct 2023 12:31:10 +0100 Subject: [PATCH 08/34] Update cmap-example --- examples/cmap-example/cmap-example.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index c6db1129b86d9..a86171b6c11f7 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -2,17 +2,18 @@ // there are two: std::map automatically sorts on key; std::unordered_map does not #include +#include int main() { std::map dict; - dict["apple"] = 5; - dict["banana"] = 2; - dict["orange"] = 7; + dict[std::string("apple")] = 5; + dict[std::string("banana")] = 2; + dict[std::string("orange")] = 7; // Accessing elements in the map - printf("Value of apple: %d\n", dict["apple"]); + printf("Value of apple: %d\n", dict[std::string("apple")]); for (const auto& pair : dict) { printf("Key: %s, Value: $s\n", pair.first, pair.second); From 317d195f68f79a8cadc8db38f63009c18d3f3c1b Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Thu, 5 Oct 2023 12:33:20 +0100 Subject: [PATCH 09/34] Update cmap-examples --- examples/cmap-example/cmap-example.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index a86171b6c11f7..7b11dccda8051 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -16,7 +16,7 @@ int main() { printf("Value of apple: %d\n", dict[std::string("apple")]); for (const auto& pair : dict) { - printf("Key: %s, Value: $s\n", pair.first, pair.second); + printf("Key: %s, Value: $d\n", pair.first.c_str(), pair.second); } return 0; From 380a10f8b5c3f5b50379f5ccb60a240217f7c9c9 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Thu, 5 Oct 2023 12:35:19 +0100 Subject: [PATCH 10/34] Update cmap-example --- examples/cmap-example/cmap-example.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index 7b11dccda8051..daf827e51706e 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -16,7 +16,7 @@ int main() { printf("Value of apple: %d\n", dict[std::string("apple")]); for (const auto& pair : dict) { - printf("Key: %s, Value: $d\n", pair.first.c_str(), pair.second); + printf("Key: %s, Value: %d\n", pair.first.c_str(), pair.second); } return 0; From 275d56e99eaf509702eddfbcdb4363c27545e4a3 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Thu, 5 Oct 2023 15:38:21 +0100 Subject: 
[PATCH 11/34] Update cmap-example --- examples/cmap-example/cmap-example.cpp | 107 ++++++++++++++++++++++++- 1 file changed, 105 insertions(+), 2 deletions(-) diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index daf827e51706e..bc4e5ce7cdd94 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -2,10 +2,13 @@ // there are two: std::map automatically sorts on key; std::unordered_map does not #include +#include #include +#include int main() { std::map dict; + std::map> helpdict; dict[std::string("apple")] = 5; dict[std::string("banana")] = 2; @@ -13,10 +16,110 @@ int main() { // Accessing elements in the map - printf("Value of apple: %d\n", dict[std::string("apple")]); + printf("First kind of dictionary\n\nValue of apple: %d\n", dict[std::string("apple")]); for (const auto& pair : dict) { - printf("Key: %s, Value: %d\n", pair.first.c_str(), pair.second); + printf("Key: %10s, Value: %4d\n", pair.first.c_str(), pair.second); + } + + // Now try the helpdict idea + + printf("Second kind of dictionary\n"); + + // Create a list of strings + std::list stringList = {"apple", "banana", "orange"}; + + // Add key-value pair to map + helpdict["fruits"] = stringList; + + // Access and modify the list of strings + std::list& fruitsList = helpdict["fruits"]; + fruitsList.push_back("grape"); + fruitsList.push_back("pineapple"); + + for (const auto& pair : helpdict) { + printf("helpdict contains a list of %s\n", pair.first.c_str()); + for (const auto& element : pair.second) { + printf(" %s", element.c_str()); + } + printf("\n"); + } + + // Create a binary key for each value consisting of a list of strings + + std::map> bitdict; + + // Example binary key + int binaryKey1 = 0b0000001; + int binaryKey2 = 0b0000010; + int binaryKey3 = 0b0000100; + int binaryKey4 = 0b0001000; + int binaryKey5 = 0b0010000; + + // Convert binary key to string + std::string keyString1 = std::bitset<8>(binaryKey1).to_string(); + std::string keyString2 = std::bitset<8>(binaryKey2).to_string(); + std::string keyString3 = std::bitset<8>(binaryKey3).to_string(); + std::string keyString4 = std::bitset<8>(binaryKey4).to_string(); + std::string keyString5 = std::bitset<8>(binaryKey5).to_string(); + + // Add key-value pair to map + bitdict[keyString1] = {"-h", "--help", "print this help list and exit"}; + bitdict[keyString2] = {"-f", "FNAME", "--file", "FNAME", "read the prompts from an external text file"}; + bitdict[keyString3] = {"-n", "N", "--n-predict", "N", "number of tokens to predict in generating a completion"}; + bitdict[keyString4] = {"-t", "N", "--threads", "N", "number of threads to use"}; + bitdict[keyString5] = {"-m", "MODELPATH", "--model", "MODELPATH", "path to llama model to use"}; + + + for (const auto& pair : bitdict) { + printf("help dictionary contains a list of arguments specific to this app %s\n", pair.first.substr(pair.first.size() - 5).c_str()); + for (const auto& element : pair.second) { + printf(" %5s", element.c_str()); + } + printf("\n"); + } + + printf("\nThis is the complete help file in this mock-up illustrative example:\n\n"); + for (const auto& pair: bitdict) { + printf("%s ",pair.first.c_str()); + for (const auto& element : pair.second) { + printf(" %5s", element.c_str()); + } + printf("\n"); + } + + // Now we try to use the appcode to select from the help available + // app1 has only -h and -f so 0b00011; app2 has only -h and -n so 0b00101 + + int app1code = 0b01011; + int app2code = 0b10101; + + printf("\nNow 
processing app with only -h, -t and -f and appcode %3d\n", app1code); + if (app1code != 0) { + for (const auto& kvp : bitdict) { + if ((app1code & std::stoi(kvp.first)) != 0) { + printf("%s ",kvp.first.c_str()); + for (const auto& element : kvp.second) { + printf(" %5s", element.c_str()); + } + printf("\n"); + } + } + printf("\n"); + } + + printf("\nNow processing app with only -h, -m, -n and appcode %3d\n", app2code); + if (app2code != 0) { + for (const auto& kvp : bitdict) { + if ((app2code & std::stoi(kvp.first)) != 0) { + printf("%s ",kvp.first.c_str()); + for (const auto& element : kvp.second) { + printf(" %5s", element.c_str()); + } + printf("\n"); + } + } + printf("\n"); } return 0; From 297b7b6301b9ecc554084d22fea5ae1f2ae0b79e Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Thu, 5 Oct 2023 17:30:48 +0100 Subject: [PATCH 12/34] Automation --- examples/cmap-example/cmap-example.cpp | 26 +++++++++++++--- .../cmap-example/find_implemented_args.py | 30 +++++++++++++++++++ 2 files changed, 52 insertions(+), 4 deletions(-) create mode 100644 examples/cmap-example/find_implemented_args.py diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index bc4e5ce7cdd94..b2a7b2735c597 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -5,6 +5,7 @@ #include #include #include +#include int main() { std::map dict; @@ -91,10 +92,10 @@ int main() { // Now we try to use the appcode to select from the help available // app1 has only -h and -f so 0b00011; app2 has only -h and -n so 0b00101 - int app1code = 0b01011; - int app2code = 0b10101; + int app1code = 0b0001011; + int app2code = 0b0010111; - printf("\nNow processing app with only -h, -t and -f and appcode %3d\n", app1code); + printf("\nNow processing app with only -h, -t and -f implemented and appcode %3d\n", app1code); if (app1code != 0) { for (const auto& kvp : bitdict) { if ((app1code & std::stoi(kvp.first)) != 0) { @@ -108,7 +109,7 @@ int main() { printf("\n"); } - printf("\nNow processing app with only -h, -m, -n and appcode %3d\n", app2code); + printf("\nNow processing app with only -h, -f, -m and -n implemented and appcode %3d\n", app2code); if (app2code != 0) { for (const auto& kvp : bitdict) { if ((app2code & std::stoi(kvp.first)) != 0) { @@ -122,5 +123,22 @@ int main() { printf("\n"); } + // This is more like the general way to do it + std::vector appcodes = {2, 5, 11, 17, 23, 31}; + for (size_t i = 0; i < appcodes.size(); ++i) { + int x = appcodes[i]; + if (x != 0) { + for (const auto& kvp : bitdict) { + if ((x & std::stoi(kvp.first)) != 0) { + printf("Appcode %3d %s ", x, kvp.first.c_str()); + for (const auto& element : kvp.second) { + printf(" %5s", element.c_str()); + } + printf("\n"); + } + } + printf("\n"); + } + } return 0; } \ No newline at end of file diff --git a/examples/cmap-example/find_implemented_args.py b/examples/cmap-example/find_implemented_args.py new file mode 100644 index 0000000000000..8d745c822a227 --- /dev/null +++ b/examples/cmap-example/find_implemented_args.py @@ -0,0 +1,30 @@ +import os +import re + +def find_arguments(directory): + arguments = {} + + # Get a list of all .cpp files in the specified directory + cpp_files = [filename for filename in os.listdir(directory) if filename.endswith('.cpp')] + + # Read each .cpp file and search for the specified expressions + for filename in cpp_files: + with open(os.path.join(directory, filename), 'r') as file: + content = file.read() + + # Search for the expressions using regular 
expressions + matches = re.findall(r'argv\s*\[\s*i\s*\]\s*==\s*([\'"])(?P-[a-zA-Z]+|\-\-[a-zA-Z]+[a-zA-Z0-9-]*)\1', content) + + # Add the found arguments to the dictionary + arguments[filename] = [match[1] for match in matches] + + return arguments + + +# Specify the directory you want to search for cpp files +directory = '/Users/edsilm2/llama.cpp/examples' + +# Call the function and print the result +result = find_arguments(directory) +for filename, arguments in result.items(): + print(filename, arguments) \ No newline at end of file From 739d6d30227b15114e40881ac29eda04521e2a2c Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Fri, 6 Oct 2023 09:52:33 +0100 Subject: [PATCH 13/34] Automatic helper dev --- common/common.h | 1 + examples/cmap-example/cmap-example.cpp | 2 +- .../cmap-example/find_implemented_args.py | 55 ++++++--- help_list.txt | 104 ++++++++++++++++++ 4 files changed, 147 insertions(+), 15 deletions(-) create mode 100644 help_list.txt diff --git a/common/common.h b/common/common.h index e095c56e309c2..30260486f8896 100644 --- a/common/common.h +++ b/common/common.h @@ -79,6 +79,7 @@ struct gpt_params { std::string model_draft = ""; // draft model for speculative decoding std::string model_alias = "unknown"; // model alias std::string prompt = ""; + std::string prompt_file = ""; // store the external prompt file std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state std::string input_prefix = ""; // string to prefix user inputs with std::string input_suffix = ""; // string to suffix user inputs with diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index b2a7b2735c597..ece30702aa098 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -130,7 +130,7 @@ int main() { if (x != 0) { for (const auto& kvp : bitdict) { if ((x & std::stoi(kvp.first)) != 0) { - printf("Appcode %3d %s ", x, kvp.first.c_str()); + printf("appcode %3d %s ", x, kvp.first.c_str()); for (const auto& element : kvp.second) { printf(" %5s", element.c_str()); } diff --git a/examples/cmap-example/find_implemented_args.py b/examples/cmap-example/find_implemented_args.py index 8d745c822a227..e4668943650ba 100644 --- a/examples/cmap-example/find_implemented_args.py +++ b/examples/cmap-example/find_implemented_args.py @@ -1,22 +1,27 @@ +# search the specified directory for files that include argv[i] == '-f' or '--file' arguments + import os import re def find_arguments(directory): arguments = {} - # Get a list of all .cpp files in the specified directory - cpp_files = [filename for filename in os.listdir(directory) if filename.endswith('.cpp')] + # Use os.walk() to traverse through files in directory and subdirectories + for root, dirs, files in os.walk(directory): + for file in files: + if file.endswith('.cpp'): + filepath = os.path.join(root, file) + with open(filepath, 'r') as file: + content = file.read() - # Read each .cpp file and search for the specified expressions - for filename in cpp_files: - with open(os.path.join(directory, filename), 'r') as file: - content = file.read() + # Search for the expression "params." and read the attribute without trailing detritus + matches = re.findall(r'params\.(.*?)(?=[\). 
<,;}])', content) - # Search for the expressions using regular expressions - matches = re.findall(r'argv\s*\[\s*i\s*\]\s*==\s*([\'"])(?P-[a-zA-Z]+|\-\-[a-zA-Z]+[a-zA-Z0-9-]*)\1', content) + # Remove duplicates from matches list + arguments_list = list(set([match.strip() for match in matches])) - # Add the found arguments to the dictionary - arguments[filename] = [match[1] for match in matches] + # Add the matches to the dictionary + arguments[filepath] = arguments_list return arguments @@ -24,7 +29,29 @@ def find_arguments(directory): # Specify the directory you want to search for cpp files directory = '/Users/edsilm2/llama.cpp/examples' -# Call the function and print the result -result = find_arguments(directory) -for filename, arguments in result.items(): - print(filename, arguments) \ No newline at end of file +if __name__ == '__main__': + # Call the find function and print the result + result = find_arguments(directory) + all_of_them = set() + for filename, arguments in result.items(): + print(f"Filename: \033[32m{filename}\033[0m, arguments: {arguments}\n") + for argument in arguments: + if argument not in all_of_them: + all_of_them.add("".join(argument)) + print(f"\033[32mAll of them: \033[0m{sorted(all_of_them)}.") + + with open("help_list.txt", "r") as helpfile: + lines = helpfile.read().split("\n") + for filename, arguments in result.items(): + parameters = [] + for line in lines: + for argument in arguments: + if argument in line: + parameters.append(line) + all_parameters = set(parameters) + print(f"\n\nFilename: \033[32m{filename.split('/')[-1]}\033[0m\n\n command-line arguments available and gpt-params functions implemented:\n") + if not all_parameters: + print(f" \033[032mNone\033[0m\n") + else: + for parameter in all_parameters: + print(f" help: \033[33m{parameter:<30}\033[0m") \ No newline at end of file diff --git a/help_list.txt b/help_list.txt new file mode 100644 index 0000000000000..08dcf67ec977c --- /dev/null +++ b/help_list.txt @@ -0,0 +1,104 @@ +-h, --helpshow this help message and exit +-i, --interactive run in interactive mode +--interactive-first run in interactive mode and wait for input right away +-ins, --instructrun in instruction mode (use with Alpaca models) +--multiline-input allows you to write or paste multiple lines without ending each in '\\' +-r PROMPT, --reverse-prompt PROMPT + halt generation at PROMPT, return control in interactive mode + (can be specified more than once for multiple prompts). +--color colorise output to distinguish prompt and user input from generations +-s SEED, --seed SEED RNG seed (default: -1, use random seed for < 0) +-t N, --threads N number of threads to use during generation (default: %d)\n", params.n_threads); +-tb N, --threads-batch N + number of threads to use during batch and prompt processing (default: same as --threads) +-p PROMPT, --prompt PROMPT + prompt to start generation with (default: empty) +-e, --escape process prompt escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\) +--prompt-cache FNAME file to cache prompt state for faster startup (default: none) +--prompt-cache-all if specified, saves user input and generations to cache as well. + not supported with --interactive or other interactive options +--prompt-cache-ro if specified, uses the prompt cache but does not update it. +--random-prompt start with a randomized prompt. 
+--in-prefix-bos prefix BOS to user inputs, preceding the `--in-prefix` string +--in-prefix STRING string to prefix user inputs with (default: empty) +--in-suffix STRING string to suffix after user inputs with (default: empty) +-f FNAME, --file FNAME + prompt file to start generation. +-n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict); +-c N, --ctx-size N size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx); +-b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch); +--top-k N top-k sampling (default: %d, 0 = disabled)\n", params.top_k); +--top-p N top-p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p); +--tfs N tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n", (double)params.tfs_z); +--typical N locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n", (double)params.typical_p); +--repeat-last-n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", params.repeat_last_n); +--repeat-penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)params.repeat_penalty); +--presence-penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)params.presence_penalty); +--frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)params.frequency_penalty); +--mirostat N use Mirostat sampling. + Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used. + (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", params.mirostat); +--mirostat-lr N Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta); +--mirostat-ent NMirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau); +-l TOKEN_ID(+/-)BIAS, --logit-bias TOKEN_ID(+/-)BIAS + modifies the likelihood of token appearing in the completion, + i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello', + or `--logit-bias 15043-1` to decrease likelihood of token ' Hello' +--grammar GRAMMAR BNF-like grammar to constrain generations (see samples in grammars/ dir) +--grammar-file FNAME file to read grammar from +--cfg-negative-prompt PROMPT + negative prompt to use for guidance. (default: empty) +--cfg-negative-prompt-file FNAME + negative prompt file to use for guidance. 
(default: empty) +--cfg-scale N strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale); +--rope-scale N RoPE context linear scaling factor, inverse of --rope-freq-scale +--rope-freq-base N RoPE base frequency, used by NTK-aware scaling (default: loaded from model) +--rope-freq-scale N RoPE frequency linear scaling factor (default: loaded from model) +--ignore-eos ignore end of stream token and continue generating (implies --logit-bias 2-inf) +--no-penalize-nldo not penalize newline token +--memory-f32 use f32 instead of f16 for memory key+value (default: disabled) + not recommended: doubles context memory required and no measurable increase in quality +--temp N temperature (default: %.1f)\n", (double)params.temp); +--logits-all return logits for all tokens in the batch (default: disabled) +--hellaswag compute HellaSwag score over random tasks from datafile supplied with -f +--hellaswag-tasks N number of tasks to use when computing the HellaSwag score (default: %zu)\n", params.hellaswag_tasks); +--keep N number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep); +--draft N number of tokens to draft for speculative decoding (default: %d)\n", params.n_draft); +--chunks Nmax number of chunks to process (default: %d, -1 = all)\n", params.n_chunks); +-np N, --parallel N number of parallel sequences to decode (default: %d)\n", params.n_parallel); +-ns N, --sequences N number of sequences to decode (default: %d)\n", params.n_sequences); +-cb, --cont-batching enable continuous batching (a.k.a dynamic batching) (default: disabled) + if (llama_mlock_supported()) { + --mlock force system to keep model in RAM rather than swapping or compressing + } + if (llama_mmap_supported()) { + --no-mmap do not memory-map model (slower load but may reduce pageouts if not using mlock) + } +--numa attempt optimizations that help on some NUMA systems + if run without this previously, it is recommended to drop the system page cache before using this + see https://github.com/ggerganov/llama.cpp/issues/1437 +#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD +-ngl N, --n-gpu-layers N + number of layers to store in VRAM +-ngld N, --n-gpu-layers-draft N + number of layers to store in VRAM for the draft model +-ts SPLIT --tensor-split SPLIT + how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1 +-mg i, --main-gpu i the GPU to use for scratch and small tensors +#ifdef GGML_USE_CUBLAS +-nommq, --no-mul-mat-q + use " GGML_CUBLAS_NAME " instead of custom mul_mat_q " GGML_CUDA_NAME " kernels. + Not recommended since this is both slower and uses more VRAM. 
+#endif // GGML_USE_CUBLAS +#endif +--verbose-promptprint prompt before generation + fprintf(stderr, " --simple-io use basic IO for better compatibility in subprocesses and limited consoles +--lora FNAME apply LoRA adapter (implies --no-mmap) +--lora-scaled FNAME S apply LoRA adapter with user defined scaling S (implies --no-mmap) +--lora-base FNAME optional model to use as a base for the layers modified by the LoRA adapter +-m FNAME, --model FNAME + model path (default: %s)\n", params.model.c_str()); +-md FNAME, --model-draft FNAME + draft model for speculative decoding (default: %s)\n", params.model.c_str()); +-ld LOGDIR, --logdir LOGDIR + path under which to save YAML logs (no logging if unset) \ No newline at end of file From 7a4dcff667833987a453d615362a9d41ab40807d Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Fri, 6 Oct 2023 14:50:17 +0100 Subject: [PATCH 14/34] Update contextual help dev --- .../cmap-example/find_implemented_args.py | 62 ++++++++--- help_list.txt | 104 +++++++++--------- 2 files changed, 98 insertions(+), 68 deletions(-) diff --git a/examples/cmap-example/find_implemented_args.py b/examples/cmap-example/find_implemented_args.py index e4668943650ba..5c99a284de89e 100644 --- a/examples/cmap-example/find_implemented_args.py +++ b/examples/cmap-example/find_implemented_args.py @@ -2,6 +2,18 @@ import os import re +import collections +import re + +def replace_dashes_with_underscores(filename): + with open(filename, 'r') as file: + content = file.read() + + # Match '-' surrounded by word characters on both sides and replace with '_' + replaced_content = re.sub(r'(\w)-(\w)', r'\1_\2', content) + + with open(filename, 'w') as file: + file.write(replaced_content) def find_arguments(directory): arguments = {} @@ -14,9 +26,8 @@ def find_arguments(directory): with open(filepath, 'r') as file: content = file.read() - # Search for the expression "params." and read the attribute without trailing detritus - matches = re.findall(r'params\.(.*?)(?=[\). <,;}])', content) - + # Search for the expression "params." excluding prefixes and read the attribute without trailing detritus + matches = re.findall(r'(?:^|\s)params\.(.*?)(?=[\). 
<,;}]|\Z)', content) # Remove duplicates from matches list arguments_list = list(set([match.strip() for match in matches])) @@ -25,28 +36,34 @@ def find_arguments(directory): return arguments - -# Specify the directory you want to search for cpp files -directory = '/Users/edsilm2/llama.cpp/examples' - -if __name__ == '__main__': - # Call the find function and print the result - result = find_arguments(directory) +def output_results(result): + sorted_result = collections.OrderedDict(sorted(result.items())) all_of_them = set() - for filename, arguments in result.items(): - print(f"Filename: \033[32m{filename}\033[0m, arguments: {arguments}\n") + for filename, arguments in sorted_result.items(): + print(f"Filename: \033[32m{filename.split('/')[-1]}\033[0m, arguments: {arguments}\n") for argument in arguments: if argument not in all_of_them: all_of_them.add("".join(argument)) print(f"\033[32mAll of them: \033[0m{sorted(all_of_them)}.") + return sorted_result - with open("help_list.txt", "r") as helpfile: +def find_parameters(file, sorted_result): + with open(file, "r") as helpfile: lines = helpfile.read().split("\n") - for filename, arguments in result.items(): + for filename, arguments in sorted_result.items(): parameters = [] for line in lines: for argument in arguments: - if argument in line: + # need to try to avoid spurious matches + argument1 = "--" + argument + " " + if argument1 in line: + parameters.append(line) + # need to try to avoid spurious matches + argument2 = "params." + argument.split('n_')[-1] + if argument2 in line: + parameters.append(line) + argument3 = "params." + argument + if argument3 in line: parameters.append(line) all_parameters = set(parameters) print(f"\n\nFilename: \033[32m{filename.split('/')[-1]}\033[0m\n\n command-line arguments available and gpt-params functions implemented:\n") @@ -54,4 +71,17 @@ def find_arguments(directory): print(f" \033[032mNone\033[0m\n") else: for parameter in all_parameters: - print(f" help: \033[33m{parameter:<30}\033[0m") \ No newline at end of file + print(f" help: \033[33m{parameter:<30}\033[0m") + + +# Specify the directory you want to search for cpp files +directory = '/Users/edsilm2/llama.cpp/examples' + +if __name__ == '__main__': + # First we alter all the hyphenated help words in help-file.txt to underscores + replace_dashes_with_underscores('help_list.txt') + # Call the find function and output the result + result = find_arguments(directory) + sorted = output_results(result) + # analyse the files and what they contain + find_parameters("help_list.txt", sorted) \ No newline at end of file diff --git a/help_list.txt b/help_list.txt index 08dcf67ec977c..7bf5b8c78a385 100644 --- a/help_list.txt +++ b/help_list.txt @@ -1,104 +1,104 @@ -h, --helpshow this help message and exit -i, --interactive run in interactive mode ---interactive-first run in interactive mode and wait for input right away +--interactive_first run in interactive mode and wait for input right away -ins, --instructrun in instruction mode (use with Alpaca models) ---multiline-input allows you to write or paste multiple lines without ending each in '\\' --r PROMPT, --reverse-prompt PROMPT +--multiline_input allows you to write or paste multiple lines without ending each in '\\' +-r PROMPT, --reverse_prompt PROMPT halt generation at PROMPT, return control in interactive mode (can be specified more than once for multiple prompts). 
--color colorise output to distinguish prompt and user input from generations -s SEED, --seed SEED RNG seed (default: -1, use random seed for < 0) -t N, --threads N number of threads to use during generation (default: %d)\n", params.n_threads); --tb N, --threads-batch N +-tb N, --threads_batch N number of threads to use during batch and prompt processing (default: same as --threads) -p PROMPT, --prompt PROMPT prompt to start generation with (default: empty) -e, --escape process prompt escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\) ---prompt-cache FNAME file to cache prompt state for faster startup (default: none) ---prompt-cache-all if specified, saves user input and generations to cache as well. +--prompt_cache FNAME file to cache prompt state for faster startup (default: none) +--prompt_cache_all if specified, saves user input and generations to cache as well. not supported with --interactive or other interactive options ---prompt-cache-ro if specified, uses the prompt cache but does not update it. ---random-prompt start with a randomized prompt. ---in-prefix-bos prefix BOS to user inputs, preceding the `--in-prefix` string ---in-prefix STRING string to prefix user inputs with (default: empty) ---in-suffix STRING string to suffix after user inputs with (default: empty) +--prompt_cache_ro if specified, uses the prompt cache but does not update it. +--random_prompt start with a randomized prompt. +--in_prefix_bos prefix BOS to user inputs, preceding the `--in_prefix` string +--in_prefix STRING string to prefix user inputs with (default: empty) +--in_suffix STRING string to suffix after user inputs with (default: empty) -f FNAME, --file FNAME prompt file to start generation. --n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict); --c N, --ctx-size N size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx); --b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch); ---top-k N top-k sampling (default: %d, 0 = disabled)\n", params.top_k); ---top-p N top-p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p); +-n N, --n_predict N number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict); +-c N, --ctx_size N size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx); +-b N, --batch_size N batch size for prompt processing (default: %d)\n", params.n_batch); +--top_k N top_k sampling (default: %d, 0 = disabled)\n", params.top_k); +--top_p N top_p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p); --tfs N tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n", (double)params.tfs_z); --typical N locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n", (double)params.typical_p); ---repeat-last-n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", params.repeat_last_n); ---repeat-penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)params.repeat_penalty); ---presence-penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)params.presence_penalty); ---frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)params.frequency_penalty); +--repeat_last_n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", params.repeat_last_n); +--repeat_penalty N penalize repeat sequence of 
tokens (default: %.1f, 1.0 = disabled)\n", (double)params.repeat_penalty); +--presence_penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)params.presence_penalty); +--frequency_penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)params.frequency_penalty); --mirostat N use Mirostat sampling. Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used. (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", params.mirostat); ---mirostat-lr N Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta); ---mirostat-ent NMirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau); --l TOKEN_ID(+/-)BIAS, --logit-bias TOKEN_ID(+/-)BIAS +--mirostat_lr N Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta); +--mirostat_ent NMirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau); +-l TOKEN_ID(+/-)BIAS, --logit_bias TOKEN_ID(+/-)BIAS modifies the likelihood of token appearing in the completion, - i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello', - or `--logit-bias 15043-1` to decrease likelihood of token ' Hello' ---grammar GRAMMAR BNF-like grammar to constrain generations (see samples in grammars/ dir) ---grammar-file FNAME file to read grammar from ---cfg-negative-prompt PROMPT + i.e. `--logit_bias 15043+1` to increase likelihood of token ' Hello', + or `--logit_bias 15043_1` to decrease likelihood of token ' Hello' +--grammar GRAMMAR BNF_like grammar to constrain generations (see samples in grammars/ dir) +--grammar_file FNAME file to read grammar from +--cfg_negative_prompt PROMPT negative prompt to use for guidance. (default: empty) ---cfg-negative-prompt-file FNAME +--cfg_negative_prompt_file FNAME negative prompt file to use for guidance. 
(default: empty) ---cfg-scale N strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale); ---rope-scale N RoPE context linear scaling factor, inverse of --rope-freq-scale ---rope-freq-base N RoPE base frequency, used by NTK-aware scaling (default: loaded from model) ---rope-freq-scale N RoPE frequency linear scaling factor (default: loaded from model) ---ignore-eos ignore end of stream token and continue generating (implies --logit-bias 2-inf) ---no-penalize-nldo not penalize newline token ---memory-f32 use f32 instead of f16 for memory key+value (default: disabled) +--cfg_scale N strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale); +--rope_scale N RoPE context linear scaling factor, inverse of --rope_freq_scale +--rope_freq_base N RoPE base frequency, used by NTK_aware scaling (default: loaded from model) +--rope_freq_scale N RoPE frequency linear scaling factor (default: loaded from model) +--ignore_eos ignore end of stream token and continue generating (implies --logit_bias 2_inf) +--no_penalize_nldo not penalize newline token +--memory_f32 use f32 instead of f16 for memory key+value (default: disabled) not recommended: doubles context memory required and no measurable increase in quality --temp N temperature (default: %.1f)\n", (double)params.temp); ---logits-all return logits for all tokens in the batch (default: disabled) +--logits_all return logits for all tokens in the batch (default: disabled) --hellaswag compute HellaSwag score over random tasks from datafile supplied with -f ---hellaswag-tasks N number of tasks to use when computing the HellaSwag score (default: %zu)\n", params.hellaswag_tasks); +--hellaswag_tasks N number of tasks to use when computing the HellaSwag score (default: %zu)\n", params.hellaswag_tasks); --keep N number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep); --draft N number of tokens to draft for speculative decoding (default: %d)\n", params.n_draft); --chunks Nmax number of chunks to process (default: %d, -1 = all)\n", params.n_chunks); -np N, --parallel N number of parallel sequences to decode (default: %d)\n", params.n_parallel); -ns N, --sequences N number of sequences to decode (default: %d)\n", params.n_sequences); --cb, --cont-batching enable continuous batching (a.k.a dynamic batching) (default: disabled) +-cb, --cont_batching enable continuous batching (a.k.a dynamic batching) (default: disabled) if (llama_mlock_supported()) { --mlock force system to keep model in RAM rather than swapping or compressing } if (llama_mmap_supported()) { - --no-mmap do not memory-map model (slower load but may reduce pageouts if not using mlock) + --no_mmap do not memory_map model (slower load but may reduce pageouts if not using mlock) } --numa attempt optimizations that help on some NUMA systems if run without this previously, it is recommended to drop the system page cache before using this see https://github.com/ggerganov/llama.cpp/issues/1437 #ifdef LLAMA_SUPPORTS_GPU_OFFLOAD --ngl N, --n-gpu-layers N +-ngl N, --n_gpu_layers N number of layers to store in VRAM --ngld N, --n-gpu-layers-draft N +-ngld N, --n_gpu_layers_draft N number of layers to store in VRAM for the draft model --ts SPLIT --tensor-split SPLIT - how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1 --mg i, --main-gpu i the GPU to use for scratch and small tensors +-ts SPLIT --tensor_split SPLIT + how to split tensors across multiple GPUs, comma_separated list of proportions, e.g. 
3,1 +-mg i, --main_gpu i the GPU to use for scratch and small tensors #ifdef GGML_USE_CUBLAS --nommq, --no-mul-mat-q +-nommq, --no_mul_mat_q use " GGML_CUBLAS_NAME " instead of custom mul_mat_q " GGML_CUDA_NAME " kernels. Not recommended since this is both slower and uses more VRAM. #endif // GGML_USE_CUBLAS #endif ---verbose-promptprint prompt before generation - fprintf(stderr, " --simple-io use basic IO for better compatibility in subprocesses and limited consoles ---lora FNAME apply LoRA adapter (implies --no-mmap) ---lora-scaled FNAME S apply LoRA adapter with user defined scaling S (implies --no-mmap) ---lora-base FNAME optional model to use as a base for the layers modified by the LoRA adapter +--verbose_promptprint prompt before generation + fprintf(stderr, " --simple_io use basic IO for better compatibility in subprocesses and limited consoles +--lora FNAME apply LoRA adapter (implies --no_mmap) +--lora_scaled FNAME S apply LoRA adapter with user defined scaling S (implies --no_mmap) +--lora_base FNAME optional model to use as a base for the layers modified by the LoRA adapter -m FNAME, --model FNAME model path (default: %s)\n", params.model.c_str()); --md FNAME, --model-draft FNAME +-md FNAME, --model_draft FNAME draft model for speculative decoding (default: %s)\n", params.model.c_str()); -ld LOGDIR, --logdir LOGDIR path under which to save YAML logs (no logging if unset) \ No newline at end of file From 0d70518220102c6d9e0cf71339587a1eb763c165 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Fri, 6 Oct 2023 22:19:29 +0100 Subject: [PATCH 15/34] Update contextual help --- examples/cmap-example/find_implemented_args.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/examples/cmap-example/find_implemented_args.py b/examples/cmap-example/find_implemented_args.py index 5c99a284de89e..feae80ffd1764 100644 --- a/examples/cmap-example/find_implemented_args.py +++ b/examples/cmap-example/find_implemented_args.py @@ -27,12 +27,13 @@ def find_arguments(directory): content = file.read() # Search for the expression "params." excluding prefixes and read the attribute without trailing detritus - matches = re.findall(r'(?:^|\s)params\.(.*?)(?=[\). <,;}]|\Z)', content) + # matches = re.findall(r'(?:^|\s)params\.(.*)(?=[\). <,;}]|\Z)', content) + matches = set(re.findall(r'(?:^|\b)params\.([a-zA-Z_0-9]*)(?=[\). <,;}]|\Z)', content)) # Remove duplicates from matches list - arguments_list = list(set([match.strip() for match in matches])) + # arguments_list = list(set([match.strip() for match in matches])) # Add the matches to the dictionary - arguments[filepath] = arguments_list + arguments[filepath] = matches return arguments @@ -53,6 +54,12 @@ def find_parameters(file, sorted_result): for filename, arguments in sorted_result.items(): parameters = [] for line in lines: + for argument in arguments: + # building pattern to avoid spurious matches + pattern = r"(?:--{}\s)|(?:params\.{}[\s.,();])".format(argument, argument.split('n_')[-1]) + if re.search(pattern, line): + parameters.append(line) + '''for line in lines: for argument in arguments: # need to try to avoid spurious matches argument1 = "--" + argument + " " @@ -64,7 +71,7 @@ def find_parameters(file, sorted_result): parameters.append(line) argument3 = "params." 
+ argument if argument3 in line: - parameters.append(line) + parameters.append(line)''' all_parameters = set(parameters) print(f"\n\nFilename: \033[32m{filename.split('/')[-1]}\033[0m\n\n command-line arguments available and gpt-params functions implemented:\n") if not all_parameters: From 9c5d6f0ef661c17a8b140bf122b3e95bf6b288e2 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Sat, 7 Oct 2023 21:40:45 +0100 Subject: [PATCH 16/34] Update helper dev --- common/common.h | 24 +++--- .../cmap-example/find_implemented_args.py | 77 ++++++++++++++----- examples/cmap-example/read_common_h.py | 34 ++++++++ examples/parallel/README.md | 8 +- 4 files changed, 112 insertions(+), 31 deletions(-) create mode 100644 examples/cmap-example/read_common_h.py diff --git a/common/common.h b/common/common.h index 30260486f8896..887142cf99a7f 100644 --- a/common/common.h +++ b/common/common.h @@ -35,21 +35,21 @@ int32_t get_num_physical_cores(); struct gpt_params { uint32_t seed = -1; // RNG seed - int32_t n_threads = get_num_physical_cores(); - int32_t n_threads_batch = -1; // number of threads to use for batch processing (-1 = use n_threads) + int32_t n_threads = get_num_physical_cores(); // user-defined or num of internal physical cores + int32_t n_threads_batch = -1; // num threads for batch proc (-1 = use n_threads) int32_t n_predict = -1; // new tokens to predict int32_t n_ctx = 512; // context size - int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS) + int32_t n_batch = 512; // batch size for prompt proc (>=32 to use BLAS) int32_t n_keep = 0; // number of tokens to keep from initial prompt int32_t n_draft = 16; // number of tokens to draft during speculative decoding int32_t n_chunks = -1; // max number of chunks to process (-1 = unlimited) int32_t n_parallel = 1; // number of parallel sequences to decode int32_t n_sequences = 1; // number of sequences to decode - int32_t n_gpu_layers = -1; // number of layers to store in VRAM (-1 - use default) - int32_t n_gpu_layers_draft = -1; // number of layers to store in VRAM for the draft model (-1 - use default) + int32_t n_gpu_layers = -1; // num layers stored in VRAM (-1 for default) + int32_t n_gpu_layers_draft = -1; // num layers stored in VRAM for draft mod (-1 for default) int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors float tensor_split[LLAMA_MAX_DEVICES] = {0}; // how split tensors should be distributed across GPUs - int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens. + int32_t n_probs = 0; // if > 0, output probabilities of top n_probs tokens. int32_t n_beams = 0; // if non-zero then use beam search of given width. 
float rope_freq_base = 0.0f; // RoPE base frequency float rope_freq_scale = 0.0f; // RoPE frequency scaling factor @@ -61,7 +61,7 @@ struct gpt_params { float typical_p = 1.00f; // 1.0 = disabled float temp = 0.80f; // 1.0 = disabled float repeat_penalty = 1.10f; // 1.0 = disabled - int32_t repeat_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size) + int32_t repeat_last_n = 64; // last n tokens to penalize (0 = disable, -1 = cxt size) float frequency_penalty = 0.00f; // 0.0 = disabled float presence_penalty = 0.00f; // 0.0 = disabled int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0 @@ -78,7 +78,7 @@ struct gpt_params { std::string model = "models/7B/ggml-model-f16.gguf"; // model path std::string model_draft = ""; // draft model for speculative decoding std::string model_alias = "unknown"; // model alias - std::string prompt = ""; + std::string prompt = ""; // user-provided single prompt std::string prompt_file = ""; // store the external prompt file std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state std::string input_prefix = ""; // string to prefix user inputs with @@ -90,11 +90,11 @@ struct gpt_params { std::vector> lora_adapter; // lora adapter path with user defined scale std::string lora_base = ""; // base model path for the lora adapter - int ppl_stride = 0; // stride for perplexity calculations. If left at 0, the pre-existing approach will be used. - int ppl_output_type = 0; // = 0 -> ppl output is as usual, = 1 -> ppl output is num_tokens, ppl, one per line + int ppl_stride = 0; // stride for ppl calcs. 0: the pre-existing approach will be used. + int ppl_output_type = 0; // 0: ppl output as usual, 1: ppl output = num_tokens, ppl, one per line // (which is more convenient to use for plotting) // - bool hellaswag = false; // compute HellaSwag score over random tasks from datafile supplied in prompt + bool hellaswag = false; // compute HellaSwag score from datafile given in prompt size_t hellaswag_tasks = 400; // number of tasks to use when computing the HellaSwag score bool mul_mat_q = true; // if true, use mul_mat_q kernels instead of cuBLAS @@ -109,7 +109,7 @@ struct gpt_params { bool escape = false; // escape "\n", "\r", "\t", "\'", "\"", and "\\" bool interactive_first = false; // wait for user input immediately bool multiline_input = false; // reverse the usage of `\` - bool simple_io = false; // improves compatibility with subprocesses and limited consoles + bool simple_io = false; // improves compat'y with subprocs and ltd consoles bool cont_batching = false; // insert new sequences for decoding on-the-fly bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix diff --git a/examples/cmap-example/find_implemented_args.py b/examples/cmap-example/find_implemented_args.py index feae80ffd1764..31d4396980211 100644 --- a/examples/cmap-example/find_implemented_args.py +++ b/examples/cmap-example/find_implemented_args.py @@ -4,6 +4,8 @@ import re import collections import re +import read_common_h + def replace_dashes_with_underscores(filename): with open(filename, 'r') as file: @@ -48,6 +50,13 @@ def output_results(result): print(f"\033[32mAll of them: \033[0m{sorted(all_of_them)}.") return sorted_result +def concatenate(v): + concatenated_element = "" + for i, element in enumerate(v): + if element == "//": + concatenated_element = " ".join(v[i:]) + return concatenated_element + def find_parameters(file, sorted_result): with open(file, "r") as helpfile: lines = 
helpfile.read().split("\n") @@ -59,36 +68,68 @@ def find_parameters(file, sorted_result): pattern = r"(?:--{}\s)|(?:params\.{}[\s.,();])".format(argument, argument.split('n_')[-1]) if re.search(pattern, line): parameters.append(line) - '''for line in lines: - for argument in arguments: - # need to try to avoid spurious matches - argument1 = "--" + argument + " " - if argument1 in line: - parameters.append(line) - # need to try to avoid spurious matches - argument2 = "params." + argument.split('n_')[-1] - if argument2 in line: - parameters.append(line) - argument3 = "params." + argument - if argument3 in line: - parameters.append(line)''' - all_parameters = set(parameters) - print(f"\n\nFilename: \033[32m{filename.split('/')[-1]}\033[0m\n\n command-line arguments available and gpt-params functions implemented:\n") + + all_parameters = set(parameters) + file = filename.split('/')[-1] + print("\n\n"+"#"*(10+len(file))) + print(f"Filename: \033[32m{file}\033[0m") + print("#"*(10+len(file))) + print(f"\n\n command-line arguments available and gpt-params functions implemented (TODO: multi-line helps NEED SOME WORK):\n") + if not all_parameters: print(f" \033[032mNone\033[0m\n") + + # first do it the original way else: + help_count = 0 for parameter in all_parameters: - print(f" help: \033[33m{parameter:<30}\033[0m") + help_count += 1 + print(f"{help_count:>2} help: \033[33m{parameter:<30}\033[0m") + # now do it the new way + print("\nNow we extract the original gpt_params definition and defaults for implemented arguments:\n") + gpt_count = 0 + for k,v in read_common_h.parameters.items(): + if not read_common_h.parameters.items(): + print(f" \033[032mNone\033[0m\n") + elif k in arguments: + # print(f"gpt_params: \033[33m{k:>20}\033[0m values: {v}") + concatenated_element = concatenate(v) + gpt_count += 1 + print(f"{gpt_count:>2} gpt_param: \033[32m{k:>19}; \033[34mrole: \033[33m{concatenated_element:<60}\033[0m; \033[34mdefault: \033[30m{v[1]:<10}\033[0m ") + + # searching the other way round is quicker: + print("\nSearching the other way round is quicker:\n") + key_count = 0 + for argument in arguments: + if argument in read_common_h.parameters: + key_count += 1 + print(f"{key_count:>2} key: {argument:>25}; role: {concatenate(read_common_h.parameters[argument]):<60}; default: {read_common_h.parameters[argument][1]:<10}") + if help_count == gpt_count and gpt_count == key_count: + print("\n\033[032mNo unresolved help-list incompatibilities with this app.\033[0m") + else: + print("\n\033[031mThis app requires some attention regarding help-function consistency.\033[0m") # Specify the directory you want to search for cpp files directory = '/Users/edsilm2/llama.cpp/examples' if __name__ == '__main__': + # get the parameters from the common.h file utiity we import + print(read_common_h.parameters) + # So now we've got the gpt_parameters in this parameters dict + # First we alter all the hyphenated help words in help-file.txt to underscores - replace_dashes_with_underscores('help_list.txt') - # Call the find function and output the result + # replace_dashes_with_underscores('help_list.txt') + # This above may no longer be needed + + print("\n####################### find parameters #################################") + # Call the find function to collect all the params.attributes and output the result result = find_arguments(directory) + + print("\n######################################## output_results #################################") + # sort the results and output them sorted = 
output_results(result) + + print("\n######################## find help context parameters #################################") # analyse the files and what they contain find_parameters("help_list.txt", sorted) \ No newline at end of file diff --git a/examples/cmap-example/read_common_h.py b/examples/cmap-example/read_common_h.py new file mode 100644 index 0000000000000..a683d7662b204 --- /dev/null +++ b/examples/cmap-example/read_common_h.py @@ -0,0 +1,34 @@ +# read common.h and extract the parameters name list + +import re + +# Read the file into separate lines +with open('common/common.h', 'r') as file: + lines = file.read().split('\n') + +parameters = {} +inside = False +for line in lines: + # non_whitespace_elements = re.findall(r"\S+", line) + non_whitespace_elements = re.findall(r"[^\s}{=;]+", line) + print(f"nwe = \033[33m{non_whitespace_elements}\033[0m") + if non_whitespace_elements and non_whitespace_elements[0] == "struct": + inside = True + if len(non_whitespace_elements) > 2 and inside: + # note: cannot use nwe[0] because types do not generate unique keys and so overwrite + # here we deliberately add back the key so we can make a manual change when it is different + parameters[non_whitespace_elements[1]] = non_whitespace_elements[1:] + for k, v in parameters.items(): + print(f"key: {k:<20}; values: {v}") + + concatenated_element = "" + for i, element in enumerate(v): + if element == "//": + concatenated_element = " ".join(v[i:]) + # break + print(" "*10 + f"parameter: \033[32m{k:>40} \033[34mdefault: \033[30m{v[1]:>5} \033[34mcommment: \033[33m{concatenated_element:80}\033[0m") + + # this is a bit of a hack to terminate the harvest + if len(non_whitespace_elements) > 2 and non_whitespace_elements[1] == "infill": + inside = False + break \ No newline at end of file diff --git a/examples/parallel/README.md b/examples/parallel/README.md index 4d0fe5cef12fa..fea4a9d918c70 100644 --- a/examples/parallel/README.md +++ b/examples/parallel/README.md @@ -1,3 +1,9 @@ # llama.cpp/example/parallel -Simplified simluation for serving incoming requests in parallel +Simplified simulation for serving incoming requests in parallel + +Running this using the 100 questions in examples/jeopardy/questions.txt +on an M2 MAX (38 core) with 32GB unified memory on MacOS Sonoma 14.0 +takes about 235 seconds with sequential responses (-ns 1) and 45 seconds +with 64 parallel responses (-ns 64) in both cases generating 100 answers (-np 100) +using a context of 8192 (-c 8192). 
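The parallel README above reports timings but not the command line that produced them. Purely as an illustrative sketch — the binary and model paths are assumptions, and feeding questions.txt via -f presumes the params.prompt_file plumbing this series introduces is wired into the parallel example — the flags quoted in the README (-c 8192, -ns 1 vs -ns 64, -np 100) would be passed along these lines:

    # illustrative only: paths and the -f wiring are assumed; the -c/-ns/-np values are those quoted in the README
    ./build/bin/parallel -m ./models/llama-2-7b/ggml-model-q4_0.gguf \
        -f ./examples/jeopardy/questions.txt \
        -c 8192 -ns 64 -np 100
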
From 982c9089847e07ae33ccaebfbfc99778a55cc7fc Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Sun, 8 Oct 2023 22:26:13 +0100 Subject: [PATCH 17/34] Update contextual help --- common/common.cpp | 15 +- common/common.h | 1 + .../cmap-example/find_implemented_args.py | 126 +++++++++-- examples/cmap-example/read_common_h.py | 27 ++- help_list.txt | 208 +++++++++--------- 5 files changed, 240 insertions(+), 137 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 4b233786abc00..4d4ecf03df092 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -621,6 +621,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { return true; } +// There were missing items from this list of helps so the wording needs checking (all inserted at the end, so reposition too): +// --embedding, --beams, --ppl-stride, --ppl-output-type, memory-f32, no-mmap, mlock, use-color, nprobs void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf("usage: %s [options]\n", argv[0]); printf("\n"); @@ -667,7 +669,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", params.mirostat); printf(" --mirostat-lr N Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta); printf(" --mirostat-ent N Mirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau); - printf(" -l TOKEN_ID(+/-)BIAS, --logit-bias TOKEN_ID(+/-)BIAS\n"); + printf(" -l T, --logit-bias T T = TOKEN_ID(plus/minus)BIAS\n"); printf(" modifies the likelihood of token appearing in the completion,\n"); printf(" i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',\n"); printf(" or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'\n"); @@ -682,7 +684,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" --rope-freq-base N RoPE base frequency, used by NTK-aware scaling (default: loaded from model)\n"); printf(" --rope-freq-scale N RoPE frequency linear scaling factor (default: loaded from model)\n"); printf(" --ignore-eos ignore end of stream token and continue generating (implies --logit-bias 2-inf)\n"); - printf(" --no-penalize-nl do not penalize newline token\n"); + printf(" --no-penalize-nl do not penalize newline token (default is DO penalise nl token)\n"); printf(" --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n"); printf(" not recommended: doubles context memory required and no measurable increase in quality\n"); printf(" --temp N temperature (default: %.1f)\n", (double)params.temp); @@ -729,6 +731,15 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" draft model for speculative decoding (default: %s)\n", params.model.c_str()); printf(" -ld LOGDIR, --logdir LOGDIR\n"); printf(" path under which to save YAML logs (no logging if unset)\n"); + printf(" --ppl-stride stride for ppl calcs. 
0 (default): the pre-existing approach will be used.\n"); + printf(" --ppl-output-type 0 (default): ppl output as usual, 1: ppl output num_tokens, one per line\n"); + printf(" --embedding 0 (default): get only sentence embedding\n"); + printf(" --beams N 0 (default): if non-zero use beam search of given width N.\n"); + printf(" --memory-f32 0 (default): if true (= 1) disable f16 memory.\n"); + printf(" --no-mmap 0 (default): if true use mmap for faster loads.\n"); + printf(" --mlock 0 (default): if true keep model in memory.\n"); + printf(" --use-color 0 (default): use color to distinguish generations from inputs\n"); + printf(" --nprobs N if > 0 output the probabilities of the top N tokens\n"); printf("\n"); } diff --git a/common/common.h b/common/common.h index 887142cf99a7f..d48ee22e1427e 100644 --- a/common/common.h +++ b/common/common.h @@ -75,6 +75,7 @@ struct gpt_params { std::string cfg_negative_prompt; // string to help guidance float cfg_scale = 1.f; // How strong is guidance + std::string help = ""; // universal help parameter std::string model = "models/7B/ggml-model-f16.gguf"; // model path std::string model_draft = ""; // draft model for speculative decoding std::string model_alias = "unknown"; // model alias diff --git a/examples/cmap-example/find_implemented_args.py b/examples/cmap-example/find_implemented_args.py index 31d4396980211..ac6e6cfdc8695 100644 --- a/examples/cmap-example/find_implemented_args.py +++ b/examples/cmap-example/find_implemented_args.py @@ -6,7 +6,22 @@ import re import read_common_h - +# update the source file - usually 'help_list.txt', so the default - in case the source file has been changed +def update_file(file_from, file_to = "help_list.txt"): + # Open the file_from file + with open(file_from, "r") as file: + lines = file.readlines() + + # Find lines starting with "printf(" and ending with ");" (assumes file_from is written in C/C++) + pattern = r'printf\("\s(.*?)\);' + matched_lines = [re.search(pattern, line).group(1) for line in lines if re.search(pattern, line)] + + # Save matched lines to file_to + with open(file_to, "w") as file: + for line in matched_lines: + file.write(line + '\n') + +# helper fn to make the hyphenated words in a file snake-case for searching def replace_dashes_with_underscores(filename): with open(filename, 'r') as file: content = file.read() @@ -17,6 +32,13 @@ def replace_dashes_with_underscores(filename): with open(filename, 'w') as file: file.write(replaced_content) +# helper fn to make the underscored words in a file hyphenated for print +def replace_underscores_with_dashes(parameter): + # Match '_' surrounded by word characters on both sides and replace with '-' + return re.sub(r'(\w)_(\w)', r'\1-\2', parameter) + + +# find all instances of "params." in the *.cpp files in a directory def find_arguments(directory): arguments = {} @@ -28,21 +50,21 @@ def find_arguments(directory): with open(filepath, 'r') as file: content = file.read() - # Search for the expression "params." excluding prefixes and read the attribute without trailing detritus + # Search for the expression "params." or "params->" excluding prefixes and read the attribute without trailing detritus # matches = re.findall(r'(?:^|\s)params\.(.*)(?=[\). <,;}]|\Z)', content) - matches = set(re.findall(r'(?:^|\b)params\.([a-zA-Z_0-9]*)(?=[\). <,;}]|\Z)', content)) - # Remove duplicates from matches list - # arguments_list = list(set([match.strip() for match in matches])) + matches = set(re.findall(r'(?:^|\b)params[->\.]([a-zA-Z_0-9]*)(?=[\). 
<,;}]|\Z)', content)) # Add the matches to the dictionary arguments[filepath] = matches return arguments +# output a list of the params.attributes for each file def output_results(result): sorted_result = collections.OrderedDict(sorted(result.items())) all_of_them = set() for filename, arguments in sorted_result.items(): + arguments.add("help") print(f"Filename: \033[32m{filename.split('/')[-1]}\033[0m, arguments: {arguments}\n") for argument in arguments: if argument not in all_of_them: @@ -50,6 +72,7 @@ def output_results(result): print(f"\033[32mAll of them: \033[0m{sorted(all_of_them)}.") return sorted_result +# put all the words after "//" in a dict back together with spaces def concatenate(v): concatenated_element = "" for i, element in enumerate(v): @@ -57,24 +80,78 @@ def concatenate(v): concatenated_element = " ".join(v[i:]) return concatenated_element +def title_print(filename): + title = filename.split('/')[-1] + print("\n\n"+"#"*(10+len(title))) + print(f"Filename: \033[32m{title}\033[0m") + print("#"*(10+len(title))) + +def substitution_list(parameters): + # store untrapped parameters as identicals in case we need to change them later + sub_dict = {"n_threads": "threads", + "n_ctx": "ctx_size", + "n_draft" : "draft", + "n_threads_batch" : "threads_batch", + "n_chunks" : "chunks", + "n_batch" : "batch_size", + "n_sequences" : "sequences", + "n_parallel" : "parallel", + "n_beams" : "beams", + "n_keep" : "keep", + "n_probs" : "nprobs", + "path_prompt_cache" : "prompt_cache", + "input_prefix" : "in_prefix", + "input_suffix" : "in_suffix", + "input_prefix_bos" : "in_prefix_bos", + "antiprompt" : "reverse_prompt", + "mul_mat_q" : "no_mul_mat_q", + "use_mmap" : "no_mmap", + "use_mlock" : "mlock", + "model_alias" : "alias", + "tfs_z" : "tfs", + "use_color" : "color", + "logit_bias" : "logit_bias", + "ignore_eos" : "ignore_eos", + "mirostat_tau" : "mirostat_ent", + "mirostat_eta" : "mirostat_lr", + "penalize_nl" : "no_penalize_nl", + "typical_p" : "typical", + "mem_size" : "mem_size", + "mem_buffer" : "mem_buffer", + "no_alloc" : "no_alloc" + } + new_parameters = [] + for parameter in parameters: + if parameter in sub_dict: + # we need both for future reference + new_parameters.append(parameter) + new_parameters.append(sub_dict[parameter]) + else: + new_parameters.append(parameter) + return new_parameters + +# output the lines of the help file def find_parameters(file, sorted_result): with open(file, "r") as helpfile: lines = helpfile.read().split("\n") for filename, arguments in sorted_result.items(): + # we try to fix up some variant labelling in help_file.txt + arguments = substitution_list(arguments) parameters = [] for line in lines: for argument in arguments: # building pattern to avoid spurious matches - pattern = r"(?:--{}\s)|(?:params\.{}[\s.,();])".format(argument, argument.split('n_')[-1]) - if re.search(pattern, line): + # pattern = r"(?:--{}\s)|(?:params\.{}[\s.,\.();])".format(argument, argument.split('n_')[-1]) + pattern = r"(?:--{}\s)|(?:params\.{}(?=[\s.,\.\(\);]|\.+\w))".format(argument, argument.split('n_')[-1]) + # pattern = r"(?<=params\.)\w+(?=\.\w+|\.|,|;|\}|\{|\(|\)|\.)" + # bit of a hack to exclude --attributes at the end of help comment lines + if re.search(pattern, line[:50]): parameters.append(line) all_parameters = set(parameters) - file = filename.split('/')[-1] - print("\n\n"+"#"*(10+len(file))) - print(f"Filename: \033[32m{file}\033[0m") - print("#"*(10+len(file))) - print(f"\n\n command-line arguments available and gpt-params functions implemented 
(TODO: multi-line helps NEED SOME WORK):\n") + + title_print(filename) + print(f"\nCommand-line arguments available and gpt-params functions implemented (TODO: multi-line helps NEED SOME WORK):\n") if not all_parameters: print(f" \033[032mNone\033[0m\n") @@ -83,11 +160,16 @@ def find_parameters(file, sorted_result): else: help_count = 0 for parameter in all_parameters: - help_count += 1 - print(f"{help_count:>2} help: \033[33m{parameter:<30}\033[0m") + # reverse the hypthen/underscore pattern just for printing + replaced_param = replace_underscores_with_dashes(parameter) + if not parameter.startswith(" "): + help_count += 1 + print(f"{help_count:>2} help: \033[33m{replaced_param:<30}\033[0m") + else: + print(f" help: \033[33m{replaced_param:<30}\033[0m") # now do it the new way - print("\nNow we extract the original gpt_params definition and defaults for implemented arguments:\n") + print("\nNow we extract the original gpt_params definition from common.h with the defaults for implemented arguments:\n") gpt_count = 0 for k,v in read_common_h.parameters.items(): if not read_common_h.parameters.items(): @@ -99,14 +181,14 @@ def find_parameters(file, sorted_result): print(f"{gpt_count:>2} gpt_param: \033[32m{k:>19}; \033[34mrole: \033[33m{concatenated_element:<60}\033[0m; \033[34mdefault: \033[30m{v[1]:<10}\033[0m ") # searching the other way round is quicker: - print("\nSearching the other way round is quicker:\n") + print("\nSearching the other way round is more efficient:\n") key_count = 0 - for argument in arguments: + for argument in set(arguments): if argument in read_common_h.parameters: key_count += 1 print(f"{key_count:>2} key: {argument:>25}; role: {concatenate(read_common_h.parameters[argument]):<60}; default: {read_common_h.parameters[argument][1]:<10}") if help_count == gpt_count and gpt_count == key_count: - print("\n\033[032mNo unresolved help-list incompatibilities with this app.\033[0m") + print(f"\n\033[032mNo unresolved help-list incompatibilities with \033[33m{filename.split('/')[-1]}\033[0m") else: print("\n\033[031mThis app requires some attention regarding help-function consistency.\033[0m") @@ -114,13 +196,17 @@ def find_parameters(file, sorted_result): directory = '/Users/edsilm2/llama.cpp/examples' if __name__ == '__main__': + + # update the source help file from C++ source (this works exactly as required) + update_file("common/common.cpp", "help_list.txt") + # get the parameters from the common.h file utiity we import print(read_common_h.parameters) # So now we've got the gpt_parameters in this parameters dict # First we alter all the hyphenated help words in help-file.txt to underscores - # replace_dashes_with_underscores('help_list.txt') - # This above may no longer be needed + # we later reverse these changers before printing the help lines + replace_dashes_with_underscores('help_list.txt') print("\n####################### find parameters #################################") # Call the find function to collect all the params.attributes and output the result diff --git a/examples/cmap-example/read_common_h.py b/examples/cmap-example/read_common_h.py index a683d7662b204..1c18d496002aa 100644 --- a/examples/cmap-example/read_common_h.py +++ b/examples/cmap-example/read_common_h.py @@ -7,6 +7,9 @@ lines = file.read().split('\n') parameters = {} +# we add the logit_bias parameter which otherwise is not found +parameters['logit_bias']=['logit_bias', '0', '//', 'way', 'to', 'alter', 'prob', 'of', 'particular', 'words'] + inside = False for line in lines: # 
non_whitespace_elements = re.findall(r"\S+", line) @@ -18,17 +21,19 @@ # note: cannot use nwe[0] because types do not generate unique keys and so overwrite # here we deliberately add back the key so we can make a manual change when it is different parameters[non_whitespace_elements[1]] = non_whitespace_elements[1:] - for k, v in parameters.items(): - print(f"key: {k:<20}; values: {v}") - - concatenated_element = "" - for i, element in enumerate(v): - if element == "//": - concatenated_element = " ".join(v[i:]) - # break - print(" "*10 + f"parameter: \033[32m{k:>40} \033[34mdefault: \033[30m{v[1]:>5} \033[34mcommment: \033[33m{concatenated_element:80}\033[0m") - + # remove spurious entry caused by eccentric status of logit_bias + if "float>" in parameters and parameters["float>"][1] == 'logit_bias': + del parameters["float>"] + # this is a bit of a hack to terminate the harvest if len(non_whitespace_elements) > 2 and non_whitespace_elements[1] == "infill": inside = False - break \ No newline at end of file + break +for k, v in parameters.items(): + print(f"key: {k:<20}; values: {v}") + concatenated_element = "" + for i, element in enumerate(v): + if element == "//": + concatenated_element = " ".join(v[i:]) + # break + print(" "*10 + f"parameter: \033[32m{k:>40} \033[34mdefault: \033[30m{v[1]:>5} \033[34mcommment: \033[33m{concatenated_element:80}\033[0m") diff --git a/help_list.txt b/help_list.txt index 7bf5b8c78a385..97b91a98259d5 100644 --- a/help_list.txt +++ b/help_list.txt @@ -1,104 +1,104 @@ --h, --helpshow this help message and exit --i, --interactive run in interactive mode ---interactive_first run in interactive mode and wait for input right away --ins, --instructrun in instruction mode (use with Alpaca models) ---multiline_input allows you to write or paste multiple lines without ending each in '\\' --r PROMPT, --reverse_prompt PROMPT - halt generation at PROMPT, return control in interactive mode - (can be specified more than once for multiple prompts). ---color colorise output to distinguish prompt and user input from generations --s SEED, --seed SEED RNG seed (default: -1, use random seed for < 0) --t N, --threads N number of threads to use during generation (default: %d)\n", params.n_threads); --tb N, --threads_batch N - number of threads to use during batch and prompt processing (default: same as --threads) --p PROMPT, --prompt PROMPT - prompt to start generation with (default: empty) --e, --escape process prompt escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\) ---prompt_cache FNAME file to cache prompt state for faster startup (default: none) ---prompt_cache_all if specified, saves user input and generations to cache as well. - not supported with --interactive or other interactive options ---prompt_cache_ro if specified, uses the prompt cache but does not update it. ---random_prompt start with a randomized prompt. ---in_prefix_bos prefix BOS to user inputs, preceding the `--in_prefix` string ---in_prefix STRING string to prefix user inputs with (default: empty) ---in_suffix STRING string to suffix after user inputs with (default: empty) --f FNAME, --file FNAME - prompt file to start generation. 
--n N, --n_predict N number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict); --c N, --ctx_size N size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx); --b N, --batch_size N batch size for prompt processing (default: %d)\n", params.n_batch); ---top_k N top_k sampling (default: %d, 0 = disabled)\n", params.top_k); ---top_p N top_p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p); ---tfs N tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n", (double)params.tfs_z); ---typical N locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n", (double)params.typical_p); ---repeat_last_n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", params.repeat_last_n); ---repeat_penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)params.repeat_penalty); ---presence_penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)params.presence_penalty); ---frequency_penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)params.frequency_penalty); ---mirostat N use Mirostat sampling. - Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used. - (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", params.mirostat); ---mirostat_lr N Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta); ---mirostat_ent NMirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau); --l TOKEN_ID(+/-)BIAS, --logit_bias TOKEN_ID(+/-)BIAS - modifies the likelihood of token appearing in the completion, - i.e. `--logit_bias 15043+1` to increase likelihood of token ' Hello', - or `--logit_bias 15043_1` to decrease likelihood of token ' Hello' ---grammar GRAMMAR BNF_like grammar to constrain generations (see samples in grammars/ dir) ---grammar_file FNAME file to read grammar from ---cfg_negative_prompt PROMPT - negative prompt to use for guidance. (default: empty) ---cfg_negative_prompt_file FNAME - negative prompt file to use for guidance. 
(default: empty) ---cfg_scale N strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale); ---rope_scale N RoPE context linear scaling factor, inverse of --rope_freq_scale ---rope_freq_base N RoPE base frequency, used by NTK_aware scaling (default: loaded from model) ---rope_freq_scale N RoPE frequency linear scaling factor (default: loaded from model) ---ignore_eos ignore end of stream token and continue generating (implies --logit_bias 2_inf) ---no_penalize_nldo not penalize newline token ---memory_f32 use f32 instead of f16 for memory key+value (default: disabled) - not recommended: doubles context memory required and no measurable increase in quality ---temp N temperature (default: %.1f)\n", (double)params.temp); ---logits_all return logits for all tokens in the batch (default: disabled) ---hellaswag compute HellaSwag score over random tasks from datafile supplied with -f ---hellaswag_tasks N number of tasks to use when computing the HellaSwag score (default: %zu)\n", params.hellaswag_tasks); ---keep N number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep); ---draft N number of tokens to draft for speculative decoding (default: %d)\n", params.n_draft); ---chunks Nmax number of chunks to process (default: %d, -1 = all)\n", params.n_chunks); --np N, --parallel N number of parallel sequences to decode (default: %d)\n", params.n_parallel); --ns N, --sequences N number of sequences to decode (default: %d)\n", params.n_sequences); --cb, --cont_batching enable continuous batching (a.k.a dynamic batching) (default: disabled) - if (llama_mlock_supported()) { - --mlock force system to keep model in RAM rather than swapping or compressing - } - if (llama_mmap_supported()) { - --no_mmap do not memory_map model (slower load but may reduce pageouts if not using mlock) - } ---numa attempt optimizations that help on some NUMA systems - if run without this previously, it is recommended to drop the system page cache before using this - see https://github.com/ggerganov/llama.cpp/issues/1437 -#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD --ngl N, --n_gpu_layers N - number of layers to store in VRAM --ngld N, --n_gpu_layers_draft N - number of layers to store in VRAM for the draft model --ts SPLIT --tensor_split SPLIT - how to split tensors across multiple GPUs, comma_separated list of proportions, e.g. 3,1 --mg i, --main_gpu i the GPU to use for scratch and small tensors -#ifdef GGML_USE_CUBLAS --nommq, --no_mul_mat_q - use " GGML_CUBLAS_NAME " instead of custom mul_mat_q " GGML_CUDA_NAME " kernels. - Not recommended since this is both slower and uses more VRAM. 
-#endif // GGML_USE_CUBLAS -#endif ---verbose_promptprint prompt before generation - fprintf(stderr, " --simple_io use basic IO for better compatibility in subprocesses and limited consoles ---lora FNAME apply LoRA adapter (implies --no_mmap) ---lora_scaled FNAME S apply LoRA adapter with user defined scaling S (implies --no_mmap) ---lora_base FNAME optional model to use as a base for the layers modified by the LoRA adapter --m FNAME, --model FNAME - model path (default: %s)\n", params.model.c_str()); --md FNAME, --model_draft FNAME - draft model for speculative decoding (default: %s)\n", params.model.c_str()); --ld LOGDIR, --logdir LOGDIR - path under which to save YAML logs (no logging if unset) \ No newline at end of file + -h, --help show this help message and exit\n" + -i, --interactive run in interactive mode\n" + --interactive_first run in interactive mode and wait for input right away\n" + -ins, --instruct run in instruction mode (use with Alpaca models)\n" + --multiline_input allows you to write or paste multiple lines without ending each in '\\'\n" + -r PROMPT, --reverse_prompt PROMPT\n" + halt generation at PROMPT, return control in interactive mode\n" + (can be specified more than once for multiple prompts).\n" + --color colorise output to distinguish prompt and user input from generations\n" + -s SEED, --seed SEED RNG seed (default: -1, use random seed for < 0)\n" + -t N, --threads N number of threads to use during generation (default: %d)\n", params.n_threads + -tb N, --threads_batch N\n" + number of threads to use during batch and prompt processing (default: same as --threads)\n" + -p PROMPT, --prompt PROMPT\n" + prompt to start generation with (default: empty)\n" + -e, --escape process prompt escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\)\n" + --prompt_cache FNAME file to cache prompt state for faster startup (default: none)\n" + --prompt_cache_all if specified, saves user input and generations to cache as well.\n" + not supported with --interactive or other interactive options\n" + --prompt_cache_ro if specified, uses the prompt cache but does not update it.\n" + --random_prompt start with a randomized prompt.\n" + --in_prefix_bos prefix BOS to user inputs, preceding the `--in_prefix` string\n" + --in_prefix STRING string to prefix user inputs with (default: empty)\n" + --in_suffix STRING string to suffix after user inputs with (default: empty)\n" + -f FNAME, --file FNAME\n" + prompt file to start generation.\n" + -n N, --n_predict N number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict + -c N, --ctx_size N size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx + -b N, --batch_size N batch size for prompt processing (default: %d)\n", params.n_batch + --top_k N top_k sampling (default: %d, 0 = disabled)\n", params.top_k + --top_p N top_p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p + --tfs N tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n", (double)params.tfs_z + --typical N locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n", (double)params.typical_p + --repeat_last_n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", params.repeat_last_n + --repeat_penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)params.repeat_penalty + --presence_penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)params.presence_penalty + --frequency_penalty 
N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)params.frequency_penalty + --mirostat N use Mirostat sampling.\n" + Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.\n" + (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", params.mirostat + --mirostat_lr N Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta + --mirostat_ent N Mirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau + -l T, --logit_bias T T = TOKEN_ID(plus/minus)BIAS\n" + modifies the likelihood of token appearing in the completion,\n" + i.e. `--logit_bias 15043+1` to increase likelihood of token ' Hello',\n" + or `--logit_bias 15043_1` to decrease likelihood of token ' Hello'\n" + --grammar GRAMMAR BNF_like grammar to constrain generations (see samples in grammars/ dir)\n" + --grammar_file FNAME file to read grammar from\n" + --cfg_negative_prompt PROMPT\n" + negative prompt to use for guidance. (default: empty)\n" + --cfg_negative_prompt_file FNAME\n" + negative prompt file to use for guidance. (default: empty)\n" + --cfg_scale N strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale + --rope_scale N RoPE context linear scaling factor, inverse of --rope_freq_scale\n" + --rope_freq_base N RoPE base frequency, used by NTK_aware scaling (default: loaded from model)\n" + --rope_freq_scale N RoPE frequency linear scaling factor (default: loaded from model)\n" + --ignore_eos ignore end of stream token and continue generating (implies --logit_bias 2_inf)\n" + --no_penalize_nl do not penalize newline token (default is DO penalise nl token)\n" + --memory_f32 use f32 instead of f16 for memory key+value (default: disabled)\n" + not recommended: doubles context memory required and no measurable increase in quality\n" + --temp N temperature (default: %.1f)\n", (double)params.temp + --logits_all return logits for all tokens in the batch (default: disabled)\n" + --hellaswag compute HellaSwag score over random tasks from datafile supplied with -f\n" + --hellaswag_tasks N number of tasks to use when computing the HellaSwag score (default: %zu)\n", params.hellaswag_tasks + --keep N number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep + --draft N number of tokens to draft for speculative decoding (default: %d)\n", params.n_draft + --chunks N max number of chunks to process (default: %d, -1 = all)\n", params.n_chunks + -np N, --parallel N number of parallel sequences to decode (default: %d)\n", params.n_parallel + -ns N, --sequences N number of sequences to decode (default: %d)\n", params.n_sequences + -cb, --cont_batching enable continuous batching (a.k.a dynamic batching) (default: disabled)\n" + --mlock force system to keep model in RAM rather than swapping or compressing\n" + --no_mmap do not memory_map model (slower load but may reduce pageouts if not using mlock)\n" + --numa attempt optimizations that help on some NUMA systems\n" + if run without this previously, it is recommended to drop the system page cache before using this\n" + see https://github.com/ggerganov/llama.cpp/issues/1437\n" + -ngl N, --n_gpu_layers N\n" + number of layers to store in VRAM\n" + -ngld N, --n_gpu_layers_draft N\n" + number of layers to store in VRAM for the draft model\n" + -ts SPLIT --tensor_split SPLIT\n" + how to split tensors across multiple GPUs, comma_separated list of proportions, e.g. 
3,1\n" + -mg i, --main_gpu i the GPU to use for scratch and small tensors\n" + -nommq, --no_mul_mat_q\n" + use " GGML_CUBLAS_NAME " instead of custom mul_mat_q " GGML_CUDA_NAME " kernels.\n" + Not recommended since this is both slower and uses more VRAM.\n" + --verbose_prompt print prompt before generation\n" + --lora FNAME apply LoRA adapter (implies --no_mmap)\n" + --lora_scaled FNAME S apply LoRA adapter with user defined scaling S (implies --no_mmap)\n" + --lora_base FNAME optional model to use as a base for the layers modified by the LoRA adapter\n" + -m FNAME, --model FNAME\n" + model path (default: %s)\n", params.model.c_str() + -md FNAME, --model_draft FNAME\n" + draft model for speculative decoding (default: %s)\n", params.model.c_str() + -ld LOGDIR, --logdir LOGDIR\n" + path under which to save YAML logs (no logging if unset)\n" + --ppl_stride stride for ppl calcs. 0 (default): the pre_existing approach will be used.\n" + --ppl_output_type 0 (default): ppl output as usual, 1: ppl output num_tokens, one per line\n" + --embedding 0 (default): get only sentence embedding\n" + --beams N 0 (default): if non_zero use beam search of given width N.\n" + --memory_f32 0 (default): if true (= 1) disable f16 memory.\n" + --no_mmap 0 (default): if true use mmap for faster loads.\n" + --mlock 0 (default): if true keep model in memory.\n" + --use_color 0 (default): use color to distinguish generations from inputs\n" + --nprobs N if > 0 output the probabilities of the top N tokens\n" From 32bdf0ee4b5f602dc940136ebbc44d0d0cd8b786 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 09:10:07 +0100 Subject: [PATCH 18/34] Final reconciliation --- common/common.cpp | 10 ++++++---- help_list.txt | 2 ++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 4d4ecf03df092..0b295f41d20b5 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -622,7 +622,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { } // There were missing items from this list of helps so the wording needs checking (all inserted at the end, so reposition too): -// --embedding, --beams, --ppl-stride, --ppl-output-type, memory-f32, no-mmap, mlock, use-color, nprobs +// --embedding, --beams, --ppl-stride, --ppl-output-type, --memory-f32, --no-mmap, --mlock, --use-color, --nprobs, --alias, --infill void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf("usage: %s [options]\n", argv[0]); printf("\n"); @@ -735,11 +735,13 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" --ppl-output-type 0 (default): ppl output as usual, 1: ppl output num_tokens, one per line\n"); printf(" --embedding 0 (default): get only sentence embedding\n"); printf(" --beams N 0 (default): if non-zero use beam search of given width N.\n"); - printf(" --memory-f32 0 (default): if true (= 1) disable f16 memory.\n"); + printf(" --memory-f32 0 (default): if true (= 1) disable f16 memory.\n"); printf(" --no-mmap 0 (default): if true use mmap for faster loads.\n"); - printf(" --mlock 0 (default): if true keep model in memory.\n"); + printf(" --mlock 0 (default): if true keep model in memory.\n"); printf(" --use-color 0 (default): use color to distinguish generations from inputs\n"); - printf(" --nprobs N if > 0 output the probabilities of the top N tokens\n"); + printf(" --nprobs N if > 0 output the probabilities of the top N tokens\n"); + printf(" --alias model alias (default: 'unknown')\n"); + printf(" --infill 0 
(defaut) use infill mode\n"); printf("\n"); } diff --git a/help_list.txt b/help_list.txt index 97b91a98259d5..38e0f6f1d8591 100644 --- a/help_list.txt +++ b/help_list.txt @@ -102,3 +102,5 @@ --mlock 0 (default): if true keep model in memory.\n" --use_color 0 (default): use color to distinguish generations from inputs\n" --nprobs N if > 0 output the probabilities of the top N tokens\n" + --alias model alias (default: 'unknown')\n" + --infill 0 (defaut) use infill mode\n" From 2e17fcfdbaa99e61171b5fe1c342e61d69f99d87 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 09:46:26 +0100 Subject: [PATCH 19/34] Comment in common.cpp --- common/common.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/common/common.cpp b/common/common.cpp index 0b295f41d20b5..927a4d9b4fda8 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -623,6 +623,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { // There were missing items from this list of helps so the wording needs checking (all inserted at the end, so reposition too): // --embedding, --beams, --ppl-stride, --ppl-output-type, --memory-f32, --no-mmap, --mlock, --use-color, --nprobs, --alias, --infill +// some corresponding changes to the sequence of fprintf() code may be needed void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf("usage: %s [options]\n", argv[0]); printf("\n"); From 3e4de67fdd3d1f0ab89915d4593fc648188e9266 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 10:01:21 +0100 Subject: [PATCH 20/34] Update find_implemented_args.py --- examples/cmap-example/find_implemented_args.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/examples/cmap-example/find_implemented_args.py b/examples/cmap-example/find_implemented_args.py index ac6e6cfdc8695..219a05fc6d194 100644 --- a/examples/cmap-example/find_implemented_args.py +++ b/examples/cmap-example/find_implemented_args.py @@ -1,4 +1,7 @@ -# search the specified directory for files that include argv[i] == '-f' or '--file' arguments +# search the specified directory for files that include command-line arguments +# these are almost always in the form params.argument; "logit_bias" is one exception +# have yet to investigate fully what "lora_adapter" in server.cpp does since it is not apparently +# accessible from the command-line arg/parameter sequence. 
import os import re @@ -86,6 +89,8 @@ def title_print(filename): print(f"Filename: \033[32m{title}\033[0m") print("#"*(10+len(title))) +# list all the equivalences between declarations in common.h and common.cpp that defines the help +# these are used to substitute the searched params.attributes (keys) with help attributes (values) def substitution_list(parameters): # store untrapped parameters as identicals in case we need to change them later sub_dict = {"n_threads": "threads", From 990e8cb329cacf2137cdf4e9b4815eaeeb93c054 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 10:36:11 +0100 Subject: [PATCH 21/34] New comment --- examples/cmap-example/find_implemented_args.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/cmap-example/find_implemented_args.py b/examples/cmap-example/find_implemented_args.py index 219a05fc6d194..2d86e8bc95ccb 100644 --- a/examples/cmap-example/find_implemented_args.py +++ b/examples/cmap-example/find_implemented_args.py @@ -2,6 +2,7 @@ # these are almost always in the form params.argument; "logit_bias" is one exception # have yet to investigate fully what "lora_adapter" in server.cpp does since it is not apparently # accessible from the command-line arg/parameter sequence. +# there is also an issue with -ngl which does not appear in some help menus even when apparently implemented, e.g. in parallel.cpp import os import re From 51446bf9219fe5e8d12f5ca9e1342677cc40f527 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 13:35:52 +0100 Subject: [PATCH 22/34] Naming convention --- ..._implemented_args.py => find-implemented-args.py} | 12 ++++++------ .../{read_common_h.py => readcommonh.py} | 0 2 files changed, 6 insertions(+), 6 deletions(-) rename examples/cmap-example/{find_implemented_args.py => find-implemented-args.py} (96%) rename examples/cmap-example/{read_common_h.py => readcommonh.py} (100%) diff --git a/examples/cmap-example/find_implemented_args.py b/examples/cmap-example/find-implemented-args.py similarity index 96% rename from examples/cmap-example/find_implemented_args.py rename to examples/cmap-example/find-implemented-args.py index 2d86e8bc95ccb..38a83b9418127 100644 --- a/examples/cmap-example/find_implemented_args.py +++ b/examples/cmap-example/find-implemented-args.py @@ -8,7 +8,7 @@ import re import collections import re -import read_common_h +import readcommonh # update the source file - usually 'help_list.txt', so the default - in case the source file has been changed def update_file(file_from, file_to = "help_list.txt"): @@ -177,8 +177,8 @@ def find_parameters(file, sorted_result): # now do it the new way print("\nNow we extract the original gpt_params definition from common.h with the defaults for implemented arguments:\n") gpt_count = 0 - for k,v in read_common_h.parameters.items(): - if not read_common_h.parameters.items(): + for k,v in readcommonh.parameters.items(): + if not readcommonh.parameters.items(): print(f" \033[032mNone\033[0m\n") elif k in arguments: # print(f"gpt_params: \033[33m{k:>20}\033[0m values: {v}") @@ -190,9 +190,9 @@ def find_parameters(file, sorted_result): print("\nSearching the other way round is more efficient:\n") key_count = 0 for argument in set(arguments): - if argument in read_common_h.parameters: + if argument in readcommonh.parameters: key_count += 1 - print(f"{key_count:>2} key: {argument:>25}; role: {concatenate(read_common_h.parameters[argument]):<60}; default: {read_common_h.parameters[argument][1]:<10}") + print(f"{key_count:>2} key: {argument:>25}; role: 
{concatenate(readcommonh.parameters[argument]):<60}; default: {readcommonh.parameters[argument][1]:<10}") if help_count == gpt_count and gpt_count == key_count: print(f"\n\033[032mNo unresolved help-list incompatibilities with \033[33m{filename.split('/')[-1]}\033[0m") else: @@ -207,7 +207,7 @@ def find_parameters(file, sorted_result): update_file("common/common.cpp", "help_list.txt") # get the parameters from the common.h file utiity we import - print(read_common_h.parameters) + print(readcommonh.parameters) # So now we've got the gpt_parameters in this parameters dict # First we alter all the hyphenated help words in help-file.txt to underscores diff --git a/examples/cmap-example/read_common_h.py b/examples/cmap-example/readcommonh.py similarity index 100% rename from examples/cmap-example/read_common_h.py rename to examples/cmap-example/readcommonh.py From 3f07ed90a449512e9e62c7d3a1f11bcef33323ab Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 13:45:12 +0100 Subject: [PATCH 23/34] Added prompt-file to hep --- common/common.cpp | 1 + examples/cmap-example/find-implemented-args.py | 1 + help_list.txt | 1 + 3 files changed, 3 insertions(+) diff --git a/common/common.cpp b/common/common.cpp index 9b29d7532a156..f0147aa76d38c 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -748,6 +748,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" --nprobs N if > 0 output the probabilities of the top N tokens\n"); printf(" --alias model alias (default: 'unknown')\n"); printf(" --infill 0 (defaut) use infill mode\n"); + printf(" --prompt-file name of external prompt file\n"); printf("\n"); } diff --git a/examples/cmap-example/find-implemented-args.py b/examples/cmap-example/find-implemented-args.py index 38a83b9418127..a0146b7775e20 100644 --- a/examples/cmap-example/find-implemented-args.py +++ b/examples/cmap-example/find-implemented-args.py @@ -106,6 +106,7 @@ def substitution_list(parameters): "n_keep" : "keep", "n_probs" : "nprobs", "path_prompt_cache" : "prompt_cache", + "prompt_file" : "prompt_file", "input_prefix" : "in_prefix", "input_suffix" : "in_suffix", "input_prefix_bos" : "in_prefix_bos", diff --git a/help_list.txt b/help_list.txt index 38e0f6f1d8591..c74199e0e4264 100644 --- a/help_list.txt +++ b/help_list.txt @@ -104,3 +104,4 @@ --nprobs N if > 0 output the probabilities of the top N tokens\n" --alias model alias (default: 'unknown')\n" --infill 0 (defaut) use infill mode\n" + --prompt_file name of external prompt file\n" From 094d6d6e09f52913a7e8ee088c356ae5439af393 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 16:10:10 +0100 Subject: [PATCH 24/34] Add help list --- common/common.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/common.cpp b/common/common.cpp index f0147aa76d38c..1a6156473ab93 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -627,7 +627,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { } // There were missing items from this list of helps so the wording needs checking (all inserted at the end, so reposition too): -// --embedding, --beams, --ppl-stride, --ppl-output-type, --memory-f32, --no-mmap, --mlock, --use-color, --nprobs, --alias, --infill +// --embedding, --beams, --ppl-stride, --ppl-output-type, --memory-f32, --no-mmap, --mlock, --use-color, --nprobs, --alias, --infill, --prompt-file // some corresponding changes to the sequence of fprintf() code may be needed void gpt_print_usage(int /*argc*/, char ** argv, const 
gpt_params & params) { printf("usage: %s [options]\n", argv[0]); From 9abc92545cf79faa2b4274c5ee32bc1fbc6491d1 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 16:53:27 +0100 Subject: [PATCH 25/34] Remove trailing ws --- examples/cmap-example/cmap-example.cpp | 4 ++-- .../cmap-example/find-implemented-args.py | 20 +++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index ece30702aa098..d4214b3bbd0d1 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -24,7 +24,7 @@ int main() { } // Now try the helpdict idea - + printf("Second kind of dictionary\n"); // Create a list of strings @@ -37,7 +37,7 @@ int main() { std::list& fruitsList = helpdict["fruits"]; fruitsList.push_back("grape"); fruitsList.push_back("pineapple"); - + for (const auto& pair : helpdict) { printf("helpdict contains a list of %s\n", pair.first.c_str()); for (const auto& element : pair.second) { diff --git a/examples/cmap-example/find-implemented-args.py b/examples/cmap-example/find-implemented-args.py index a0146b7775e20..6de919097e9f5 100644 --- a/examples/cmap-example/find-implemented-args.py +++ b/examples/cmap-example/find-implemented-args.py @@ -29,10 +29,10 @@ def update_file(file_from, file_to = "help_list.txt"): def replace_dashes_with_underscores(filename): with open(filename, 'r') as file: content = file.read() - + # Match '-' surrounded by word characters on both sides and replace with '_' replaced_content = re.sub(r'(\w)-(\w)', r'\1_\2', content) - + with open(filename, 'w') as file: file.write(replaced_content) @@ -86,9 +86,9 @@ def concatenate(v): def title_print(filename): title = filename.split('/')[-1] - print("\n\n"+"#"*(10+len(title))) + print("\n\n"+"#"*(10+len(title))) print(f"Filename: \033[32m{title}\033[0m") - print("#"*(10+len(title))) + print("#"*(10+len(title))) # list all the equivalences between declarations in common.h and common.cpp that defines the help # these are used to substitute the searched params.attributes (keys) with help attributes (values) @@ -130,7 +130,7 @@ def substitution_list(parameters): new_parameters = [] for parameter in parameters: if parameter in sub_dict: - # we need both for future reference + # we need both for future reference new_parameters.append(parameter) new_parameters.append(sub_dict[parameter]) else: @@ -154,7 +154,7 @@ def find_parameters(file, sorted_result): # bit of a hack to exclude --attributes at the end of help comment lines if re.search(pattern, line[:50]): parameters.append(line) - + all_parameters = set(parameters) title_print(filename) @@ -162,8 +162,8 @@ def find_parameters(file, sorted_result): if not all_parameters: print(f" \033[032mNone\033[0m\n") - - # first do it the original way + + # first do it the original way else: help_count = 0 for parameter in all_parameters: @@ -186,7 +186,7 @@ def find_parameters(file, sorted_result): concatenated_element = concatenate(v) gpt_count += 1 print(f"{gpt_count:>2} gpt_param: \033[32m{k:>19}; \033[34mrole: \033[33m{concatenated_element:<60}\033[0m; \033[34mdefault: \033[30m{v[1]:<10}\033[0m ") - + # searching the other way round is quicker: print("\nSearching the other way round is more efficient:\n") key_count = 0 @@ -203,7 +203,7 @@ def find_parameters(file, sorted_result): directory = '/Users/edsilm2/llama.cpp/examples' if __name__ == '__main__': - + # update the source help file from C++ source (this works exactly as required) 
update_file("common/common.cpp", "help_list.txt") From 6189a9ef3ab582a6a96f7ed9b84ca89364c98f9b Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 16:55:31 +0100 Subject: [PATCH 26/34] One more trailing ws --- examples/cmap-example/readcommonh.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/cmap-example/readcommonh.py b/examples/cmap-example/readcommonh.py index 1c18d496002aa..422edea8fd252 100644 --- a/examples/cmap-example/readcommonh.py +++ b/examples/cmap-example/readcommonh.py @@ -25,7 +25,7 @@ if "float>" in parameters and parameters["float>"][1] == 'logit_bias': del parameters["float>"] - # this is a bit of a hack to terminate the harvest + # this is a bit of a hack to terminate the harvest if len(non_whitespace_elements) > 2 and non_whitespace_elements[1] == "infill": inside = False break From 53dbefa2f8583fbece87d7a6e3d3ca3346b1a3f2 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 17:05:24 +0100 Subject: [PATCH 27/34] Added final newline (2) --- examples/cmap-example/cmap-example.cpp | 2 +- examples/cmap-example/find-implemented-args.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index d4214b3bbd0d1..0733952074247 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -141,4 +141,4 @@ int main() { } } return 0; -} \ No newline at end of file +} diff --git a/examples/cmap-example/find-implemented-args.py b/examples/cmap-example/find-implemented-args.py index 6de919097e9f5..c48c8c5a3d82c 100644 --- a/examples/cmap-example/find-implemented-args.py +++ b/examples/cmap-example/find-implemented-args.py @@ -225,4 +225,4 @@ def find_parameters(file, sorted_result): print("\n######################## find help context parameters #################################") # analyse the files and what they contain - find_parameters("help_list.txt", sorted) \ No newline at end of file + find_parameters("help_list.txt", sorted) From ba324022917f39bd849dbce73da9728957e82090 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 20:38:00 +0100 Subject: [PATCH 28/34] creadcommonh --- examples/cmap-example/cmap-example.cpp | 90 ++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index 0733952074247..3fa90a726a326 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -7,6 +7,12 @@ #include #include +#include +#include +#include +#include +#include + int main() { std::map dict; std::map> helpdict; @@ -142,3 +148,87 @@ int main() { } return 0; } + +std::vector splitString(const std::string& str, const std::string& delimiter) { + std::vector tokens; + std::size_t start = 0, end = 0; + while ((end = str.find(delimiter, start)) != std::string::npos) { + tokens.push_back(str.substr(start, end - start)); + start = end + delimiter.length(); + } + tokens.push_back(str.substr(start)); + return tokens; +} + +std::unordered_map> extractParameters() { + std::ifstream file("common/common.h"); + std::string line; + std::vector lines; + while (std::getline(file, line)) { + lines.push_back(line); + } + + std::unordered_map> parameters; + parameters["logit_bias"] = {"logit_bias", "0", "//", "way", "to", "alter", "probability", "of", "particular", "words"); + + bool inside = false; + for (const std::string& line : lines) { + std::vector nonWhitespaceElements = 
splitString(line, " "); + std::cout << "nwe = \033[33m"; + for (const std::string& element : nonWhitespaceElements) { + std::cout << element << " "; + } + std::cout << "\033[0m" << std::endl; + + if (!nonWhitespaceElements.empty() && nonWhitespaceElements[0] == "struct") { + inside = true; + } + + if (nonWhitespaceElements.size() > 2 && inside) { + // Note: cannot use nwe[0] because types do not generate unique keys and so overwrite + // Here we deliberately add back the key so we can manually change it when it is different + parameters[nonWhitespaceElements[1]] = nonWhitespaceElements; + + // Remove spurious entry caused by eccentric status of logit_bias + if (parameters.count("float>") && parameters["float>"][1] == "logit_bias") { + parameters.erase("float>"); + } + } + + // Terminate the harvest + if (nonWhitespaceElements.size() > 2 && nonWhitespaceElements[1] == "infill") { + inside = false; + break; + } + } + + for (const auto& pair : parameters) { + const std::string& key = pair.first; + const std::vector& value = pair.second; + std::cout << "key: " << std::left << std::setw(20) << key << "; values: "; + for (const std::string& element : value) { + std::cout << element << " "; + } + std::cout << std::endl; + + std::string concatenatedElement = ""; + for (std::size_t i = 0; i < value.size(); i++) { + if (value[i] == "//") { + concatenatedElement = std::accumulate(value.begin() + i, value.end(), std::string("")); + // break; + } + } + + std::cout << std::string(10, ' '); + std::cout << "parameter: \033[32m" << std::right << std::setw(40) << key << " \033[34mdefault: \033[30m" + << std::right << std::setw(5) << value[1] << " \033[34mcomment: \033[33m" + << std::left << std::setw(80) << concatenatedElement << "\033[0m" << std::endl; + } + + return parameters; +} + +int main() { + std::unordered_map> parameters = extractParameters(); + return 0; +} From 47675b920037b41fd67ee7c62d309b26bad84aa8 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 21:00:49 +0100 Subject: [PATCH 29/34] experiment in C --- examples/cmap-example/cmap-example.cpp | 46 +++++++------ examples/cmap-example/creadcommonh.cpp | 91 ++++++++++++++++++++++++++ 2 files changed, 113 insertions(+), 24 deletions(-) create mode 100644 examples/cmap-example/creadcommonh.cpp diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index 3fa90a726a326..0719fc95f4759 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -146,32 +146,31 @@ int main() { printf("\n"); } } - return 0; -} -std::vector splitString(const std::string& str, const std::string& delimiter) { - std::vector tokens; - std::size_t start = 0, end = 0; - while ((end = str.find(delimiter, start)) != std::string::npos) { - tokens.push_back(str.substr(start, end - start)); - start = end + delimiter.length(); + std::vector splitString(const std::string& str, const std::string& delimiter) { + std::vector tokens; + std::size_t start = 0, end = 0; + while ((end = str.find(delimiter, start)) != std::string::npos) { + tokens.push_back(str.substr(start, end - start)); + start = end + delimiter.length(); + } + tokens.push_back(str.substr(start)); + return tokens; } - tokens.push_back(str.substr(start)); - return tokens; -} -std::unordered_map> extractParameters() { - std::ifstream file("common/common.h"); - std::string line; - std::vector lines; - while (std::getline(file, line)) { - lines.push_back(line); - } + std::unordered_map> extractParameters() { + std::ifstream 
file("common/common.h"); + std::string line; + std::vector lines; + while (std::getline(file, line)) { + lines.push_back(line); + } std::unordered_map> parameters; parameters["logit_bias"] = {"logit_bias", "0", "//", "way", "to", "alter", "probability", "of", "particular", "words"); bool inside = false; + for (const std::string& line : lines) { std::vector nonWhitespaceElements = splitString(line, " "); std::cout << "nwe = \033[33m"; @@ -221,14 +220,13 @@ std::unordered_map> extractParameters() { std::cout << std::string(10, ' '); std::cout << "parameter: \033[32m" << std::right << std::setw(40) << key << " \033[34mdefault: \033[30m" - << std::right << std::setw(5) << value[1] << " \033[34mcomment: \033[33m" - << std::left << std::setw(80) << concatenatedElement << "\033[0m" << std::endl; + << std::right << std::setw(5) << value[1] << " \033[34mcomment: \033[33m" + << std::left << std::setw(80) << concatenatedElement << "\033[0m" << std::endl; + } + return parameters; } - return parameters; -} - -int main() { std::unordered_map> parameters = extractParameters(); + return 0; } diff --git a/examples/cmap-example/creadcommonh.cpp b/examples/cmap-example/creadcommonh.cpp new file mode 100644 index 0000000000000..7e8ac67f4d5e9 --- /dev/null +++ b/examples/cmap-example/creadcommonh.cpp @@ -0,0 +1,91 @@ +#include +#include +#include +#include +#include +#include + +std::vector splitString(const std::string& str, const std::string& delimiter) { + std::vector tokens; + std::size_t start = 0, end = 0; + while ((end = str.find(delimiter, start)) != std::string::npos) { + tokens.push_back(str.substr(start, end - start)); + start = end + delimiter.length(); + } + tokens.push_back(str.substr(start)); + return tokens; +} + +std::unordered_map> extractParameters() { + std::ifstream file("common/common.h"); + std::string line; + std::vector lines; + while (std::getline(file, line)) { + lines.push_back(line); + } + + std::unordered_map> parameters; + parameters["logit_bias"] = {"logit_bias", "0", "//", "way", "to", "alter", "prob", "of", "word", "being", "chosen"); + + bool inside = false; + for (const std::string& line : lines) { + std::vector nonWhitespaceElements = splitString(line, " "); + std::cout << "nwe = \033[33m"; + for (const std::string& element : nonWhitespaceElements) { + std::cout << element << " "; + } + std::cout << "\033[0m" << std::endl; + + if (!nonWhitespaceElements.empty() && nonWhitespaceElements[0] == "struct") { + inside = true; + } + + if (nonWhitespaceElements.size() > 2 && inside) { + // Note: cannot use nwe[0] because types do not generate unique keys and so overwrite + // Here we deliberately add back the key so we can manually change it when it is different + parameters[nonWhitespaceElements[1]] = nonWhitespaceElements; + + // Remove spurious entry caused by eccentric status of logit_bias + if (parameters.count("float>") && parameters["float>"][1] == "logit_bias") { + parameters.erase("float>"); + } + } + + // Terminate the harvest + if (nonWhitespaceElements.size() > 2 && nonWhitespaceElements[1] == "infill") { + inside = false; + break; + } + } + + for (const auto& pair : parameters) { + const std::string& key = pair.first; + const std::vector& value = pair.second; + std::cout << "key: " << std::left << std::setw(20) << key << "; values: "; + for (const std::string& element : value) { + std::cout << element << " "; + } + std::cout << std::endl; + + std::string concatenatedElement = ""; + for (std::size_t i = 0; i < value.size(); i++) { + if (value[i] == "//") { + 
concatenatedElement = std::accumulate(value.begin() + i, value.end(), std::string("")); + // break; + } + } + + std::cout << std::string(10, ' '); + std::cout << "parameter: \033[32m" << std::right << std::setw(40) << key << " \033[34mdefault: \033[30m" + << std::right << std::setw(5) << value[1] << " \033[34mcomment: \033[33m" + << std::left << std::setw(80) << concatenatedElement << "\033[0m" << std::endl; + } + + return parameters; +} + +// everything above is called from here as 'extractParameters()' +int main() { + std::unordered_map> parameters = extractParameters(); + return 0; +} From bddd099edff632c4b80b94a3ff9a70bf9b498e70 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 22:17:17 +0100 Subject: [PATCH 30/34] experimental --- examples/cmap-example/cmap-example.cpp | 160 +++++++++++++------------ examples/cmap-example/creadcommonh.cpp | 3 +- 2 files changed, 83 insertions(+), 80 deletions(-) diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index 0719fc95f4759..1487a61abe808 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -12,6 +12,86 @@ #include #include #include +#include + +std::vector splitString(const std::string& str, const std::string& delimiter) { + std::vector tokens; + std::size_t start = 0, end = 0; + while ((end = str.find(delimiter, start)) != std::string::npos) { + tokens.push_back(str.substr(start, end - start)); + start = end + delimiter.length(); + } + tokens.push_back(str.substr(start)); + return tokens; +} + +std::unordered_map> extractParameters() { + std::ifstream file("common/common.h"); + std::string line; + std::vector lines; + while (std::getline(file, line)) { + lines.push_back(line); + } + + std::unordered_map> parameters; + parameters["logit_bias"] = {"logit_bias", "0", "//", "way", "to", "alter", "prob", "of", "word", "being", "chosen"}; + + bool inside = false; + for (const std::string& line : lines) { + std::vector nonWhitespaceElements = splitString(line, " "); + std::cout << "nwe = \033[33m"; + for (const std::string& element : nonWhitespaceElements) { + std::cout << element << " "; + } + std::cout << "\033[0m" << std::endl; + + if (!nonWhitespaceElements.empty() && nonWhitespaceElements[0] == "struct") { + inside = true; + } + + if (nonWhitespaceElements.size() > 2 && inside) { + // Note: cannot use nwe[0] because types do not generate unique keys and so overwrite + // Here we deliberately add back the key so we can manually change it when it is different + parameters[nonWhitespaceElements[1]] = nonWhitespaceElements; + + // Remove spurious entry caused by eccentric status of logit_bias + if (parameters.count("float>") && parameters["float>"][1] == "logit_bias") { + parameters.erase("float>"); + } + } + + // Terminate the harvest + if (nonWhitespaceElements.size() > 2 && nonWhitespaceElements[1] == "infill") { + inside = false; + break; + } + } + + for (const auto& pair : parameters) { + const std::string& key = pair.first; + const std::vector& value = pair.second; + std::cout << "key: " << std::left << std::setw(20) << key << "; values: "; + for (const std::string& element : value) { + std::cout << element << " "; + } + std::cout << std::endl; + + std::string concatenatedElement = ""; + for (std::size_t i = 0; i < value.size(); i++) { + if (value[i] == "//") { + concatenatedElement = std::accumulate(value.begin() + i, value.end(), std::string("")); + // break; + } + } + + std::cout << std::string(10, ' '); + std::cout << "parameter: 
\033[32m" << std::right << std::setw(40) << key << " \033[34mdefault: \033[30m" + << std::right << std::setw(5) << value[1] << " \033[34mcomment: \033[33m" + << std::left << std::setw(80) << concatenatedElement << "\033[0m" << std::endl; + } + + return parameters; +} int main() { std::map dict; @@ -146,85 +226,7 @@ int main() { printf("\n"); } } - - std::vector splitString(const std::string& str, const std::string& delimiter) { - std::vector tokens; - std::size_t start = 0, end = 0; - while ((end = str.find(delimiter, start)) != std::string::npos) { - tokens.push_back(str.substr(start, end - start)); - start = end + delimiter.length(); - } - tokens.push_back(str.substr(start)); - return tokens; - } - - std::unordered_map> extractParameters() { - std::ifstream file("common/common.h"); - std::string line; - std::vector lines; - while (std::getline(file, line)) { - lines.push_back(line); - } - - std::unordered_map> parameters; - parameters["logit_bias"] = {"logit_bias", "0", "//", "way", "to", "alter", "probability", "of", "particular", "words"); - - bool inside = false; - - for (const std::string& line : lines) { - std::vector nonWhitespaceElements = splitString(line, " "); - std::cout << "nwe = \033[33m"; - for (const std::string& element : nonWhitespaceElements) { - std::cout << element << " "; - } - std::cout << "\033[0m" << std::endl; - - if (!nonWhitespaceElements.empty() && nonWhitespaceElements[0] == "struct") { - inside = true; - } - - if (nonWhitespaceElements.size() > 2 && inside) { - // Note: cannot use nwe[0] because types do not generate unique keys and so overwrite - // Here we deliberately add back the key so we can manually change it when it is different - parameters[nonWhitespaceElements[1]] = nonWhitespaceElements; - - // Remove spurious entry caused by eccentric status of logit_bias - if (parameters.count("float>") && parameters["float>"][1] == "logit_bias") { - parameters.erase("float>"); - } - } - - // Terminate the harvest - if (nonWhitespaceElements.size() > 2 && nonWhitespaceElements[1] == "infill") { - inside = false; - break; - } - } - - for (const auto& pair : parameters) { - const std::string& key = pair.first; - const std::vector& value = pair.second; - std::cout << "key: " << std::left << std::setw(20) << key << "; values: "; - for (const std::string& element : value) { - std::cout << element << " "; - } - std::cout << std::endl; - - std::string concatenatedElement = ""; - for (std::size_t i = 0; i < value.size(); i++) { - if (value[i] == "//") { - concatenatedElement = std::accumulate(value.begin() + i, value.end(), std::string("")); - // break; - } - } - - std::cout << std::string(10, ' '); - std::cout << "parameter: \033[32m" << std::right << std::setw(40) << key << " \033[34mdefault: \033[30m" - << std::right << std::setw(5) << value[1] << " \033[34mcomment: \033[33m" - << std::left << std::setw(80) << concatenatedElement << "\033[0m" << std::endl; - } - return parameters; - } + // now process the extra bit inserted to replicate readcommonh.py std::unordered_map> parameters = extractParameters(); diff --git a/examples/cmap-example/creadcommonh.cpp b/examples/cmap-example/creadcommonh.cpp index 7e8ac67f4d5e9..59e1e94ab6600 100644 --- a/examples/cmap-example/creadcommonh.cpp +++ b/examples/cmap-example/creadcommonh.cpp @@ -4,6 +4,7 @@ #include #include #include +#include std::vector splitString(const std::string& str, const std::string& delimiter) { std::vector tokens; @@ -25,7 +26,7 @@ std::unordered_map> extractParameters() { } std::unordered_map> 
parameters; - parameters["logit_bias"] = {"logit_bias", "0", "//", "way", "to", "alter", "prob", "of", "word", "being", "chosen"); + parameters["logit_bias"] = {"logit_bias", "0", "//", "way", "to", "alter", "prob", "of", "word", "being", "chosen"}; bool inside = false; for (const std::string& line : lines) { From 301f13cf9b37a12f8b9f7c810733f9ad652c77b4 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 22:49:57 +0100 Subject: [PATCH 31/34] Compile new cmap-example --- examples/CMakeLists.txt | 1 + examples/cmap-example/cmap-example.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index de4cf7a691768..b32706b0337e0 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -30,6 +30,7 @@ else() add_subdirectory(embd-input) add_subdirectory(llama-bench) add_subdirectory(beam-search) + add_subdirectory(cmap-example) if (LLAMA_METAL) add_subdirectory(metal) endif() diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index 1487a61abe808..5d2f39762b299 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -36,7 +36,7 @@ std::unordered_map> extractParameters() { std::unordered_map> parameters; parameters["logit_bias"] = {"logit_bias", "0", "//", "way", "to", "alter", "prob", "of", "word", "being", "chosen"}; - bool inside = false; + bool inside = false; for (const std::string& line : lines) { std::vector nonWhitespaceElements = splitString(line, " "); std::cout << "nwe = \033[33m"; From 759973be79ee1777314392ba5a9f33696edf842c Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 23:11:43 +0100 Subject: [PATCH 32/34] printf format --- examples/cmap-example/cmap-example.cpp | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index 5d2f39762b299..cfac88860093d 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -39,11 +39,11 @@ std::unordered_map> extractParameters() { bool inside = false; for (const std::string& line : lines) { std::vector nonWhitespaceElements = splitString(line, " "); - std::cout << "nwe = \033[33m"; + printf("nwe = \033[33m"); for (const std::string& element : nonWhitespaceElements) { - std::cout << element << " "; + printf("%s ", element); } - std::cout << "\033[0m" << std::endl; + printf("\033[0m\n"); if (!nonWhitespaceElements.empty() && nonWhitespaceElements[0] == "struct") { inside = true; @@ -70,11 +70,11 @@ std::unordered_map> extractParameters() { for (const auto& pair : parameters) { const std::string& key = pair.first; const std::vector& value = pair.second; - std::cout << "key: " << std::left << std::setw(20) << key << "; values: "; + printf("key: %s; values: ", key); for (const std::string& element : value) { - std::cout << element << " "; + printf("%s ", element); } - std::cout << std::endl; + printf("\n"); std::string concatenatedElement = ""; for (std::size_t i = 0; i < value.size(); i++) { @@ -84,10 +84,7 @@ std::unordered_map> extractParameters() { } } - std::cout << std::string(10, ' '); - std::cout << "parameter: \033[32m" << std::right << std::setw(40) << key << " \033[34mdefault: \033[30m" - << std::right << std::setw(5) << value[1] << " \033[34mcomment: \033[33m" - << std::left << std::setw(80) << concatenatedElement << "\033[0m" << std::endl; + printf("parameter: \033[32m key: \033[34m%s default: \033[30m%s 
\033[34mcomment: \033[33m%s\033[0m\n", key, value[1], concatenatedElement); } return parameters; From 37050ba90a9fe47bbfa593278eeba3fa0303cc90 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Tue, 10 Oct 2023 13:33:12 +0100 Subject: [PATCH 33/34] Updated cmap-example --- examples/cmap-example/cmap-example.cpp | 245 +++++++------------------ examples/cmap-example/creadcommonh.cpp | 92 ---------- 2 files changed, 69 insertions(+), 268 deletions(-) delete mode 100644 examples/cmap-example/creadcommonh.cpp diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index cfac88860093d..1c39bd2bdbd52 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -1,31 +1,61 @@ // example of a C/C++ equivalent data structure to the python dict -// there are two: std::map automatically sorts on key; std::unordered_map does not #include #include #include #include #include - -#include +#include +#include #include #include #include -#include +// there may be good reasons not to sort the parameters, but here we use map +#include #include -std::vector splitString(const std::string& str, const std::string& delimiter) { +std::vector split_string(const std::string& str, const std::string& delimiter) { std::vector tokens; std::size_t start = 0, end = 0; + bool inside_tags = false; // flag to track if we are inside "<>" + while ((end = str.find(delimiter, start)) != std::string::npos) { - tokens.push_back(str.substr(start, end - start)); + std::string token = str.substr(start, end - start); + + // if (!token.empty()) { // Add condition to exclude empty substrings + // tokens.push_back(token); + + if (!inside_tags && !token.empty()) { // Add condition to exclude empty substrings and if not inside "<>" + tokens.push_back(token); + } + // deal with cases where the split character occurs inside <> + // Update inside_tags flag based on "<>" + size_t open_tag_pos = str.find("<", start); + size_t close_tag_pos = str.find(">", start); + if (open_tag_pos != std::string::npos && close_tag_pos != std::string::npos && open_tag_pos < end) { + inside_tags = true; + } else if (close_tag_pos != std::string::npos && close_tag_pos < end) { + inside_tags = false; + } start = end + delimiter.length(); } tokens.push_back(str.substr(start)); return tokens; } -std::unordered_map> extractParameters() { +void print_parameters(const std::map>& parameters) { + for (const auto& pair : parameters) { + const std::string& key = pair.first; + const std::vector& value = pair.second; // usually has multiple elements + printf("key: %25s: values: ", key.c_str()); + for (const std::string& element : value) { + printf("%s ", element.c_str()); + } + printf("\n"); + } +} + +std::map> extract_parameters() { std::ifstream file("common/common.h"); std::string line; std::vector lines; @@ -33,199 +63,62 @@ std::unordered_map> extractParameters() { lines.push_back(line); } - std::unordered_map> parameters; - parameters["logit_bias"] = {"logit_bias", "0", "//", "way", "to", "alter", "prob", "of", "word", "being", "chosen"}; + std::map> parameters; + // fix up failure to match logit_bias; may also need to add lora_adapter; now dealt with and ready for deletion + // parameters["logit_bias"] = {"std::unordered_map" "logit_bias", "=", "0", "//", "way", "to", "alter", "prob", "of", "word", "being", "chosen"}; + // parameters["lora_adapter"] = {"std::vector>", "lora_adapter", "=", "", "//", "lora", "adapter", "path", "with", "user-defined", "scale"}; + // are we inside gpt_params? 
+ // this for loop finds all the params inside struct gpt-params bool inside = false; for (const std::string& line : lines) { - std::vector nonWhitespaceElements = splitString(line, " "); - printf("nwe = \033[33m"); - for (const std::string& element : nonWhitespaceElements) { - printf("%s ", element); + std::vector nws_elements = split_string(line, " "); + printf("nwe = "); + for (const std::string& element : nws_elements) { + printf("%s ", element.c_str()); } - printf("\033[0m\n"); + printf("\n"); - if (!nonWhitespaceElements.empty() && nonWhitespaceElements[0] == "struct") { + if (!nws_elements.empty() && nws_elements[0] == "struct" && nws_elements[1] == "gpt_params") { inside = true; } - if (nonWhitespaceElements.size() > 2 && inside) { - // Note: cannot use nwe[0] because types do not generate unique keys and so overwrite - // Here we deliberately add back the key so we can manually change it when it is different - parameters[nonWhitespaceElements[1]] = nonWhitespaceElements; + if (nws_elements.size() > 2 && inside) { + // cannot use nwe[0] as key because types do not generate unique keys and so overwrite + // Here we deliberately add back the key so we can manually change it when it is different (remove eventually) + // parameters[nws_elements[1]] = nws_elements; + std::vector copy = nws_elements; // Create a copy of nws_elements + parameters[nws_elements[1]] = copy; // Assign the copy to parameters // Remove spurious entry caused by eccentric status of logit_bias - if (parameters.count("float>") && parameters["float>"][1] == "logit_bias") { + if (parameters.count("float>") && parameters["float>"][2] == "logit_bias;") { parameters.erase("float>"); } + // Remove spurious entry caused by eccentric status of lora_adapter + if (parameters.count("float>>") && parameters["float>>"][2] == "lora_adapter;") { + parameters.erase("float>>"); + } } - // Terminate the harvest - if (nonWhitespaceElements.size() > 2 && nonWhitespaceElements[1] == "infill") { + // Terminate the harvest; TODO: not robust; need better terminator; this just a crude hack for now + if (nws_elements.size() > 2 && nws_elements[1] == "infill") { inside = false; break; - } - } - - for (const auto& pair : parameters) { - const std::string& key = pair.first; - const std::vector& value = pair.second; - printf("key: %s; values: ", key); - for (const std::string& element : value) { - printf("%s ", element); - } - printf("\n"); - - std::string concatenatedElement = ""; - for (std::size_t i = 0; i < value.size(); i++) { - if (value[i] == "//") { - concatenatedElement = std::accumulate(value.begin() + i, value.end(), std::string("")); - // break; } } + // now display them (unnecessary operationally; here for development) + print_parameters(parameters); - printf("parameter: \033[32m key: \033[34m%s default: \033[30m%s \033[34mcomment: \033[33m%s\033[0m\n", key, value[1], concatenatedElement); - } - + // return the results (will eventually become a void function) return parameters; } int main() { - std::map dict; - std::map> helpdict; - - dict[std::string("apple")] = 5; - dict[std::string("banana")] = 2; - dict[std::string("orange")] = 7; - - - // Accessing elements in the map - printf("First kind of dictionary\n\nValue of apple: %d\n", dict[std::string("apple")]); - - for (const auto& pair : dict) { - printf("Key: %10s, Value: %4d\n", pair.first.c_str(), pair.second); - } - - // Now try the helpdict idea - - printf("Second kind of dictionary\n"); - - // Create a list of strings - std::list stringList = {"apple", "banana", "orange"}; - 
- // Add key-value pair to map - helpdict["fruits"] = stringList; - - // Access and modify the list of strings - std::list& fruitsList = helpdict["fruits"]; - fruitsList.push_back("grape"); - fruitsList.push_back("pineapple"); - - for (const auto& pair : helpdict) { - printf("helpdict contains a list of %s\n", pair.first.c_str()); - for (const auto& element : pair.second) { - printf(" %s", element.c_str()); - } - printf("\n"); - } - - // Create a binary key for each value consisting of a list of strings - - std::map> bitdict; - - // Example binary key - int binaryKey1 = 0b0000001; - int binaryKey2 = 0b0000010; - int binaryKey3 = 0b0000100; - int binaryKey4 = 0b0001000; - int binaryKey5 = 0b0010000; - - // Convert binary key to string - std::string keyString1 = std::bitset<8>(binaryKey1).to_string(); - std::string keyString2 = std::bitset<8>(binaryKey2).to_string(); - std::string keyString3 = std::bitset<8>(binaryKey3).to_string(); - std::string keyString4 = std::bitset<8>(binaryKey4).to_string(); - std::string keyString5 = std::bitset<8>(binaryKey5).to_string(); - - // Add key-value pair to map - bitdict[keyString1] = {"-h", "--help", "print this help list and exit"}; - bitdict[keyString2] = {"-f", "FNAME", "--file", "FNAME", "read the prompts from an external text file"}; - bitdict[keyString3] = {"-n", "N", "--n-predict", "N", "number of tokens to predict in generating a completion"}; - bitdict[keyString4] = {"-t", "N", "--threads", "N", "number of threads to use"}; - bitdict[keyString5] = {"-m", "MODELPATH", "--model", "MODELPATH", "path to llama model to use"}; - - - for (const auto& pair : bitdict) { - printf("help dictionary contains a list of arguments specific to this app %s\n", pair.first.substr(pair.first.size() - 5).c_str()); - for (const auto& element : pair.second) { - printf(" %5s", element.c_str()); - } - printf("\n"); - } - - printf("\nThis is the complete help file in this mock-up illustrative example:\n\n"); - for (const auto& pair: bitdict) { - printf("%s ",pair.first.c_str()); - for (const auto& element : pair.second) { - printf(" %5s", element.c_str()); - } - printf("\n"); - } - - // Now we try to use the appcode to select from the help available - // app1 has only -h and -f so 0b00011; app2 has only -h and -n so 0b00101 - - int app1code = 0b0001011; - int app2code = 0b0010111; - - printf("\nNow processing app with only -h, -t and -f implemented and appcode %3d\n", app1code); - if (app1code != 0) { - for (const auto& kvp : bitdict) { - if ((app1code & std::stoi(kvp.first)) != 0) { - printf("%s ",kvp.first.c_str()); - for (const auto& element : kvp.second) { - printf(" %5s", element.c_str()); - } - printf("\n"); - } - } - printf("\n"); - } - - printf("\nNow processing app with only -h, -f, -m and -n implemented and appcode %3d\n", app2code); - if (app2code != 0) { - for (const auto& kvp : bitdict) { - if ((app2code & std::stoi(kvp.first)) != 0) { - printf("%s ",kvp.first.c_str()); - for (const auto& element : kvp.second) { - printf(" %5s", element.c_str()); - } - printf("\n"); - } - } - printf("\n"); - } - - // This is more like the general way to do it - std::vector appcodes = {2, 5, 11, 17, 23, 31}; - for (size_t i = 0; i < appcodes.size(); ++i) { - int x = appcodes[i]; - if (x != 0) { - for (const auto& kvp : bitdict) { - if ((x & std::stoi(kvp.first)) != 0) { - printf("appcode %3d %s ", x, kvp.first.c_str()); - for (const auto& element : kvp.second) { - printf(" %5s", element.c_str()); - } - printf("\n"); - } - } - printf("\n"); - } - } - // now process the extra bit 
inserted to replicate readcommonh.py - std::unordered_map> parameters = extractParameters(); + // process the code inserted to replicate readcommonh.py + // this does not produce output but here is forced; it just collects the output into parameters and returns 0 + std::map> parameters = extract_parameters(); + print_parameters(parameters); return 0; } diff --git a/examples/cmap-example/creadcommonh.cpp b/examples/cmap-example/creadcommonh.cpp deleted file mode 100644 index 59e1e94ab6600..0000000000000 --- a/examples/cmap-example/creadcommonh.cpp +++ /dev/null @@ -1,92 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -std::vector splitString(const std::string& str, const std::string& delimiter) { - std::vector tokens; - std::size_t start = 0, end = 0; - while ((end = str.find(delimiter, start)) != std::string::npos) { - tokens.push_back(str.substr(start, end - start)); - start = end + delimiter.length(); - } - tokens.push_back(str.substr(start)); - return tokens; -} - -std::unordered_map> extractParameters() { - std::ifstream file("common/common.h"); - std::string line; - std::vector lines; - while (std::getline(file, line)) { - lines.push_back(line); - } - - std::unordered_map> parameters; - parameters["logit_bias"] = {"logit_bias", "0", "//", "way", "to", "alter", "prob", "of", "word", "being", "chosen"}; - - bool inside = false; - for (const std::string& line : lines) { - std::vector nonWhitespaceElements = splitString(line, " "); - std::cout << "nwe = \033[33m"; - for (const std::string& element : nonWhitespaceElements) { - std::cout << element << " "; - } - std::cout << "\033[0m" << std::endl; - - if (!nonWhitespaceElements.empty() && nonWhitespaceElements[0] == "struct") { - inside = true; - } - - if (nonWhitespaceElements.size() > 2 && inside) { - // Note: cannot use nwe[0] because types do not generate unique keys and so overwrite - // Here we deliberately add back the key so we can manually change it when it is different - parameters[nonWhitespaceElements[1]] = nonWhitespaceElements; - - // Remove spurious entry caused by eccentric status of logit_bias - if (parameters.count("float>") && parameters["float>"][1] == "logit_bias") { - parameters.erase("float>"); - } - } - - // Terminate the harvest - if (nonWhitespaceElements.size() > 2 && nonWhitespaceElements[1] == "infill") { - inside = false; - break; - } - } - - for (const auto& pair : parameters) { - const std::string& key = pair.first; - const std::vector& value = pair.second; - std::cout << "key: " << std::left << std::setw(20) << key << "; values: "; - for (const std::string& element : value) { - std::cout << element << " "; - } - std::cout << std::endl; - - std::string concatenatedElement = ""; - for (std::size_t i = 0; i < value.size(); i++) { - if (value[i] == "//") { - concatenatedElement = std::accumulate(value.begin() + i, value.end(), std::string("")); - // break; - } - } - - std::cout << std::string(10, ' '); - std::cout << "parameter: \033[32m" << std::right << std::setw(40) << key << " \033[34mdefault: \033[30m" - << std::right << std::setw(5) << value[1] << " \033[34mcomment: \033[33m" - << std::left << std::setw(80) << concatenatedElement << "\033[0m" << std::endl; - } - - return parameters; -} - -// everything above is called from here as 'extractParameters()' -int main() { - std::unordered_map> parameters = extractParameters(); - return 0; -} From 37ae96c712954b92420da6ebbbadc54761e3f42f Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Tue, 10 Oct 2023 13:50:28 +0100 Subject: [PATCH 
34/34] Correct bug --- examples/cmap-example/cmap-example.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index 1c39bd2bdbd52..d06699c864b94 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -1,4 +1,4 @@ -// example of a C/C++ equivalent data structure to the python dict +// example of a C/C++ equivalent data structure to the python dict in readcommonh.py #include #include @@ -101,7 +101,7 @@ std::map<std::string, std::vector<std::string>> extract_parameters() { } // Terminate the harvest; TODO: not robust; need better terminator; this just a crude hack for now - if (nws_elements.size() > 2 && nws_elements[1] == "infill") { + if (nws_elements.size() > 2 && nws_elements[2] == "infill") { inside = false; break; }
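
Note on the data structures used across these patches: the sketch below is a minimal, standalone illustration of the two dict-like containers (std::map, which keeps keys sorted, and std::unordered_map, which does not) together with a plain whitespace splitter of the kind used by extract_parameters(). The headers, template arguments, and the sample gpt_params declaration line are assumptions inferred from how the code in the patches is used, not copies of the committed files, and the splitter shown is the simple form without the later "<>"-aware handling.

// Minimal sketch (illustrative only): C++ counterparts of a Python dict and the
// whitespace splitter used to tokenise declaration lines from common/common.h.
// Headers and template arguments here are inferred from usage in the patches above.
#include <cstddef>
#include <cstdio>
#include <map>
#include <string>
#include <unordered_map>
#include <vector>

// Split `str` on `delimiter`, dropping empty tokens (simple form, without the
// later "<>"-aware handling for template-typed members).
static std::vector<std::string> split_string(const std::string & str, const std::string & delimiter) {
    std::vector<std::string> tokens;
    std::size_t start = 0, end = 0;
    while ((end = str.find(delimiter, start)) != std::string::npos) {
        std::string token = str.substr(start, end - start);
        if (!token.empty()) {
            tokens.push_back(token);
        }
        start = end + delimiter.length();
    }
    if (start < str.size()) {
        tokens.push_back(str.substr(start));
    }
    return tokens;
}

int main() {
    // std::map iterates in key order; std::unordered_map iterates in an unspecified order.
    std::map<std::string, int> sorted_dict = { {"orange", 7}, {"apple", 5}, {"banana", 2} };
    std::unordered_map<std::string, int> hashed_dict(sorted_dict.begin(), sorted_dict.end());

    printf("std::map (keys come out sorted):\n");
    for (const auto & kv : sorted_dict) {
        printf("  %-8s %d\n", kv.first.c_str(), kv.second);
    }
    printf("std::unordered_map (iteration order unspecified):\n");
    for (const auto & kv : hashed_dict) {
        printf("  %-8s %d\n", kv.first.c_str(), kv.second);
    }

    // Same shape as the gpt_params table: member name -> tokenised declaration line.
    // The line below is a made-up example of the kind of declaration being parsed.
    std::map<std::string, std::vector<std::string>> parameters;
    const std::string line = "    int32_t n_predict = -1; // number of tokens to predict";
    std::vector<std::string> tokens = split_string(line, " ");
    if (tokens.size() > 1) {
        parameters[tokens[1]] = tokens; // tokens[1] is the member name, as in the patches
    }
    for (const auto & kv : parameters) {
        printf("key: %-12s tokens:", kv.first.c_str());
        for (const auto & t : kv.second) {
            printf(" %s", t.c_str());
        }
        printf("\n");
    }
    return 0;
}

Keeping the table as std::map<std::string, std::vector<std::string>> means the parameter dump comes out sorted by member name, which matches the "here we use map" comment in patch 33; an unordered_map would avoid the ordering cost when sorted output is not needed.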