From 5a5a71d7bdf70ac0d72711bcb6e1ae30d3ac5c7e Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Wed, 4 Oct 2023 17:31:15 +0100 Subject: [PATCH 01/34] Starting gpt4all --- examples/gpt4all.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/gpt4all.sh b/examples/gpt4all.sh index 5fd739e55c554..92f1e372b244a 100755 --- a/examples/gpt4all.sh +++ b/examples/gpt4all.sh @@ -7,7 +7,7 @@ cd `dirname $0` cd .. -./main --color --instruct --threads 4 \ +./build/bin/main --color --instruct --threads 4 \ --model ./models/gpt4all-7B/gpt4all-lora-quantized.bin \ --file ./prompts/alpaca.txt \ --batch_size 8 --ctx_size 2048 -n -1 \ From 1bb192fc2747f8693a0301cca5c36a09165ac1f9 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Thu, 5 Oct 2023 11:45:20 +0100 Subject: [PATCH 02/34] Add cmap_example.cpp --- scripts/cmap_example.cpp | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 scripts/cmap_example.cpp diff --git a/scripts/cmap_example.cpp b/scripts/cmap_example.cpp new file mode 100644 index 0000000000000..78b6330256f6f --- /dev/null +++ b/scripts/cmap_example.cpp @@ -0,0 +1,23 @@ +// example of a C/C++ equivalent data structure to the python dict +// there are two: std::map automatically sorts on key; std::unordered_map doesn't + +#include +#include + +int main() { + std::map dict; + + dict["apple"] = 5; + dict["banana"] = 2; + dict["orange"] = 7; + + + // Accessing elements in the map + std::cout << "Value of apple: " << dict["apple"] << std::endl; + + for (const auto& pair : dict) { + std::cout << "Key: " << pair.first << ", Value: " << pair.second << std::endl; + } + + return 0; +} \ No newline at end of file From aea055db42eadc44459188e8929adae43b38d9b6 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Thu, 5 Oct 2023 11:52:44 +0100 Subject: [PATCH 03/34] Update cmap_example --- examples/cmap_example/CMakeLists.txt | 5 +++++ {scripts => examples/cmap_example}/cmap_example.cpp | 0 2 files changed, 5 insertions(+) create mode 100644 examples/cmap_example/CMakeLists.txt rename {scripts => examples/cmap_example}/cmap_example.cpp (100%) diff --git a/examples/cmap_example/CMakeLists.txt b/examples/cmap_example/CMakeLists.txt new file mode 100644 index 0000000000000..48acc6f74034d --- /dev/null +++ b/examples/cmap_example/CMakeLists.txt @@ -0,0 +1,5 @@ +set(TARGET cmap_example) +add_executable(${TARGET} cmap_example.cpp) +install(TARGETS ${TARGET} RUNTIME) +target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_compile_features(${TARGET} PRIVATE cxx_std_11) diff --git a/scripts/cmap_example.cpp b/examples/cmap_example/cmap_example.cpp similarity index 100% rename from scripts/cmap_example.cpp rename to examples/cmap_example/cmap_example.cpp From eb939e0638ed30cda3c93dd1dbd4937ca56c8b40 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Thu, 5 Oct 2023 12:04:03 +0100 Subject: [PATCH 04/34] Update cmap-example --- examples/{cmap_example => cmap-example}/CMakeLists.txt | 4 ++-- .../cmap_example.cpp => cmap-example/cmap-example.cpp} | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename examples/{cmap_example => cmap-example}/CMakeLists.txt (71%) rename examples/{cmap_example/cmap_example.cpp => cmap-example/cmap-example.cpp} (100%) diff --git a/examples/cmap_example/CMakeLists.txt b/examples/cmap-example/CMakeLists.txt similarity index 71% rename from examples/cmap_example/CMakeLists.txt rename to examples/cmap-example/CMakeLists.txt index 48acc6f74034d..c5820f7b1bfcd 100644 --- a/examples/cmap_example/CMakeLists.txt +++ 
b/examples/cmap-example/CMakeLists.txt @@ -1,5 +1,5 @@ -set(TARGET cmap_example) -add_executable(${TARGET} cmap_example.cpp) +set(TARGET cmap-example) +add_executable(${TARGET} cmap-example.cpp) install(TARGETS ${TARGET} RUNTIME) target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_11) diff --git a/examples/cmap_example/cmap_example.cpp b/examples/cmap-example/cmap-example.cpp similarity index 100% rename from examples/cmap_example/cmap_example.cpp rename to examples/cmap-example/cmap-example.cpp From e3c8f7bd167e51a346909d6324dd6b8a6796496f Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Thu, 5 Oct 2023 12:13:30 +0100 Subject: [PATCH 05/34] Update cmap-example --- examples/CMakeLists.txt | 1 + examples/cmap-example/cmap-example.cpp | 2 ++ 2 files changed, 3 insertions(+) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index de4cf7a691768..9b39b3fa9aa13 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -30,6 +30,7 @@ else() add_subdirectory(embd-input) add_subdirectory(llama-bench) add_subdirectory(beam-search) + add_subdirectory(cmap-examples) if (LLAMA_METAL) add_subdirectory(metal) endif() diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index 78b6330256f6f..e3af5073fd754 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -1,6 +1,8 @@ // example of a C/C++ equivalent data structure to the python dict // there are two: std::map automatically sorts on key; std::unordered_map doesn't +#include "llama.h" + #include #include From 73fa2deee10e9c80b4598683fd5abe54cbc6d880 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Thu, 5 Oct 2023 12:18:21 +0100 Subject: [PATCH 06/34] Update cmap-example --- examples/cmap-example/cmap-example.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index e3af5073fd754..c6db1129b86d9 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -1,9 +1,6 @@ // example of a C/C++ equivalent data structure to the python dict -// there are two: std::map automatically sorts on key; std::unordered_map doesn't +// there are two: std::map automatically sorts on key; std::unordered_map does not -#include "llama.h" - -#include #include int main() { @@ -15,10 +12,10 @@ int main() { // Accessing elements in the map - std::cout << "Value of apple: " << dict["apple"] << std::endl; + printf("Value of apple: %d\n", dict["apple"]); for (const auto& pair : dict) { - std::cout << "Key: " << pair.first << ", Value: " << pair.second << std::endl; + printf("Key: %s, Value: $s\n", pair.first, pair.second); } return 0; From 9dad8b81e1219da954a97e5ddfd62127ffbb2e66 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Thu, 5 Oct 2023 12:23:49 +0100 Subject: [PATCH 07/34] Update cmap-example --- Makefile | 3 +++ examples/CMakeLists.txt | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 91198c555f5d0..8d37947a54f8a 100644 --- a/Makefile +++ b/Makefile @@ -564,6 +564,9 @@ perplexity: examples/perplexity/perplexity.cpp build-info.h ggml. 
embedding: examples/embedding/embedding.cpp build-info.h ggml.o llama.o common.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) +cmap-example: examples/cmap-example/cmap-example.cpp build-info.h ggml.o llama.o common.o $(OBJS) + $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.o llama.o common.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 9b39b3fa9aa13..b32706b0337e0 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -30,7 +30,7 @@ else() add_subdirectory(embd-input) add_subdirectory(llama-bench) add_subdirectory(beam-search) - add_subdirectory(cmap-examples) + add_subdirectory(cmap-example) if (LLAMA_METAL) add_subdirectory(metal) endif() From 7804fe0d687c42d2505d741f70ceb350ecce2942 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Thu, 5 Oct 2023 12:31:10 +0100 Subject: [PATCH 08/34] Update cmap-example --- examples/cmap-example/cmap-example.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index c6db1129b86d9..a86171b6c11f7 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -2,17 +2,18 @@ // there are two: std::map automatically sorts on key; std::unordered_map does not #include +#include int main() { std::map dict; - dict["apple"] = 5; - dict["banana"] = 2; - dict["orange"] = 7; + dict[std::string("apple")] = 5; + dict[std::string("banana")] = 2; + dict[std::string("orange")] = 7; // Accessing elements in the map - printf("Value of apple: %d\n", dict["apple"]); + printf("Value of apple: %d\n", dict[std::string("apple")]); for (const auto& pair : dict) { printf("Key: %s, Value: $s\n", pair.first, pair.second); From 317d195f68f79a8cadc8db38f63009c18d3f3c1b Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Thu, 5 Oct 2023 12:33:20 +0100 Subject: [PATCH 09/34] Update cmap-examples --- examples/cmap-example/cmap-example.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index a86171b6c11f7..7b11dccda8051 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -16,7 +16,7 @@ int main() { printf("Value of apple: %d\n", dict[std::string("apple")]); for (const auto& pair : dict) { - printf("Key: %s, Value: $s\n", pair.first, pair.second); + printf("Key: %s, Value: $d\n", pair.first.c_str(), pair.second); } return 0; From 380a10f8b5c3f5b50379f5ccb60a240217f7c9c9 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Thu, 5 Oct 2023 12:35:19 +0100 Subject: [PATCH 10/34] Update cmap-example --- examples/cmap-example/cmap-example.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index 7b11dccda8051..daf827e51706e 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -16,7 +16,7 @@ int main() { printf("Value of apple: %d\n", dict[std::string("apple")]); for (const auto& pair : dict) { - printf("Key: %s, Value: $d\n", pair.first.c_str(), pair.second); + printf("Key: %s, Value: %d\n", pair.first.c_str(), pair.second); } return 0; From 275d56e99eaf509702eddfbcdb4363c27545e4a3 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Thu, 5 Oct 2023 15:38:21 +0100 Subject: 
[PATCH 11/34] Update cmap-example --- examples/cmap-example/cmap-example.cpp | 107 ++++++++++++++++++++++++- 1 file changed, 105 insertions(+), 2 deletions(-) diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index daf827e51706e..bc4e5ce7cdd94 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -2,10 +2,13 @@ // there are two: std::map automatically sorts on key; std::unordered_map does not #include +#include #include +#include int main() { std::map dict; + std::map> helpdict; dict[std::string("apple")] = 5; dict[std::string("banana")] = 2; @@ -13,10 +16,110 @@ int main() { // Accessing elements in the map - printf("Value of apple: %d\n", dict[std::string("apple")]); + printf("First kind of dictionary\n\nValue of apple: %d\n", dict[std::string("apple")]); for (const auto& pair : dict) { - printf("Key: %s, Value: %d\n", pair.first.c_str(), pair.second); + printf("Key: %10s, Value: %4d\n", pair.first.c_str(), pair.second); + } + + // Now try the helpdict idea + + printf("Second kind of dictionary\n"); + + // Create a list of strings + std::list stringList = {"apple", "banana", "orange"}; + + // Add key-value pair to map + helpdict["fruits"] = stringList; + + // Access and modify the list of strings + std::list& fruitsList = helpdict["fruits"]; + fruitsList.push_back("grape"); + fruitsList.push_back("pineapple"); + + for (const auto& pair : helpdict) { + printf("helpdict contains a list of %s\n", pair.first.c_str()); + for (const auto& element : pair.second) { + printf(" %s", element.c_str()); + } + printf("\n"); + } + + // Create a binary key for each value consisting of a list of strings + + std::map> bitdict; + + // Example binary key + int binaryKey1 = 0b0000001; + int binaryKey2 = 0b0000010; + int binaryKey3 = 0b0000100; + int binaryKey4 = 0b0001000; + int binaryKey5 = 0b0010000; + + // Convert binary key to string + std::string keyString1 = std::bitset<8>(binaryKey1).to_string(); + std::string keyString2 = std::bitset<8>(binaryKey2).to_string(); + std::string keyString3 = std::bitset<8>(binaryKey3).to_string(); + std::string keyString4 = std::bitset<8>(binaryKey4).to_string(); + std::string keyString5 = std::bitset<8>(binaryKey5).to_string(); + + // Add key-value pair to map + bitdict[keyString1] = {"-h", "--help", "print this help list and exit"}; + bitdict[keyString2] = {"-f", "FNAME", "--file", "FNAME", "read the prompts from an external text file"}; + bitdict[keyString3] = {"-n", "N", "--n-predict", "N", "number of tokens to predict in generating a completion"}; + bitdict[keyString4] = {"-t", "N", "--threads", "N", "number of threads to use"}; + bitdict[keyString5] = {"-m", "MODELPATH", "--model", "MODELPATH", "path to llama model to use"}; + + + for (const auto& pair : bitdict) { + printf("help dictionary contains a list of arguments specific to this app %s\n", pair.first.substr(pair.first.size() - 5).c_str()); + for (const auto& element : pair.second) { + printf(" %5s", element.c_str()); + } + printf("\n"); + } + + printf("\nThis is the complete help file in this mock-up illustrative example:\n\n"); + for (const auto& pair: bitdict) { + printf("%s ",pair.first.c_str()); + for (const auto& element : pair.second) { + printf(" %5s", element.c_str()); + } + printf("\n"); + } + + // Now we try to use the appcode to select from the help available + // app1 has only -h and -f so 0b00011; app2 has only -h and -n so 0b00101 + + int app1code = 0b01011; + int app2code = 0b10101; + + printf("\nNow 
processing app with only -h, -t and -f and appcode %3d\n", app1code); + if (app1code != 0) { + for (const auto& kvp : bitdict) { + if ((app1code & std::stoi(kvp.first)) != 0) { + printf("%s ",kvp.first.c_str()); + for (const auto& element : kvp.second) { + printf(" %5s", element.c_str()); + } + printf("\n"); + } + } + printf("\n"); + } + + printf("\nNow processing app with only -h, -m, -n and appcode %3d\n", app2code); + if (app2code != 0) { + for (const auto& kvp : bitdict) { + if ((app2code & std::stoi(kvp.first)) != 0) { + printf("%s ",kvp.first.c_str()); + for (const auto& element : kvp.second) { + printf(" %5s", element.c_str()); + } + printf("\n"); + } + } + printf("\n"); } return 0; From 297b7b6301b9ecc554084d22fea5ae1f2ae0b79e Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Thu, 5 Oct 2023 17:30:48 +0100 Subject: [PATCH 12/34] Automation --- examples/cmap-example/cmap-example.cpp | 26 +++++++++++++--- .../cmap-example/find_implemented_args.py | 30 +++++++++++++++++++ 2 files changed, 52 insertions(+), 4 deletions(-) create mode 100644 examples/cmap-example/find_implemented_args.py diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index bc4e5ce7cdd94..b2a7b2735c597 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -5,6 +5,7 @@ #include #include #include +#include int main() { std::map dict; @@ -91,10 +92,10 @@ int main() { // Now we try to use the appcode to select from the help available // app1 has only -h and -f so 0b00011; app2 has only -h and -n so 0b00101 - int app1code = 0b01011; - int app2code = 0b10101; + int app1code = 0b0001011; + int app2code = 0b0010111; - printf("\nNow processing app with only -h, -t and -f and appcode %3d\n", app1code); + printf("\nNow processing app with only -h, -t and -f implemented and appcode %3d\n", app1code); if (app1code != 0) { for (const auto& kvp : bitdict) { if ((app1code & std::stoi(kvp.first)) != 0) { @@ -108,7 +109,7 @@ int main() { printf("\n"); } - printf("\nNow processing app with only -h, -m, -n and appcode %3d\n", app2code); + printf("\nNow processing app with only -h, -f, -m and -n implemented and appcode %3d\n", app2code); if (app2code != 0) { for (const auto& kvp : bitdict) { if ((app2code & std::stoi(kvp.first)) != 0) { @@ -122,5 +123,22 @@ int main() { printf("\n"); } + // This is more like the general way to do it + std::vector appcodes = {2, 5, 11, 17, 23, 31}; + for (size_t i = 0; i < appcodes.size(); ++i) { + int x = appcodes[i]; + if (x != 0) { + for (const auto& kvp : bitdict) { + if ((x & std::stoi(kvp.first)) != 0) { + printf("Appcode %3d %s ", x, kvp.first.c_str()); + for (const auto& element : kvp.second) { + printf(" %5s", element.c_str()); + } + printf("\n"); + } + } + printf("\n"); + } + } return 0; } \ No newline at end of file diff --git a/examples/cmap-example/find_implemented_args.py b/examples/cmap-example/find_implemented_args.py new file mode 100644 index 0000000000000..8d745c822a227 --- /dev/null +++ b/examples/cmap-example/find_implemented_args.py @@ -0,0 +1,30 @@ +import os +import re + +def find_arguments(directory): + arguments = {} + + # Get a list of all .cpp files in the specified directory + cpp_files = [filename for filename in os.listdir(directory) if filename.endswith('.cpp')] + + # Read each .cpp file and search for the specified expressions + for filename in cpp_files: + with open(os.path.join(directory, filename), 'r') as file: + content = file.read() + + # Search for the expressions using regular 
expressions + matches = re.findall(r'argv\s*\[\s*i\s*\]\s*==\s*([\'"])(?P-[a-zA-Z]+|\-\-[a-zA-Z]+[a-zA-Z0-9-]*)\1', content) + + # Add the found arguments to the dictionary + arguments[filename] = [match[1] for match in matches] + + return arguments + + +# Specify the directory you want to search for cpp files +directory = '/Users/edsilm2/llama.cpp/examples' + +# Call the function and print the result +result = find_arguments(directory) +for filename, arguments in result.items(): + print(filename, arguments) \ No newline at end of file From 739d6d30227b15114e40881ac29eda04521e2a2c Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Fri, 6 Oct 2023 09:52:33 +0100 Subject: [PATCH 13/34] Automatic helper dev --- common/common.h | 1 + examples/cmap-example/cmap-example.cpp | 2 +- .../cmap-example/find_implemented_args.py | 55 ++++++--- help_list.txt | 104 ++++++++++++++++++ 4 files changed, 147 insertions(+), 15 deletions(-) create mode 100644 help_list.txt diff --git a/common/common.h b/common/common.h index e095c56e309c2..30260486f8896 100644 --- a/common/common.h +++ b/common/common.h @@ -79,6 +79,7 @@ struct gpt_params { std::string model_draft = ""; // draft model for speculative decoding std::string model_alias = "unknown"; // model alias std::string prompt = ""; + std::string prompt_file = ""; // store the external prompt file std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state std::string input_prefix = ""; // string to prefix user inputs with std::string input_suffix = ""; // string to suffix user inputs with diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index b2a7b2735c597..ece30702aa098 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -130,7 +130,7 @@ int main() { if (x != 0) { for (const auto& kvp : bitdict) { if ((x & std::stoi(kvp.first)) != 0) { - printf("Appcode %3d %s ", x, kvp.first.c_str()); + printf("appcode %3d %s ", x, kvp.first.c_str()); for (const auto& element : kvp.second) { printf(" %5s", element.c_str()); } diff --git a/examples/cmap-example/find_implemented_args.py b/examples/cmap-example/find_implemented_args.py index 8d745c822a227..e4668943650ba 100644 --- a/examples/cmap-example/find_implemented_args.py +++ b/examples/cmap-example/find_implemented_args.py @@ -1,22 +1,27 @@ +# search the specified directory for files that include argv[i] == '-f' or '--file' arguments + import os import re def find_arguments(directory): arguments = {} - # Get a list of all .cpp files in the specified directory - cpp_files = [filename for filename in os.listdir(directory) if filename.endswith('.cpp')] + # Use os.walk() to traverse through files in directory and subdirectories + for root, dirs, files in os.walk(directory): + for file in files: + if file.endswith('.cpp'): + filepath = os.path.join(root, file) + with open(filepath, 'r') as file: + content = file.read() - # Read each .cpp file and search for the specified expressions - for filename in cpp_files: - with open(os.path.join(directory, filename), 'r') as file: - content = file.read() + # Search for the expression "params." and read the attribute without trailing detritus + matches = re.findall(r'params\.(.*?)(?=[\). 
<,;}])', content) - # Search for the expressions using regular expressions - matches = re.findall(r'argv\s*\[\s*i\s*\]\s*==\s*([\'"])(?P-[a-zA-Z]+|\-\-[a-zA-Z]+[a-zA-Z0-9-]*)\1', content) + # Remove duplicates from matches list + arguments_list = list(set([match.strip() for match in matches])) - # Add the found arguments to the dictionary - arguments[filename] = [match[1] for match in matches] + # Add the matches to the dictionary + arguments[filepath] = arguments_list return arguments @@ -24,7 +29,29 @@ def find_arguments(directory): # Specify the directory you want to search for cpp files directory = '/Users/edsilm2/llama.cpp/examples' -# Call the function and print the result -result = find_arguments(directory) -for filename, arguments in result.items(): - print(filename, arguments) \ No newline at end of file +if __name__ == '__main__': + # Call the find function and print the result + result = find_arguments(directory) + all_of_them = set() + for filename, arguments in result.items(): + print(f"Filename: \033[32m{filename}\033[0m, arguments: {arguments}\n") + for argument in arguments: + if argument not in all_of_them: + all_of_them.add("".join(argument)) + print(f"\033[32mAll of them: \033[0m{sorted(all_of_them)}.") + + with open("help_list.txt", "r") as helpfile: + lines = helpfile.read().split("\n") + for filename, arguments in result.items(): + parameters = [] + for line in lines: + for argument in arguments: + if argument in line: + parameters.append(line) + all_parameters = set(parameters) + print(f"\n\nFilename: \033[32m{filename.split('/')[-1]}\033[0m\n\n command-line arguments available and gpt-params functions implemented:\n") + if not all_parameters: + print(f" \033[032mNone\033[0m\n") + else: + for parameter in all_parameters: + print(f" help: \033[33m{parameter:<30}\033[0m") \ No newline at end of file diff --git a/help_list.txt b/help_list.txt new file mode 100644 index 0000000000000..08dcf67ec977c --- /dev/null +++ b/help_list.txt @@ -0,0 +1,104 @@ +-h, --helpshow this help message and exit +-i, --interactive run in interactive mode +--interactive-first run in interactive mode and wait for input right away +-ins, --instructrun in instruction mode (use with Alpaca models) +--multiline-input allows you to write or paste multiple lines without ending each in '\\' +-r PROMPT, --reverse-prompt PROMPT + halt generation at PROMPT, return control in interactive mode + (can be specified more than once for multiple prompts). +--color colorise output to distinguish prompt and user input from generations +-s SEED, --seed SEED RNG seed (default: -1, use random seed for < 0) +-t N, --threads N number of threads to use during generation (default: %d)\n", params.n_threads); +-tb N, --threads-batch N + number of threads to use during batch and prompt processing (default: same as --threads) +-p PROMPT, --prompt PROMPT + prompt to start generation with (default: empty) +-e, --escape process prompt escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\) +--prompt-cache FNAME file to cache prompt state for faster startup (default: none) +--prompt-cache-all if specified, saves user input and generations to cache as well. + not supported with --interactive or other interactive options +--prompt-cache-ro if specified, uses the prompt cache but does not update it. +--random-prompt start with a randomized prompt. 
+--in-prefix-bos prefix BOS to user inputs, preceding the `--in-prefix` string +--in-prefix STRING string to prefix user inputs with (default: empty) +--in-suffix STRING string to suffix after user inputs with (default: empty) +-f FNAME, --file FNAME + prompt file to start generation. +-n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict); +-c N, --ctx-size N size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx); +-b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch); +--top-k N top-k sampling (default: %d, 0 = disabled)\n", params.top_k); +--top-p N top-p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p); +--tfs N tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n", (double)params.tfs_z); +--typical N locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n", (double)params.typical_p); +--repeat-last-n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", params.repeat_last_n); +--repeat-penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)params.repeat_penalty); +--presence-penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)params.presence_penalty); +--frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)params.frequency_penalty); +--mirostat N use Mirostat sampling. + Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used. + (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", params.mirostat); +--mirostat-lr N Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta); +--mirostat-ent NMirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau); +-l TOKEN_ID(+/-)BIAS, --logit-bias TOKEN_ID(+/-)BIAS + modifies the likelihood of token appearing in the completion, + i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello', + or `--logit-bias 15043-1` to decrease likelihood of token ' Hello' +--grammar GRAMMAR BNF-like grammar to constrain generations (see samples in grammars/ dir) +--grammar-file FNAME file to read grammar from +--cfg-negative-prompt PROMPT + negative prompt to use for guidance. (default: empty) +--cfg-negative-prompt-file FNAME + negative prompt file to use for guidance. 
(default: empty) +--cfg-scale N strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale); +--rope-scale N RoPE context linear scaling factor, inverse of --rope-freq-scale +--rope-freq-base N RoPE base frequency, used by NTK-aware scaling (default: loaded from model) +--rope-freq-scale N RoPE frequency linear scaling factor (default: loaded from model) +--ignore-eos ignore end of stream token and continue generating (implies --logit-bias 2-inf) +--no-penalize-nldo not penalize newline token +--memory-f32 use f32 instead of f16 for memory key+value (default: disabled) + not recommended: doubles context memory required and no measurable increase in quality +--temp N temperature (default: %.1f)\n", (double)params.temp); +--logits-all return logits for all tokens in the batch (default: disabled) +--hellaswag compute HellaSwag score over random tasks from datafile supplied with -f +--hellaswag-tasks N number of tasks to use when computing the HellaSwag score (default: %zu)\n", params.hellaswag_tasks); +--keep N number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep); +--draft N number of tokens to draft for speculative decoding (default: %d)\n", params.n_draft); +--chunks Nmax number of chunks to process (default: %d, -1 = all)\n", params.n_chunks); +-np N, --parallel N number of parallel sequences to decode (default: %d)\n", params.n_parallel); +-ns N, --sequences N number of sequences to decode (default: %d)\n", params.n_sequences); +-cb, --cont-batching enable continuous batching (a.k.a dynamic batching) (default: disabled) + if (llama_mlock_supported()) { + --mlock force system to keep model in RAM rather than swapping or compressing + } + if (llama_mmap_supported()) { + --no-mmap do not memory-map model (slower load but may reduce pageouts if not using mlock) + } +--numa attempt optimizations that help on some NUMA systems + if run without this previously, it is recommended to drop the system page cache before using this + see https://github.com/ggerganov/llama.cpp/issues/1437 +#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD +-ngl N, --n-gpu-layers N + number of layers to store in VRAM +-ngld N, --n-gpu-layers-draft N + number of layers to store in VRAM for the draft model +-ts SPLIT --tensor-split SPLIT + how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1 +-mg i, --main-gpu i the GPU to use for scratch and small tensors +#ifdef GGML_USE_CUBLAS +-nommq, --no-mul-mat-q + use " GGML_CUBLAS_NAME " instead of custom mul_mat_q " GGML_CUDA_NAME " kernels. + Not recommended since this is both slower and uses more VRAM. 
+#endif // GGML_USE_CUBLAS +#endif +--verbose-promptprint prompt before generation + fprintf(stderr, " --simple-io use basic IO for better compatibility in subprocesses and limited consoles +--lora FNAME apply LoRA adapter (implies --no-mmap) +--lora-scaled FNAME S apply LoRA adapter with user defined scaling S (implies --no-mmap) +--lora-base FNAME optional model to use as a base for the layers modified by the LoRA adapter +-m FNAME, --model FNAME + model path (default: %s)\n", params.model.c_str()); +-md FNAME, --model-draft FNAME + draft model for speculative decoding (default: %s)\n", params.model.c_str()); +-ld LOGDIR, --logdir LOGDIR + path under which to save YAML logs (no logging if unset) \ No newline at end of file From 7a4dcff667833987a453d615362a9d41ab40807d Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Fri, 6 Oct 2023 14:50:17 +0100 Subject: [PATCH 14/34] Update contextual help dev --- .../cmap-example/find_implemented_args.py | 62 ++++++++--- help_list.txt | 104 +++++++++--------- 2 files changed, 98 insertions(+), 68 deletions(-) diff --git a/examples/cmap-example/find_implemented_args.py b/examples/cmap-example/find_implemented_args.py index e4668943650ba..5c99a284de89e 100644 --- a/examples/cmap-example/find_implemented_args.py +++ b/examples/cmap-example/find_implemented_args.py @@ -2,6 +2,18 @@ import os import re +import collections +import re + +def replace_dashes_with_underscores(filename): + with open(filename, 'r') as file: + content = file.read() + + # Match '-' surrounded by word characters on both sides and replace with '_' + replaced_content = re.sub(r'(\w)-(\w)', r'\1_\2', content) + + with open(filename, 'w') as file: + file.write(replaced_content) def find_arguments(directory): arguments = {} @@ -14,9 +26,8 @@ def find_arguments(directory): with open(filepath, 'r') as file: content = file.read() - # Search for the expression "params." and read the attribute without trailing detritus - matches = re.findall(r'params\.(.*?)(?=[\). <,;}])', content) - + # Search for the expression "params." excluding prefixes and read the attribute without trailing detritus + matches = re.findall(r'(?:^|\s)params\.(.*?)(?=[\). 
<,;}]|\Z)', content) # Remove duplicates from matches list arguments_list = list(set([match.strip() for match in matches])) @@ -25,28 +36,34 @@ def find_arguments(directory): return arguments - -# Specify the directory you want to search for cpp files -directory = '/Users/edsilm2/llama.cpp/examples' - -if __name__ == '__main__': - # Call the find function and print the result - result = find_arguments(directory) +def output_results(result): + sorted_result = collections.OrderedDict(sorted(result.items())) all_of_them = set() - for filename, arguments in result.items(): - print(f"Filename: \033[32m{filename}\033[0m, arguments: {arguments}\n") + for filename, arguments in sorted_result.items(): + print(f"Filename: \033[32m{filename.split('/')[-1]}\033[0m, arguments: {arguments}\n") for argument in arguments: if argument not in all_of_them: all_of_them.add("".join(argument)) print(f"\033[32mAll of them: \033[0m{sorted(all_of_them)}.") + return sorted_result - with open("help_list.txt", "r") as helpfile: +def find_parameters(file, sorted_result): + with open(file, "r") as helpfile: lines = helpfile.read().split("\n") - for filename, arguments in result.items(): + for filename, arguments in sorted_result.items(): parameters = [] for line in lines: for argument in arguments: - if argument in line: + # need to try to avoid spurious matches + argument1 = "--" + argument + " " + if argument1 in line: + parameters.append(line) + # need to try to avoid spurious matches + argument2 = "params." + argument.split('n_')[-1] + if argument2 in line: + parameters.append(line) + argument3 = "params." + argument + if argument3 in line: parameters.append(line) all_parameters = set(parameters) print(f"\n\nFilename: \033[32m{filename.split('/')[-1]}\033[0m\n\n command-line arguments available and gpt-params functions implemented:\n") @@ -54,4 +71,17 @@ def find_arguments(directory): print(f" \033[032mNone\033[0m\n") else: for parameter in all_parameters: - print(f" help: \033[33m{parameter:<30}\033[0m") \ No newline at end of file + print(f" help: \033[33m{parameter:<30}\033[0m") + + +# Specify the directory you want to search for cpp files +directory = '/Users/edsilm2/llama.cpp/examples' + +if __name__ == '__main__': + # First we alter all the hyphenated help words in help-file.txt to underscores + replace_dashes_with_underscores('help_list.txt') + # Call the find function and output the result + result = find_arguments(directory) + sorted = output_results(result) + # analyse the files and what they contain + find_parameters("help_list.txt", sorted) \ No newline at end of file diff --git a/help_list.txt b/help_list.txt index 08dcf67ec977c..7bf5b8c78a385 100644 --- a/help_list.txt +++ b/help_list.txt @@ -1,104 +1,104 @@ -h, --helpshow this help message and exit -i, --interactive run in interactive mode ---interactive-first run in interactive mode and wait for input right away +--interactive_first run in interactive mode and wait for input right away -ins, --instructrun in instruction mode (use with Alpaca models) ---multiline-input allows you to write or paste multiple lines without ending each in '\\' --r PROMPT, --reverse-prompt PROMPT +--multiline_input allows you to write or paste multiple lines without ending each in '\\' +-r PROMPT, --reverse_prompt PROMPT halt generation at PROMPT, return control in interactive mode (can be specified more than once for multiple prompts). 
--color colorise output to distinguish prompt and user input from generations -s SEED, --seed SEED RNG seed (default: -1, use random seed for < 0) -t N, --threads N number of threads to use during generation (default: %d)\n", params.n_threads); --tb N, --threads-batch N +-tb N, --threads_batch N number of threads to use during batch and prompt processing (default: same as --threads) -p PROMPT, --prompt PROMPT prompt to start generation with (default: empty) -e, --escape process prompt escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\) ---prompt-cache FNAME file to cache prompt state for faster startup (default: none) ---prompt-cache-all if specified, saves user input and generations to cache as well. +--prompt_cache FNAME file to cache prompt state for faster startup (default: none) +--prompt_cache_all if specified, saves user input and generations to cache as well. not supported with --interactive or other interactive options ---prompt-cache-ro if specified, uses the prompt cache but does not update it. ---random-prompt start with a randomized prompt. ---in-prefix-bos prefix BOS to user inputs, preceding the `--in-prefix` string ---in-prefix STRING string to prefix user inputs with (default: empty) ---in-suffix STRING string to suffix after user inputs with (default: empty) +--prompt_cache_ro if specified, uses the prompt cache but does not update it. +--random_prompt start with a randomized prompt. +--in_prefix_bos prefix BOS to user inputs, preceding the `--in_prefix` string +--in_prefix STRING string to prefix user inputs with (default: empty) +--in_suffix STRING string to suffix after user inputs with (default: empty) -f FNAME, --file FNAME prompt file to start generation. --n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict); --c N, --ctx-size N size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx); --b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch); ---top-k N top-k sampling (default: %d, 0 = disabled)\n", params.top_k); ---top-p N top-p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p); +-n N, --n_predict N number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict); +-c N, --ctx_size N size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx); +-b N, --batch_size N batch size for prompt processing (default: %d)\n", params.n_batch); +--top_k N top_k sampling (default: %d, 0 = disabled)\n", params.top_k); +--top_p N top_p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p); --tfs N tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n", (double)params.tfs_z); --typical N locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n", (double)params.typical_p); ---repeat-last-n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", params.repeat_last_n); ---repeat-penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)params.repeat_penalty); ---presence-penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)params.presence_penalty); ---frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)params.frequency_penalty); +--repeat_last_n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", params.repeat_last_n); +--repeat_penalty N penalize repeat sequence of 
tokens (default: %.1f, 1.0 = disabled)\n", (double)params.repeat_penalty); +--presence_penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)params.presence_penalty); +--frequency_penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)params.frequency_penalty); --mirostat N use Mirostat sampling. Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used. (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", params.mirostat); ---mirostat-lr N Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta); ---mirostat-ent NMirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau); --l TOKEN_ID(+/-)BIAS, --logit-bias TOKEN_ID(+/-)BIAS +--mirostat_lr N Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta); +--mirostat_ent NMirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau); +-l TOKEN_ID(+/-)BIAS, --logit_bias TOKEN_ID(+/-)BIAS modifies the likelihood of token appearing in the completion, - i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello', - or `--logit-bias 15043-1` to decrease likelihood of token ' Hello' ---grammar GRAMMAR BNF-like grammar to constrain generations (see samples in grammars/ dir) ---grammar-file FNAME file to read grammar from ---cfg-negative-prompt PROMPT + i.e. `--logit_bias 15043+1` to increase likelihood of token ' Hello', + or `--logit_bias 15043_1` to decrease likelihood of token ' Hello' +--grammar GRAMMAR BNF_like grammar to constrain generations (see samples in grammars/ dir) +--grammar_file FNAME file to read grammar from +--cfg_negative_prompt PROMPT negative prompt to use for guidance. (default: empty) ---cfg-negative-prompt-file FNAME +--cfg_negative_prompt_file FNAME negative prompt file to use for guidance. 
(default: empty) ---cfg-scale N strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale); ---rope-scale N RoPE context linear scaling factor, inverse of --rope-freq-scale ---rope-freq-base N RoPE base frequency, used by NTK-aware scaling (default: loaded from model) ---rope-freq-scale N RoPE frequency linear scaling factor (default: loaded from model) ---ignore-eos ignore end of stream token and continue generating (implies --logit-bias 2-inf) ---no-penalize-nldo not penalize newline token ---memory-f32 use f32 instead of f16 for memory key+value (default: disabled) +--cfg_scale N strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale); +--rope_scale N RoPE context linear scaling factor, inverse of --rope_freq_scale +--rope_freq_base N RoPE base frequency, used by NTK_aware scaling (default: loaded from model) +--rope_freq_scale N RoPE frequency linear scaling factor (default: loaded from model) +--ignore_eos ignore end of stream token and continue generating (implies --logit_bias 2_inf) +--no_penalize_nldo not penalize newline token +--memory_f32 use f32 instead of f16 for memory key+value (default: disabled) not recommended: doubles context memory required and no measurable increase in quality --temp N temperature (default: %.1f)\n", (double)params.temp); ---logits-all return logits for all tokens in the batch (default: disabled) +--logits_all return logits for all tokens in the batch (default: disabled) --hellaswag compute HellaSwag score over random tasks from datafile supplied with -f ---hellaswag-tasks N number of tasks to use when computing the HellaSwag score (default: %zu)\n", params.hellaswag_tasks); +--hellaswag_tasks N number of tasks to use when computing the HellaSwag score (default: %zu)\n", params.hellaswag_tasks); --keep N number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep); --draft N number of tokens to draft for speculative decoding (default: %d)\n", params.n_draft); --chunks Nmax number of chunks to process (default: %d, -1 = all)\n", params.n_chunks); -np N, --parallel N number of parallel sequences to decode (default: %d)\n", params.n_parallel); -ns N, --sequences N number of sequences to decode (default: %d)\n", params.n_sequences); --cb, --cont-batching enable continuous batching (a.k.a dynamic batching) (default: disabled) +-cb, --cont_batching enable continuous batching (a.k.a dynamic batching) (default: disabled) if (llama_mlock_supported()) { --mlock force system to keep model in RAM rather than swapping or compressing } if (llama_mmap_supported()) { - --no-mmap do not memory-map model (slower load but may reduce pageouts if not using mlock) + --no_mmap do not memory_map model (slower load but may reduce pageouts if not using mlock) } --numa attempt optimizations that help on some NUMA systems if run without this previously, it is recommended to drop the system page cache before using this see https://github.com/ggerganov/llama.cpp/issues/1437 #ifdef LLAMA_SUPPORTS_GPU_OFFLOAD --ngl N, --n-gpu-layers N +-ngl N, --n_gpu_layers N number of layers to store in VRAM --ngld N, --n-gpu-layers-draft N +-ngld N, --n_gpu_layers_draft N number of layers to store in VRAM for the draft model --ts SPLIT --tensor-split SPLIT - how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1 --mg i, --main-gpu i the GPU to use for scratch and small tensors +-ts SPLIT --tensor_split SPLIT + how to split tensors across multiple GPUs, comma_separated list of proportions, e.g. 
3,1 +-mg i, --main_gpu i the GPU to use for scratch and small tensors #ifdef GGML_USE_CUBLAS --nommq, --no-mul-mat-q +-nommq, --no_mul_mat_q use " GGML_CUBLAS_NAME " instead of custom mul_mat_q " GGML_CUDA_NAME " kernels. Not recommended since this is both slower and uses more VRAM. #endif // GGML_USE_CUBLAS #endif ---verbose-promptprint prompt before generation - fprintf(stderr, " --simple-io use basic IO for better compatibility in subprocesses and limited consoles ---lora FNAME apply LoRA adapter (implies --no-mmap) ---lora-scaled FNAME S apply LoRA adapter with user defined scaling S (implies --no-mmap) ---lora-base FNAME optional model to use as a base for the layers modified by the LoRA adapter +--verbose_promptprint prompt before generation + fprintf(stderr, " --simple_io use basic IO for better compatibility in subprocesses and limited consoles +--lora FNAME apply LoRA adapter (implies --no_mmap) +--lora_scaled FNAME S apply LoRA adapter with user defined scaling S (implies --no_mmap) +--lora_base FNAME optional model to use as a base for the layers modified by the LoRA adapter -m FNAME, --model FNAME model path (default: %s)\n", params.model.c_str()); --md FNAME, --model-draft FNAME +-md FNAME, --model_draft FNAME draft model for speculative decoding (default: %s)\n", params.model.c_str()); -ld LOGDIR, --logdir LOGDIR path under which to save YAML logs (no logging if unset) \ No newline at end of file From 0d70518220102c6d9e0cf71339587a1eb763c165 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Fri, 6 Oct 2023 22:19:29 +0100 Subject: [PATCH 15/34] Update contextual help --- examples/cmap-example/find_implemented_args.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/examples/cmap-example/find_implemented_args.py b/examples/cmap-example/find_implemented_args.py index 5c99a284de89e..feae80ffd1764 100644 --- a/examples/cmap-example/find_implemented_args.py +++ b/examples/cmap-example/find_implemented_args.py @@ -27,12 +27,13 @@ def find_arguments(directory): content = file.read() # Search for the expression "params." excluding prefixes and read the attribute without trailing detritus - matches = re.findall(r'(?:^|\s)params\.(.*?)(?=[\). <,;}]|\Z)', content) + # matches = re.findall(r'(?:^|\s)params\.(.*)(?=[\). <,;}]|\Z)', content) + matches = set(re.findall(r'(?:^|\b)params\.([a-zA-Z_0-9]*)(?=[\). <,;}]|\Z)', content)) # Remove duplicates from matches list - arguments_list = list(set([match.strip() for match in matches])) + # arguments_list = list(set([match.strip() for match in matches])) # Add the matches to the dictionary - arguments[filepath] = arguments_list + arguments[filepath] = matches return arguments @@ -53,6 +54,12 @@ def find_parameters(file, sorted_result): for filename, arguments in sorted_result.items(): parameters = [] for line in lines: + for argument in arguments: + # building pattern to avoid spurious matches + pattern = r"(?:--{}\s)|(?:params\.{}[\s.,();])".format(argument, argument.split('n_')[-1]) + if re.search(pattern, line): + parameters.append(line) + '''for line in lines: for argument in arguments: # need to try to avoid spurious matches argument1 = "--" + argument + " " @@ -64,7 +71,7 @@ def find_parameters(file, sorted_result): parameters.append(line) argument3 = "params." 
+ argument if argument3 in line: - parameters.append(line) + parameters.append(line)''' all_parameters = set(parameters) print(f"\n\nFilename: \033[32m{filename.split('/')[-1]}\033[0m\n\n command-line arguments available and gpt-params functions implemented:\n") if not all_parameters: From 9c5d6f0ef661c17a8b140bf122b3e95bf6b288e2 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Sat, 7 Oct 2023 21:40:45 +0100 Subject: [PATCH 16/34] Update helper dev --- common/common.h | 24 +++--- .../cmap-example/find_implemented_args.py | 77 ++++++++++++++----- examples/cmap-example/read_common_h.py | 34 ++++++++ examples/parallel/README.md | 8 +- 4 files changed, 112 insertions(+), 31 deletions(-) create mode 100644 examples/cmap-example/read_common_h.py diff --git a/common/common.h b/common/common.h index 30260486f8896..887142cf99a7f 100644 --- a/common/common.h +++ b/common/common.h @@ -35,21 +35,21 @@ int32_t get_num_physical_cores(); struct gpt_params { uint32_t seed = -1; // RNG seed - int32_t n_threads = get_num_physical_cores(); - int32_t n_threads_batch = -1; // number of threads to use for batch processing (-1 = use n_threads) + int32_t n_threads = get_num_physical_cores(); // user-defined or num of internal physical cores + int32_t n_threads_batch = -1; // num threads for batch proc (-1 = use n_threads) int32_t n_predict = -1; // new tokens to predict int32_t n_ctx = 512; // context size - int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS) + int32_t n_batch = 512; // batch size for prompt proc (>=32 to use BLAS) int32_t n_keep = 0; // number of tokens to keep from initial prompt int32_t n_draft = 16; // number of tokens to draft during speculative decoding int32_t n_chunks = -1; // max number of chunks to process (-1 = unlimited) int32_t n_parallel = 1; // number of parallel sequences to decode int32_t n_sequences = 1; // number of sequences to decode - int32_t n_gpu_layers = -1; // number of layers to store in VRAM (-1 - use default) - int32_t n_gpu_layers_draft = -1; // number of layers to store in VRAM for the draft model (-1 - use default) + int32_t n_gpu_layers = -1; // num layers stored in VRAM (-1 for default) + int32_t n_gpu_layers_draft = -1; // num layers stored in VRAM for draft mod (-1 for default) int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors float tensor_split[LLAMA_MAX_DEVICES] = {0}; // how split tensors should be distributed across GPUs - int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens. + int32_t n_probs = 0; // if > 0, output probabilities of top n_probs tokens. int32_t n_beams = 0; // if non-zero then use beam search of given width. 
float rope_freq_base = 0.0f; // RoPE base frequency float rope_freq_scale = 0.0f; // RoPE frequency scaling factor @@ -61,7 +61,7 @@ struct gpt_params { float typical_p = 1.00f; // 1.0 = disabled float temp = 0.80f; // 1.0 = disabled float repeat_penalty = 1.10f; // 1.0 = disabled - int32_t repeat_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size) + int32_t repeat_last_n = 64; // last n tokens to penalize (0 = disable, -1 = cxt size) float frequency_penalty = 0.00f; // 0.0 = disabled float presence_penalty = 0.00f; // 0.0 = disabled int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0 @@ -78,7 +78,7 @@ struct gpt_params { std::string model = "models/7B/ggml-model-f16.gguf"; // model path std::string model_draft = ""; // draft model for speculative decoding std::string model_alias = "unknown"; // model alias - std::string prompt = ""; + std::string prompt = ""; // user-provided single prompt std::string prompt_file = ""; // store the external prompt file std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state std::string input_prefix = ""; // string to prefix user inputs with @@ -90,11 +90,11 @@ struct gpt_params { std::vector> lora_adapter; // lora adapter path with user defined scale std::string lora_base = ""; // base model path for the lora adapter - int ppl_stride = 0; // stride for perplexity calculations. If left at 0, the pre-existing approach will be used. - int ppl_output_type = 0; // = 0 -> ppl output is as usual, = 1 -> ppl output is num_tokens, ppl, one per line + int ppl_stride = 0; // stride for ppl calcs. 0: the pre-existing approach will be used. + int ppl_output_type = 0; // 0: ppl output as usual, 1: ppl output = num_tokens, ppl, one per line // (which is more convenient to use for plotting) // - bool hellaswag = false; // compute HellaSwag score over random tasks from datafile supplied in prompt + bool hellaswag = false; // compute HellaSwag score from datafile given in prompt size_t hellaswag_tasks = 400; // number of tasks to use when computing the HellaSwag score bool mul_mat_q = true; // if true, use mul_mat_q kernels instead of cuBLAS @@ -109,7 +109,7 @@ struct gpt_params { bool escape = false; // escape "\n", "\r", "\t", "\'", "\"", and "\\" bool interactive_first = false; // wait for user input immediately bool multiline_input = false; // reverse the usage of `\` - bool simple_io = false; // improves compatibility with subprocesses and limited consoles + bool simple_io = false; // improves compat'y with subprocs and ltd consoles bool cont_batching = false; // insert new sequences for decoding on-the-fly bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix diff --git a/examples/cmap-example/find_implemented_args.py b/examples/cmap-example/find_implemented_args.py index feae80ffd1764..31d4396980211 100644 --- a/examples/cmap-example/find_implemented_args.py +++ b/examples/cmap-example/find_implemented_args.py @@ -4,6 +4,8 @@ import re import collections import re +import read_common_h + def replace_dashes_with_underscores(filename): with open(filename, 'r') as file: @@ -48,6 +50,13 @@ def output_results(result): print(f"\033[32mAll of them: \033[0m{sorted(all_of_them)}.") return sorted_result +def concatenate(v): + concatenated_element = "" + for i, element in enumerate(v): + if element == "//": + concatenated_element = " ".join(v[i:]) + return concatenated_element + def find_parameters(file, sorted_result): with open(file, "r") as helpfile: lines = 
helpfile.read().split("\n") @@ -59,36 +68,68 @@ def find_parameters(file, sorted_result): pattern = r"(?:--{}\s)|(?:params\.{}[\s.,();])".format(argument, argument.split('n_')[-1]) if re.search(pattern, line): parameters.append(line) - '''for line in lines: - for argument in arguments: - # need to try to avoid spurious matches - argument1 = "--" + argument + " " - if argument1 in line: - parameters.append(line) - # need to try to avoid spurious matches - argument2 = "params." + argument.split('n_')[-1] - if argument2 in line: - parameters.append(line) - argument3 = "params." + argument - if argument3 in line: - parameters.append(line)''' - all_parameters = set(parameters) - print(f"\n\nFilename: \033[32m{filename.split('/')[-1]}\033[0m\n\n command-line arguments available and gpt-params functions implemented:\n") + + all_parameters = set(parameters) + file = filename.split('/')[-1] + print("\n\n"+"#"*(10+len(file))) + print(f"Filename: \033[32m{file}\033[0m") + print("#"*(10+len(file))) + print(f"\n\n command-line arguments available and gpt-params functions implemented (TODO: multi-line helps NEED SOME WORK):\n") + if not all_parameters: print(f" \033[032mNone\033[0m\n") + + # first do it the original way else: + help_count = 0 for parameter in all_parameters: - print(f" help: \033[33m{parameter:<30}\033[0m") + help_count += 1 + print(f"{help_count:>2} help: \033[33m{parameter:<30}\033[0m") + # now do it the new way + print("\nNow we extract the original gpt_params definition and defaults for implemented arguments:\n") + gpt_count = 0 + for k,v in read_common_h.parameters.items(): + if not read_common_h.parameters.items(): + print(f" \033[032mNone\033[0m\n") + elif k in arguments: + # print(f"gpt_params: \033[33m{k:>20}\033[0m values: {v}") + concatenated_element = concatenate(v) + gpt_count += 1 + print(f"{gpt_count:>2} gpt_param: \033[32m{k:>19}; \033[34mrole: \033[33m{concatenated_element:<60}\033[0m; \033[34mdefault: \033[30m{v[1]:<10}\033[0m ") + + # searching the other way round is quicker: + print("\nSearching the other way round is quicker:\n") + key_count = 0 + for argument in arguments: + if argument in read_common_h.parameters: + key_count += 1 + print(f"{key_count:>2} key: {argument:>25}; role: {concatenate(read_common_h.parameters[argument]):<60}; default: {read_common_h.parameters[argument][1]:<10}") + if help_count == gpt_count and gpt_count == key_count: + print("\n\033[032mNo unresolved help-list incompatibilities with this app.\033[0m") + else: + print("\n\033[031mThis app requires some attention regarding help-function consistency.\033[0m") # Specify the directory you want to search for cpp files directory = '/Users/edsilm2/llama.cpp/examples' if __name__ == '__main__': + # get the parameters from the common.h file utiity we import + print(read_common_h.parameters) + # So now we've got the gpt_parameters in this parameters dict + # First we alter all the hyphenated help words in help-file.txt to underscores - replace_dashes_with_underscores('help_list.txt') - # Call the find function and output the result + # replace_dashes_with_underscores('help_list.txt') + # This above may no longer be needed + + print("\n####################### find parameters #################################") + # Call the find function to collect all the params.attributes and output the result result = find_arguments(directory) + + print("\n######################################## output_results #################################") + # sort the results and output them sorted = 
output_results(result) + + print("\n######################## find help context parameters #################################") # analyse the files and what they contain find_parameters("help_list.txt", sorted) \ No newline at end of file diff --git a/examples/cmap-example/read_common_h.py b/examples/cmap-example/read_common_h.py new file mode 100644 index 0000000000000..a683d7662b204 --- /dev/null +++ b/examples/cmap-example/read_common_h.py @@ -0,0 +1,34 @@ +# read common.h and extract the parameters name list + +import re + +# Read the file into separate lines +with open('common/common.h', 'r') as file: + lines = file.read().split('\n') + +parameters = {} +inside = False +for line in lines: + # non_whitespace_elements = re.findall(r"\S+", line) + non_whitespace_elements = re.findall(r"[^\s}{=;]+", line) + print(f"nwe = \033[33m{non_whitespace_elements}\033[0m") + if non_whitespace_elements and non_whitespace_elements[0] == "struct": + inside = True + if len(non_whitespace_elements) > 2 and inside: + # note: cannot use nwe[0] because types do not generate unique keys and so overwrite + # here we deliberately add back the key so we can make a manual change when it is different + parameters[non_whitespace_elements[1]] = non_whitespace_elements[1:] + for k, v in parameters.items(): + print(f"key: {k:<20}; values: {v}") + + concatenated_element = "" + for i, element in enumerate(v): + if element == "//": + concatenated_element = " ".join(v[i:]) + # break + print(" "*10 + f"parameter: \033[32m{k:>40} \033[34mdefault: \033[30m{v[1]:>5} \033[34mcommment: \033[33m{concatenated_element:80}\033[0m") + + # this is a bit of a hack to terminate the harvest + if len(non_whitespace_elements) > 2 and non_whitespace_elements[1] == "infill": + inside = False + break \ No newline at end of file diff --git a/examples/parallel/README.md b/examples/parallel/README.md index 4d0fe5cef12fa..fea4a9d918c70 100644 --- a/examples/parallel/README.md +++ b/examples/parallel/README.md @@ -1,3 +1,9 @@ # llama.cpp/example/parallel -Simplified simluation for serving incoming requests in parallel +Simplified simulation for serving incoming requests in parallel + +Running this using the 100 questions in examples/jeopardy/questions.txt +on an M2 MAX (38 core) with 32GB unified memory on MacOS Sonoma 14.0 +takes about 235 seconds with sequential responses (-ns 1) and 45 seconds +with 64 parallel responses (-ns 64) in both cases generating 100 answers (-np 100) +using a context of 8192 (-c 8192). 
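The parallel README above reports timings but not the command line that produced them. Purely as an illustrative sketch — the binary and model paths are assumptions, and feeding questions.txt via -f presumes the params.prompt_file plumbing this series introduces is wired into the parallel example — the flags quoted in the README (-c 8192, -ns 1 vs -ns 64, -np 100) would be passed along these lines:

    # illustrative only: paths and the -f wiring are assumed; the -c/-ns/-np values are those quoted in the README
    ./build/bin/parallel -m ./models/llama-2-7b/ggml-model-q4_0.gguf \
        -f ./examples/jeopardy/questions.txt \
        -c 8192 -ns 64 -np 100
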
From 982c9089847e07ae33ccaebfbfc99778a55cc7fc Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Sun, 8 Oct 2023 22:26:13 +0100 Subject: [PATCH 17/34] Update contextual help --- common/common.cpp | 15 +- common/common.h | 1 + .../cmap-example/find_implemented_args.py | 126 +++++++++-- examples/cmap-example/read_common_h.py | 27 ++- help_list.txt | 208 +++++++++--------- 5 files changed, 240 insertions(+), 137 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 4b233786abc00..4d4ecf03df092 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -621,6 +621,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { return true; } +// There were missing items from this list of helps so the wording needs checking (all inserted at the end, so reposition too): +// --embedding, --beams, --ppl-stride, --ppl-output-type, memory-f32, no-mmap, mlock, use-color, nprobs void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf("usage: %s [options]\n", argv[0]); printf("\n"); @@ -667,7 +669,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", params.mirostat); printf(" --mirostat-lr N Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta); printf(" --mirostat-ent N Mirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau); - printf(" -l TOKEN_ID(+/-)BIAS, --logit-bias TOKEN_ID(+/-)BIAS\n"); + printf(" -l T, --logit-bias T T = TOKEN_ID(plus/minus)BIAS\n"); printf(" modifies the likelihood of token appearing in the completion,\n"); printf(" i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',\n"); printf(" or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'\n"); @@ -682,7 +684,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" --rope-freq-base N RoPE base frequency, used by NTK-aware scaling (default: loaded from model)\n"); printf(" --rope-freq-scale N RoPE frequency linear scaling factor (default: loaded from model)\n"); printf(" --ignore-eos ignore end of stream token and continue generating (implies --logit-bias 2-inf)\n"); - printf(" --no-penalize-nl do not penalize newline token\n"); + printf(" --no-penalize-nl do not penalize newline token (default is DO penalise nl token)\n"); printf(" --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n"); printf(" not recommended: doubles context memory required and no measurable increase in quality\n"); printf(" --temp N temperature (default: %.1f)\n", (double)params.temp); @@ -729,6 +731,15 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" draft model for speculative decoding (default: %s)\n", params.model.c_str()); printf(" -ld LOGDIR, --logdir LOGDIR\n"); printf(" path under which to save YAML logs (no logging if unset)\n"); + printf(" --ppl-stride stride for ppl calcs. 
0 (default): the pre-existing approach will be used.\n"); + printf(" --ppl-output-type 0 (default): ppl output as usual, 1: ppl output num_tokens, one per line\n"); + printf(" --embedding 0 (default): get only sentence embedding\n"); + printf(" --beams N 0 (default): if non-zero use beam search of given width N.\n"); + printf(" --memory-f32 0 (default): if true (= 1) disable f16 memory.\n"); + printf(" --no-mmap 0 (default): if true use mmap for faster loads.\n"); + printf(" --mlock 0 (default): if true keep model in memory.\n"); + printf(" --use-color 0 (default): use color to distinguish generations from inputs\n"); + printf(" --nprobs N if > 0 output the probabilities of the top N tokens\n"); printf("\n"); } diff --git a/common/common.h b/common/common.h index 887142cf99a7f..d48ee22e1427e 100644 --- a/common/common.h +++ b/common/common.h @@ -75,6 +75,7 @@ struct gpt_params { std::string cfg_negative_prompt; // string to help guidance float cfg_scale = 1.f; // How strong is guidance + std::string help = ""; // universal help parameter std::string model = "models/7B/ggml-model-f16.gguf"; // model path std::string model_draft = ""; // draft model for speculative decoding std::string model_alias = "unknown"; // model alias diff --git a/examples/cmap-example/find_implemented_args.py b/examples/cmap-example/find_implemented_args.py index 31d4396980211..ac6e6cfdc8695 100644 --- a/examples/cmap-example/find_implemented_args.py +++ b/examples/cmap-example/find_implemented_args.py @@ -6,7 +6,22 @@ import re import read_common_h - +# update the source file - usually 'help_list.txt', so the default - in case the source file has been changed +def update_file(file_from, file_to = "help_list.txt"): + # Open the file_from file + with open(file_from, "r") as file: + lines = file.readlines() + + # Find lines starting with "printf(" and ending with ");" (assumes file_from is written in C/C++) + pattern = r'printf\("\s(.*?)\);' + matched_lines = [re.search(pattern, line).group(1) for line in lines if re.search(pattern, line)] + + # Save matched lines to file_to + with open(file_to, "w") as file: + for line in matched_lines: + file.write(line + '\n') + +# helper fn to make the hyphenated words in a file snake-case for searching def replace_dashes_with_underscores(filename): with open(filename, 'r') as file: content = file.read() @@ -17,6 +32,13 @@ def replace_dashes_with_underscores(filename): with open(filename, 'w') as file: file.write(replaced_content) +# helper fn to make the underscored words in a file hyphenated for print +def replace_underscores_with_dashes(parameter): + # Match '_' surrounded by word characters on both sides and replace with '-' + return re.sub(r'(\w)_(\w)', r'\1-\2', parameter) + + +# find all instances of "params." in the *.cpp files in a directory def find_arguments(directory): arguments = {} @@ -28,21 +50,21 @@ def find_arguments(directory): with open(filepath, 'r') as file: content = file.read() - # Search for the expression "params." excluding prefixes and read the attribute without trailing detritus + # Search for the expression "params." or "params->" excluding prefixes and read the attribute without trailing detritus # matches = re.findall(r'(?:^|\s)params\.(.*)(?=[\). <,;}]|\Z)', content) - matches = set(re.findall(r'(?:^|\b)params\.([a-zA-Z_0-9]*)(?=[\). <,;}]|\Z)', content)) - # Remove duplicates from matches list - # arguments_list = list(set([match.strip() for match in matches])) + matches = set(re.findall(r'(?:^|\b)params[->\.]([a-zA-Z_0-9]*)(?=[\). 
<,;}]|\Z)', content)) # Add the matches to the dictionary arguments[filepath] = matches return arguments +# output a list of the params.attributes for each file def output_results(result): sorted_result = collections.OrderedDict(sorted(result.items())) all_of_them = set() for filename, arguments in sorted_result.items(): + arguments.add("help") print(f"Filename: \033[32m{filename.split('/')[-1]}\033[0m, arguments: {arguments}\n") for argument in arguments: if argument not in all_of_them: @@ -50,6 +72,7 @@ def output_results(result): print(f"\033[32mAll of them: \033[0m{sorted(all_of_them)}.") return sorted_result +# put all the words after "//" in a dict back together with spaces def concatenate(v): concatenated_element = "" for i, element in enumerate(v): @@ -57,24 +80,78 @@ def concatenate(v): concatenated_element = " ".join(v[i:]) return concatenated_element +def title_print(filename): + title = filename.split('/')[-1] + print("\n\n"+"#"*(10+len(title))) + print(f"Filename: \033[32m{title}\033[0m") + print("#"*(10+len(title))) + +def substitution_list(parameters): + # store untrapped parameters as identicals in case we need to change them later + sub_dict = {"n_threads": "threads", + "n_ctx": "ctx_size", + "n_draft" : "draft", + "n_threads_batch" : "threads_batch", + "n_chunks" : "chunks", + "n_batch" : "batch_size", + "n_sequences" : "sequences", + "n_parallel" : "parallel", + "n_beams" : "beams", + "n_keep" : "keep", + "n_probs" : "nprobs", + "path_prompt_cache" : "prompt_cache", + "input_prefix" : "in_prefix", + "input_suffix" : "in_suffix", + "input_prefix_bos" : "in_prefix_bos", + "antiprompt" : "reverse_prompt", + "mul_mat_q" : "no_mul_mat_q", + "use_mmap" : "no_mmap", + "use_mlock" : "mlock", + "model_alias" : "alias", + "tfs_z" : "tfs", + "use_color" : "color", + "logit_bias" : "logit_bias", + "ignore_eos" : "ignore_eos", + "mirostat_tau" : "mirostat_ent", + "mirostat_eta" : "mirostat_lr", + "penalize_nl" : "no_penalize_nl", + "typical_p" : "typical", + "mem_size" : "mem_size", + "mem_buffer" : "mem_buffer", + "no_alloc" : "no_alloc" + } + new_parameters = [] + for parameter in parameters: + if parameter in sub_dict: + # we need both for future reference + new_parameters.append(parameter) + new_parameters.append(sub_dict[parameter]) + else: + new_parameters.append(parameter) + return new_parameters + +# output the lines of the help file def find_parameters(file, sorted_result): with open(file, "r") as helpfile: lines = helpfile.read().split("\n") for filename, arguments in sorted_result.items(): + # we try to fix up some variant labelling in help_file.txt + arguments = substitution_list(arguments) parameters = [] for line in lines: for argument in arguments: # building pattern to avoid spurious matches - pattern = r"(?:--{}\s)|(?:params\.{}[\s.,();])".format(argument, argument.split('n_')[-1]) - if re.search(pattern, line): + # pattern = r"(?:--{}\s)|(?:params\.{}[\s.,\.();])".format(argument, argument.split('n_')[-1]) + pattern = r"(?:--{}\s)|(?:params\.{}(?=[\s.,\.\(\);]|\.+\w))".format(argument, argument.split('n_')[-1]) + # pattern = r"(?<=params\.)\w+(?=\.\w+|\.|,|;|\}|\{|\(|\)|\.)" + # bit of a hack to exclude --attributes at the end of help comment lines + if re.search(pattern, line[:50]): parameters.append(line) all_parameters = set(parameters) - file = filename.split('/')[-1] - print("\n\n"+"#"*(10+len(file))) - print(f"Filename: \033[32m{file}\033[0m") - print("#"*(10+len(file))) - print(f"\n\n command-line arguments available and gpt-params functions implemented 
(TODO: multi-line helps NEED SOME WORK):\n") + + title_print(filename) + print(f"\nCommand-line arguments available and gpt-params functions implemented (TODO: multi-line helps NEED SOME WORK):\n") if not all_parameters: print(f" \033[032mNone\033[0m\n") @@ -83,11 +160,16 @@ def find_parameters(file, sorted_result): else: help_count = 0 for parameter in all_parameters: - help_count += 1 - print(f"{help_count:>2} help: \033[33m{parameter:<30}\033[0m") + # reverse the hypthen/underscore pattern just for printing + replaced_param = replace_underscores_with_dashes(parameter) + if not parameter.startswith(" "): + help_count += 1 + print(f"{help_count:>2} help: \033[33m{replaced_param:<30}\033[0m") + else: + print(f" help: \033[33m{replaced_param:<30}\033[0m") # now do it the new way - print("\nNow we extract the original gpt_params definition and defaults for implemented arguments:\n") + print("\nNow we extract the original gpt_params definition from common.h with the defaults for implemented arguments:\n") gpt_count = 0 for k,v in read_common_h.parameters.items(): if not read_common_h.parameters.items(): @@ -99,14 +181,14 @@ def find_parameters(file, sorted_result): print(f"{gpt_count:>2} gpt_param: \033[32m{k:>19}; \033[34mrole: \033[33m{concatenated_element:<60}\033[0m; \033[34mdefault: \033[30m{v[1]:<10}\033[0m ") # searching the other way round is quicker: - print("\nSearching the other way round is quicker:\n") + print("\nSearching the other way round is more efficient:\n") key_count = 0 - for argument in arguments: + for argument in set(arguments): if argument in read_common_h.parameters: key_count += 1 print(f"{key_count:>2} key: {argument:>25}; role: {concatenate(read_common_h.parameters[argument]):<60}; default: {read_common_h.parameters[argument][1]:<10}") if help_count == gpt_count and gpt_count == key_count: - print("\n\033[032mNo unresolved help-list incompatibilities with this app.\033[0m") + print(f"\n\033[032mNo unresolved help-list incompatibilities with \033[33m{filename.split('/')[-1]}\033[0m") else: print("\n\033[031mThis app requires some attention regarding help-function consistency.\033[0m") @@ -114,13 +196,17 @@ def find_parameters(file, sorted_result): directory = '/Users/edsilm2/llama.cpp/examples' if __name__ == '__main__': + + # update the source help file from C++ source (this works exactly as required) + update_file("common/common.cpp", "help_list.txt") + # get the parameters from the common.h file utiity we import print(read_common_h.parameters) # So now we've got the gpt_parameters in this parameters dict # First we alter all the hyphenated help words in help-file.txt to underscores - # replace_dashes_with_underscores('help_list.txt') - # This above may no longer be needed + # we later reverse these changers before printing the help lines + replace_dashes_with_underscores('help_list.txt') print("\n####################### find parameters #################################") # Call the find function to collect all the params.attributes and output the result diff --git a/examples/cmap-example/read_common_h.py b/examples/cmap-example/read_common_h.py index a683d7662b204..1c18d496002aa 100644 --- a/examples/cmap-example/read_common_h.py +++ b/examples/cmap-example/read_common_h.py @@ -7,6 +7,9 @@ lines = file.read().split('\n') parameters = {} +# we add the logit_bias parameter which otherwise is not found +parameters['logit_bias']=['logit_bias', '0', '//', 'way', 'to', 'alter', 'prob', 'of', 'particular', 'words'] + inside = False for line in lines: # 
non_whitespace_elements = re.findall(r"\S+", line) @@ -18,17 +21,19 @@ # note: cannot use nwe[0] because types do not generate unique keys and so overwrite # here we deliberately add back the key so we can make a manual change when it is different parameters[non_whitespace_elements[1]] = non_whitespace_elements[1:] - for k, v in parameters.items(): - print(f"key: {k:<20}; values: {v}") - - concatenated_element = "" - for i, element in enumerate(v): - if element == "//": - concatenated_element = " ".join(v[i:]) - # break - print(" "*10 + f"parameter: \033[32m{k:>40} \033[34mdefault: \033[30m{v[1]:>5} \033[34mcommment: \033[33m{concatenated_element:80}\033[0m") - + # remove spurious entry caused by eccentric status of logit_bias + if "float>" in parameters and parameters["float>"][1] == 'logit_bias': + del parameters["float>"] + # this is a bit of a hack to terminate the harvest if len(non_whitespace_elements) > 2 and non_whitespace_elements[1] == "infill": inside = False - break \ No newline at end of file + break +for k, v in parameters.items(): + print(f"key: {k:<20}; values: {v}") + concatenated_element = "" + for i, element in enumerate(v): + if element == "//": + concatenated_element = " ".join(v[i:]) + # break + print(" "*10 + f"parameter: \033[32m{k:>40} \033[34mdefault: \033[30m{v[1]:>5} \033[34mcommment: \033[33m{concatenated_element:80}\033[0m") diff --git a/help_list.txt b/help_list.txt index 7bf5b8c78a385..97b91a98259d5 100644 --- a/help_list.txt +++ b/help_list.txt @@ -1,104 +1,104 @@ --h, --helpshow this help message and exit --i, --interactive run in interactive mode ---interactive_first run in interactive mode and wait for input right away --ins, --instructrun in instruction mode (use with Alpaca models) ---multiline_input allows you to write or paste multiple lines without ending each in '\\' --r PROMPT, --reverse_prompt PROMPT - halt generation at PROMPT, return control in interactive mode - (can be specified more than once for multiple prompts). ---color colorise output to distinguish prompt and user input from generations --s SEED, --seed SEED RNG seed (default: -1, use random seed for < 0) --t N, --threads N number of threads to use during generation (default: %d)\n", params.n_threads); --tb N, --threads_batch N - number of threads to use during batch and prompt processing (default: same as --threads) --p PROMPT, --prompt PROMPT - prompt to start generation with (default: empty) --e, --escape process prompt escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\) ---prompt_cache FNAME file to cache prompt state for faster startup (default: none) ---prompt_cache_all if specified, saves user input and generations to cache as well. - not supported with --interactive or other interactive options ---prompt_cache_ro if specified, uses the prompt cache but does not update it. ---random_prompt start with a randomized prompt. ---in_prefix_bos prefix BOS to user inputs, preceding the `--in_prefix` string ---in_prefix STRING string to prefix user inputs with (default: empty) ---in_suffix STRING string to suffix after user inputs with (default: empty) --f FNAME, --file FNAME - prompt file to start generation. 
--n N, --n_predict N number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict); --c N, --ctx_size N size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx); --b N, --batch_size N batch size for prompt processing (default: %d)\n", params.n_batch); ---top_k N top_k sampling (default: %d, 0 = disabled)\n", params.top_k); ---top_p N top_p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p); ---tfs N tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n", (double)params.tfs_z); ---typical N locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n", (double)params.typical_p); ---repeat_last_n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", params.repeat_last_n); ---repeat_penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)params.repeat_penalty); ---presence_penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)params.presence_penalty); ---frequency_penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)params.frequency_penalty); ---mirostat N use Mirostat sampling. - Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used. - (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", params.mirostat); ---mirostat_lr N Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta); ---mirostat_ent NMirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau); --l TOKEN_ID(+/-)BIAS, --logit_bias TOKEN_ID(+/-)BIAS - modifies the likelihood of token appearing in the completion, - i.e. `--logit_bias 15043+1` to increase likelihood of token ' Hello', - or `--logit_bias 15043_1` to decrease likelihood of token ' Hello' ---grammar GRAMMAR BNF_like grammar to constrain generations (see samples in grammars/ dir) ---grammar_file FNAME file to read grammar from ---cfg_negative_prompt PROMPT - negative prompt to use for guidance. (default: empty) ---cfg_negative_prompt_file FNAME - negative prompt file to use for guidance. 
(default: empty) ---cfg_scale N strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale); ---rope_scale N RoPE context linear scaling factor, inverse of --rope_freq_scale ---rope_freq_base N RoPE base frequency, used by NTK_aware scaling (default: loaded from model) ---rope_freq_scale N RoPE frequency linear scaling factor (default: loaded from model) ---ignore_eos ignore end of stream token and continue generating (implies --logit_bias 2_inf) ---no_penalize_nldo not penalize newline token ---memory_f32 use f32 instead of f16 for memory key+value (default: disabled) - not recommended: doubles context memory required and no measurable increase in quality ---temp N temperature (default: %.1f)\n", (double)params.temp); ---logits_all return logits for all tokens in the batch (default: disabled) ---hellaswag compute HellaSwag score over random tasks from datafile supplied with -f ---hellaswag_tasks N number of tasks to use when computing the HellaSwag score (default: %zu)\n", params.hellaswag_tasks); ---keep N number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep); ---draft N number of tokens to draft for speculative decoding (default: %d)\n", params.n_draft); ---chunks Nmax number of chunks to process (default: %d, -1 = all)\n", params.n_chunks); --np N, --parallel N number of parallel sequences to decode (default: %d)\n", params.n_parallel); --ns N, --sequences N number of sequences to decode (default: %d)\n", params.n_sequences); --cb, --cont_batching enable continuous batching (a.k.a dynamic batching) (default: disabled) - if (llama_mlock_supported()) { - --mlock force system to keep model in RAM rather than swapping or compressing - } - if (llama_mmap_supported()) { - --no_mmap do not memory_map model (slower load but may reduce pageouts if not using mlock) - } ---numa attempt optimizations that help on some NUMA systems - if run without this previously, it is recommended to drop the system page cache before using this - see https://github.com/ggerganov/llama.cpp/issues/1437 -#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD --ngl N, --n_gpu_layers N - number of layers to store in VRAM --ngld N, --n_gpu_layers_draft N - number of layers to store in VRAM for the draft model --ts SPLIT --tensor_split SPLIT - how to split tensors across multiple GPUs, comma_separated list of proportions, e.g. 3,1 --mg i, --main_gpu i the GPU to use for scratch and small tensors -#ifdef GGML_USE_CUBLAS --nommq, --no_mul_mat_q - use " GGML_CUBLAS_NAME " instead of custom mul_mat_q " GGML_CUDA_NAME " kernels. - Not recommended since this is both slower and uses more VRAM. 
-#endif // GGML_USE_CUBLAS -#endif ---verbose_promptprint prompt before generation - fprintf(stderr, " --simple_io use basic IO for better compatibility in subprocesses and limited consoles ---lora FNAME apply LoRA adapter (implies --no_mmap) ---lora_scaled FNAME S apply LoRA adapter with user defined scaling S (implies --no_mmap) ---lora_base FNAME optional model to use as a base for the layers modified by the LoRA adapter --m FNAME, --model FNAME - model path (default: %s)\n", params.model.c_str()); --md FNAME, --model_draft FNAME - draft model for speculative decoding (default: %s)\n", params.model.c_str()); --ld LOGDIR, --logdir LOGDIR - path under which to save YAML logs (no logging if unset) \ No newline at end of file + -h, --help show this help message and exit\n" + -i, --interactive run in interactive mode\n" + --interactive_first run in interactive mode and wait for input right away\n" + -ins, --instruct run in instruction mode (use with Alpaca models)\n" + --multiline_input allows you to write or paste multiple lines without ending each in '\\'\n" + -r PROMPT, --reverse_prompt PROMPT\n" + halt generation at PROMPT, return control in interactive mode\n" + (can be specified more than once for multiple prompts).\n" + --color colorise output to distinguish prompt and user input from generations\n" + -s SEED, --seed SEED RNG seed (default: -1, use random seed for < 0)\n" + -t N, --threads N number of threads to use during generation (default: %d)\n", params.n_threads + -tb N, --threads_batch N\n" + number of threads to use during batch and prompt processing (default: same as --threads)\n" + -p PROMPT, --prompt PROMPT\n" + prompt to start generation with (default: empty)\n" + -e, --escape process prompt escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\)\n" + --prompt_cache FNAME file to cache prompt state for faster startup (default: none)\n" + --prompt_cache_all if specified, saves user input and generations to cache as well.\n" + not supported with --interactive or other interactive options\n" + --prompt_cache_ro if specified, uses the prompt cache but does not update it.\n" + --random_prompt start with a randomized prompt.\n" + --in_prefix_bos prefix BOS to user inputs, preceding the `--in_prefix` string\n" + --in_prefix STRING string to prefix user inputs with (default: empty)\n" + --in_suffix STRING string to suffix after user inputs with (default: empty)\n" + -f FNAME, --file FNAME\n" + prompt file to start generation.\n" + -n N, --n_predict N number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict + -c N, --ctx_size N size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx + -b N, --batch_size N batch size for prompt processing (default: %d)\n", params.n_batch + --top_k N top_k sampling (default: %d, 0 = disabled)\n", params.top_k + --top_p N top_p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p + --tfs N tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n", (double)params.tfs_z + --typical N locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n", (double)params.typical_p + --repeat_last_n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", params.repeat_last_n + --repeat_penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)params.repeat_penalty + --presence_penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)params.presence_penalty + --frequency_penalty 
N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)params.frequency_penalty + --mirostat N use Mirostat sampling.\n" + Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.\n" + (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", params.mirostat + --mirostat_lr N Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta + --mirostat_ent N Mirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau + -l T, --logit_bias T T = TOKEN_ID(plus/minus)BIAS\n" + modifies the likelihood of token appearing in the completion,\n" + i.e. `--logit_bias 15043+1` to increase likelihood of token ' Hello',\n" + or `--logit_bias 15043_1` to decrease likelihood of token ' Hello'\n" + --grammar GRAMMAR BNF_like grammar to constrain generations (see samples in grammars/ dir)\n" + --grammar_file FNAME file to read grammar from\n" + --cfg_negative_prompt PROMPT\n" + negative prompt to use for guidance. (default: empty)\n" + --cfg_negative_prompt_file FNAME\n" + negative prompt file to use for guidance. (default: empty)\n" + --cfg_scale N strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale + --rope_scale N RoPE context linear scaling factor, inverse of --rope_freq_scale\n" + --rope_freq_base N RoPE base frequency, used by NTK_aware scaling (default: loaded from model)\n" + --rope_freq_scale N RoPE frequency linear scaling factor (default: loaded from model)\n" + --ignore_eos ignore end of stream token and continue generating (implies --logit_bias 2_inf)\n" + --no_penalize_nl do not penalize newline token (default is DO penalise nl token)\n" + --memory_f32 use f32 instead of f16 for memory key+value (default: disabled)\n" + not recommended: doubles context memory required and no measurable increase in quality\n" + --temp N temperature (default: %.1f)\n", (double)params.temp + --logits_all return logits for all tokens in the batch (default: disabled)\n" + --hellaswag compute HellaSwag score over random tasks from datafile supplied with -f\n" + --hellaswag_tasks N number of tasks to use when computing the HellaSwag score (default: %zu)\n", params.hellaswag_tasks + --keep N number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep + --draft N number of tokens to draft for speculative decoding (default: %d)\n", params.n_draft + --chunks N max number of chunks to process (default: %d, -1 = all)\n", params.n_chunks + -np N, --parallel N number of parallel sequences to decode (default: %d)\n", params.n_parallel + -ns N, --sequences N number of sequences to decode (default: %d)\n", params.n_sequences + -cb, --cont_batching enable continuous batching (a.k.a dynamic batching) (default: disabled)\n" + --mlock force system to keep model in RAM rather than swapping or compressing\n" + --no_mmap do not memory_map model (slower load but may reduce pageouts if not using mlock)\n" + --numa attempt optimizations that help on some NUMA systems\n" + if run without this previously, it is recommended to drop the system page cache before using this\n" + see https://github.com/ggerganov/llama.cpp/issues/1437\n" + -ngl N, --n_gpu_layers N\n" + number of layers to store in VRAM\n" + -ngld N, --n_gpu_layers_draft N\n" + number of layers to store in VRAM for the draft model\n" + -ts SPLIT --tensor_split SPLIT\n" + how to split tensors across multiple GPUs, comma_separated list of proportions, e.g. 
3,1\n" + -mg i, --main_gpu i the GPU to use for scratch and small tensors\n" + -nommq, --no_mul_mat_q\n" + use " GGML_CUBLAS_NAME " instead of custom mul_mat_q " GGML_CUDA_NAME " kernels.\n" + Not recommended since this is both slower and uses more VRAM.\n" + --verbose_prompt print prompt before generation\n" + --lora FNAME apply LoRA adapter (implies --no_mmap)\n" + --lora_scaled FNAME S apply LoRA adapter with user defined scaling S (implies --no_mmap)\n" + --lora_base FNAME optional model to use as a base for the layers modified by the LoRA adapter\n" + -m FNAME, --model FNAME\n" + model path (default: %s)\n", params.model.c_str() + -md FNAME, --model_draft FNAME\n" + draft model for speculative decoding (default: %s)\n", params.model.c_str() + -ld LOGDIR, --logdir LOGDIR\n" + path under which to save YAML logs (no logging if unset)\n" + --ppl_stride stride for ppl calcs. 0 (default): the pre_existing approach will be used.\n" + --ppl_output_type 0 (default): ppl output as usual, 1: ppl output num_tokens, one per line\n" + --embedding 0 (default): get only sentence embedding\n" + --beams N 0 (default): if non_zero use beam search of given width N.\n" + --memory_f32 0 (default): if true (= 1) disable f16 memory.\n" + --no_mmap 0 (default): if true use mmap for faster loads.\n" + --mlock 0 (default): if true keep model in memory.\n" + --use_color 0 (default): use color to distinguish generations from inputs\n" + --nprobs N if > 0 output the probabilities of the top N tokens\n" From 32bdf0ee4b5f602dc940136ebbc44d0d0cd8b786 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 09:10:07 +0100 Subject: [PATCH 18/34] Final reconciliation --- common/common.cpp | 10 ++++++---- help_list.txt | 2 ++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 4d4ecf03df092..0b295f41d20b5 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -622,7 +622,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { } // There were missing items from this list of helps so the wording needs checking (all inserted at the end, so reposition too): -// --embedding, --beams, --ppl-stride, --ppl-output-type, memory-f32, no-mmap, mlock, use-color, nprobs +// --embedding, --beams, --ppl-stride, --ppl-output-type, --memory-f32, --no-mmap, --mlock, --use-color, --nprobs, --alias, --infill void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf("usage: %s [options]\n", argv[0]); printf("\n"); @@ -735,11 +735,13 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" --ppl-output-type 0 (default): ppl output as usual, 1: ppl output num_tokens, one per line\n"); printf(" --embedding 0 (default): get only sentence embedding\n"); printf(" --beams N 0 (default): if non-zero use beam search of given width N.\n"); - printf(" --memory-f32 0 (default): if true (= 1) disable f16 memory.\n"); + printf(" --memory-f32 0 (default): if true (= 1) disable f16 memory.\n"); printf(" --no-mmap 0 (default): if true use mmap for faster loads.\n"); - printf(" --mlock 0 (default): if true keep model in memory.\n"); + printf(" --mlock 0 (default): if true keep model in memory.\n"); printf(" --use-color 0 (default): use color to distinguish generations from inputs\n"); - printf(" --nprobs N if > 0 output the probabilities of the top N tokens\n"); + printf(" --nprobs N if > 0 output the probabilities of the top N tokens\n"); + printf(" --alias model alias (default: 'unknown')\n"); + printf(" --infill 0 
(defaut) use infill mode\n"); printf("\n"); } diff --git a/help_list.txt b/help_list.txt index 97b91a98259d5..38e0f6f1d8591 100644 --- a/help_list.txt +++ b/help_list.txt @@ -102,3 +102,5 @@ --mlock 0 (default): if true keep model in memory.\n" --use_color 0 (default): use color to distinguish generations from inputs\n" --nprobs N if > 0 output the probabilities of the top N tokens\n" + --alias model alias (default: 'unknown')\n" + --infill 0 (defaut) use infill mode\n" From 2e17fcfdbaa99e61171b5fe1c342e61d69f99d87 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 09:46:26 +0100 Subject: [PATCH 19/34] Comment in common.cpp --- common/common.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/common/common.cpp b/common/common.cpp index 0b295f41d20b5..927a4d9b4fda8 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -623,6 +623,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { // There were missing items from this list of helps so the wording needs checking (all inserted at the end, so reposition too): // --embedding, --beams, --ppl-stride, --ppl-output-type, --memory-f32, --no-mmap, --mlock, --use-color, --nprobs, --alias, --infill +// some corresponding changes to the sequence of fprintf() code may be needed void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf("usage: %s [options]\n", argv[0]); printf("\n"); From 3e4de67fdd3d1f0ab89915d4593fc648188e9266 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 10:01:21 +0100 Subject: [PATCH 20/34] Update find_implemented_args.py --- examples/cmap-example/find_implemented_args.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/examples/cmap-example/find_implemented_args.py b/examples/cmap-example/find_implemented_args.py index ac6e6cfdc8695..219a05fc6d194 100644 --- a/examples/cmap-example/find_implemented_args.py +++ b/examples/cmap-example/find_implemented_args.py @@ -1,4 +1,7 @@ -# search the specified directory for files that include argv[i] == '-f' or '--file' arguments +# search the specified directory for files that include command-line arguments +# these are almost always in the form params.argument; "logit_bias" is one exception +# have yet to investigate fully what "lora_adapter" in server.cpp does since it is not apparently +# accessible from the command-line arg/parameter sequence. 
import os import re @@ -86,6 +89,8 @@ def title_print(filename): print(f"Filename: \033[32m{title}\033[0m") print("#"*(10+len(title))) +# list all the equivalences between declarations in common.h and common.cpp that defines the help +# these are used to substitute the searched params.attributes (keys) with help attributes (values) def substitution_list(parameters): # store untrapped parameters as identicals in case we need to change them later sub_dict = {"n_threads": "threads", From 990e8cb329cacf2137cdf4e9b4815eaeeb93c054 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 10:36:11 +0100 Subject: [PATCH 21/34] New comment --- examples/cmap-example/find_implemented_args.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/cmap-example/find_implemented_args.py b/examples/cmap-example/find_implemented_args.py index 219a05fc6d194..2d86e8bc95ccb 100644 --- a/examples/cmap-example/find_implemented_args.py +++ b/examples/cmap-example/find_implemented_args.py @@ -2,6 +2,7 @@ # these are almost always in the form params.argument; "logit_bias" is one exception # have yet to investigate fully what "lora_adapter" in server.cpp does since it is not apparently # accessible from the command-line arg/parameter sequence. +# there is also an issue with -ngl which does not appear in some help menus even when apparently implemented, e.g. in parallel.cpp import os import re From 51446bf9219fe5e8d12f5ca9e1342677cc40f527 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 13:35:52 +0100 Subject: [PATCH 22/34] Naming convention --- ..._implemented_args.py => find-implemented-args.py} | 12 ++++++------ .../{read_common_h.py => readcommonh.py} | 0 2 files changed, 6 insertions(+), 6 deletions(-) rename examples/cmap-example/{find_implemented_args.py => find-implemented-args.py} (96%) rename examples/cmap-example/{read_common_h.py => readcommonh.py} (100%) diff --git a/examples/cmap-example/find_implemented_args.py b/examples/cmap-example/find-implemented-args.py similarity index 96% rename from examples/cmap-example/find_implemented_args.py rename to examples/cmap-example/find-implemented-args.py index 2d86e8bc95ccb..38a83b9418127 100644 --- a/examples/cmap-example/find_implemented_args.py +++ b/examples/cmap-example/find-implemented-args.py @@ -8,7 +8,7 @@ import re import collections import re -import read_common_h +import readcommonh # update the source file - usually 'help_list.txt', so the default - in case the source file has been changed def update_file(file_from, file_to = "help_list.txt"): @@ -177,8 +177,8 @@ def find_parameters(file, sorted_result): # now do it the new way print("\nNow we extract the original gpt_params definition from common.h with the defaults for implemented arguments:\n") gpt_count = 0 - for k,v in read_common_h.parameters.items(): - if not read_common_h.parameters.items(): + for k,v in readcommonh.parameters.items(): + if not readcommonh.parameters.items(): print(f" \033[032mNone\033[0m\n") elif k in arguments: # print(f"gpt_params: \033[33m{k:>20}\033[0m values: {v}") @@ -190,9 +190,9 @@ def find_parameters(file, sorted_result): print("\nSearching the other way round is more efficient:\n") key_count = 0 for argument in set(arguments): - if argument in read_common_h.parameters: + if argument in readcommonh.parameters: key_count += 1 - print(f"{key_count:>2} key: {argument:>25}; role: {concatenate(read_common_h.parameters[argument]):<60}; default: {read_common_h.parameters[argument][1]:<10}") + print(f"{key_count:>2} key: {argument:>25}; role: 
{concatenate(readcommonh.parameters[argument]):<60}; default: {readcommonh.parameters[argument][1]:<10}") if help_count == gpt_count and gpt_count == key_count: print(f"\n\033[032mNo unresolved help-list incompatibilities with \033[33m{filename.split('/')[-1]}\033[0m") else: @@ -207,7 +207,7 @@ def find_parameters(file, sorted_result): update_file("common/common.cpp", "help_list.txt") # get the parameters from the common.h file utiity we import - print(read_common_h.parameters) + print(readcommonh.parameters) # So now we've got the gpt_parameters in this parameters dict # First we alter all the hyphenated help words in help-file.txt to underscores diff --git a/examples/cmap-example/read_common_h.py b/examples/cmap-example/readcommonh.py similarity index 100% rename from examples/cmap-example/read_common_h.py rename to examples/cmap-example/readcommonh.py From 3f07ed90a449512e9e62c7d3a1f11bcef33323ab Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 13:45:12 +0100 Subject: [PATCH 23/34] Added prompt-file to hep --- common/common.cpp | 1 + examples/cmap-example/find-implemented-args.py | 1 + help_list.txt | 1 + 3 files changed, 3 insertions(+) diff --git a/common/common.cpp b/common/common.cpp index 9b29d7532a156..f0147aa76d38c 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -748,6 +748,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" --nprobs N if > 0 output the probabilities of the top N tokens\n"); printf(" --alias model alias (default: 'unknown')\n"); printf(" --infill 0 (defaut) use infill mode\n"); + printf(" --prompt-file name of external prompt file\n"); printf("\n"); } diff --git a/examples/cmap-example/find-implemented-args.py b/examples/cmap-example/find-implemented-args.py index 38a83b9418127..a0146b7775e20 100644 --- a/examples/cmap-example/find-implemented-args.py +++ b/examples/cmap-example/find-implemented-args.py @@ -106,6 +106,7 @@ def substitution_list(parameters): "n_keep" : "keep", "n_probs" : "nprobs", "path_prompt_cache" : "prompt_cache", + "prompt_file" : "prompt_file", "input_prefix" : "in_prefix", "input_suffix" : "in_suffix", "input_prefix_bos" : "in_prefix_bos", diff --git a/help_list.txt b/help_list.txt index 38e0f6f1d8591..c74199e0e4264 100644 --- a/help_list.txt +++ b/help_list.txt @@ -104,3 +104,4 @@ --nprobs N if > 0 output the probabilities of the top N tokens\n" --alias model alias (default: 'unknown')\n" --infill 0 (defaut) use infill mode\n" + --prompt_file name of external prompt file\n" From 094d6d6e09f52913a7e8ee088c356ae5439af393 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 16:10:10 +0100 Subject: [PATCH 24/34] Add help list --- common/common.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/common.cpp b/common/common.cpp index f0147aa76d38c..1a6156473ab93 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -627,7 +627,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { } // There were missing items from this list of helps so the wording needs checking (all inserted at the end, so reposition too): -// --embedding, --beams, --ppl-stride, --ppl-output-type, --memory-f32, --no-mmap, --mlock, --use-color, --nprobs, --alias, --infill +// --embedding, --beams, --ppl-stride, --ppl-output-type, --memory-f32, --no-mmap, --mlock, --use-color, --nprobs, --alias, --infill, --prompt-file // some corresponding changes to the sequence of fprintf() code may be needed void gpt_print_usage(int /*argc*/, char ** argv, const 
gpt_params & params) { printf("usage: %s [options]\n", argv[0]); From 9abc92545cf79faa2b4274c5ee32bc1fbc6491d1 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 16:53:27 +0100 Subject: [PATCH 25/34] Remove trailing ws --- examples/cmap-example/cmap-example.cpp | 4 ++-- .../cmap-example/find-implemented-args.py | 20 +++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index ece30702aa098..d4214b3bbd0d1 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -24,7 +24,7 @@ int main() { } // Now try the helpdict idea - + printf("Second kind of dictionary\n"); // Create a list of strings @@ -37,7 +37,7 @@ int main() { std::list& fruitsList = helpdict["fruits"]; fruitsList.push_back("grape"); fruitsList.push_back("pineapple"); - + for (const auto& pair : helpdict) { printf("helpdict contains a list of %s\n", pair.first.c_str()); for (const auto& element : pair.second) { diff --git a/examples/cmap-example/find-implemented-args.py b/examples/cmap-example/find-implemented-args.py index a0146b7775e20..6de919097e9f5 100644 --- a/examples/cmap-example/find-implemented-args.py +++ b/examples/cmap-example/find-implemented-args.py @@ -29,10 +29,10 @@ def update_file(file_from, file_to = "help_list.txt"): def replace_dashes_with_underscores(filename): with open(filename, 'r') as file: content = file.read() - + # Match '-' surrounded by word characters on both sides and replace with '_' replaced_content = re.sub(r'(\w)-(\w)', r'\1_\2', content) - + with open(filename, 'w') as file: file.write(replaced_content) @@ -86,9 +86,9 @@ def concatenate(v): def title_print(filename): title = filename.split('/')[-1] - print("\n\n"+"#"*(10+len(title))) + print("\n\n"+"#"*(10+len(title))) print(f"Filename: \033[32m{title}\033[0m") - print("#"*(10+len(title))) + print("#"*(10+len(title))) # list all the equivalences between declarations in common.h and common.cpp that defines the help # these are used to substitute the searched params.attributes (keys) with help attributes (values) @@ -130,7 +130,7 @@ def substitution_list(parameters): new_parameters = [] for parameter in parameters: if parameter in sub_dict: - # we need both for future reference + # we need both for future reference new_parameters.append(parameter) new_parameters.append(sub_dict[parameter]) else: @@ -154,7 +154,7 @@ def find_parameters(file, sorted_result): # bit of a hack to exclude --attributes at the end of help comment lines if re.search(pattern, line[:50]): parameters.append(line) - + all_parameters = set(parameters) title_print(filename) @@ -162,8 +162,8 @@ def find_parameters(file, sorted_result): if not all_parameters: print(f" \033[032mNone\033[0m\n") - - # first do it the original way + + # first do it the original way else: help_count = 0 for parameter in all_parameters: @@ -186,7 +186,7 @@ def find_parameters(file, sorted_result): concatenated_element = concatenate(v) gpt_count += 1 print(f"{gpt_count:>2} gpt_param: \033[32m{k:>19}; \033[34mrole: \033[33m{concatenated_element:<60}\033[0m; \033[34mdefault: \033[30m{v[1]:<10}\033[0m ") - + # searching the other way round is quicker: print("\nSearching the other way round is more efficient:\n") key_count = 0 @@ -203,7 +203,7 @@ def find_parameters(file, sorted_result): directory = '/Users/edsilm2/llama.cpp/examples' if __name__ == '__main__': - + # update the source help file from C++ source (this works exactly as required) 
update_file("common/common.cpp", "help_list.txt") From 6189a9ef3ab582a6a96f7ed9b84ca89364c98f9b Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 16:55:31 +0100 Subject: [PATCH 26/34] One more trailing ws --- examples/cmap-example/readcommonh.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/cmap-example/readcommonh.py b/examples/cmap-example/readcommonh.py index 1c18d496002aa..422edea8fd252 100644 --- a/examples/cmap-example/readcommonh.py +++ b/examples/cmap-example/readcommonh.py @@ -25,7 +25,7 @@ if "float>" in parameters and parameters["float>"][1] == 'logit_bias': del parameters["float>"] - # this is a bit of a hack to terminate the harvest + # this is a bit of a hack to terminate the harvest if len(non_whitespace_elements) > 2 and non_whitespace_elements[1] == "infill": inside = False break From 53dbefa2f8583fbece87d7a6e3d3ca3346b1a3f2 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 17:05:24 +0100 Subject: [PATCH 27/34] Added final newline (2) --- examples/cmap-example/cmap-example.cpp | 2 +- examples/cmap-example/find-implemented-args.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index d4214b3bbd0d1..0733952074247 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -141,4 +141,4 @@ int main() { } } return 0; -} \ No newline at end of file +} diff --git a/examples/cmap-example/find-implemented-args.py b/examples/cmap-example/find-implemented-args.py index 6de919097e9f5..c48c8c5a3d82c 100644 --- a/examples/cmap-example/find-implemented-args.py +++ b/examples/cmap-example/find-implemented-args.py @@ -225,4 +225,4 @@ def find_parameters(file, sorted_result): print("\n######################## find help context parameters #################################") # analyse the files and what they contain - find_parameters("help_list.txt", sorted) \ No newline at end of file + find_parameters("help_list.txt", sorted) From ba324022917f39bd849dbce73da9728957e82090 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 20:38:00 +0100 Subject: [PATCH 28/34] creadcommonh --- examples/cmap-example/cmap-example.cpp | 90 ++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index 0733952074247..3fa90a726a326 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -7,6 +7,12 @@ #include #include +#include +#include +#include +#include +#include + int main() { std::map dict; std::map> helpdict; @@ -142,3 +148,87 @@ int main() { } return 0; } + +std::vector splitString(const std::string& str, const std::string& delimiter) { + std::vector tokens; + std::size_t start = 0, end = 0; + while ((end = str.find(delimiter, start)) != std::string::npos) { + tokens.push_back(str.substr(start, end - start)); + start = end + delimiter.length(); + } + tokens.push_back(str.substr(start)); + return tokens; +} + +std::unordered_map> extractParameters() { + std::ifstream file("common/common.h"); + std::string line; + std::vector lines; + while (std::getline(file, line)) { + lines.push_back(line); + } + + std::unordered_map> parameters; + parameters["logit_bias"] = {"logit_bias", "0", "//", "way", "to", "alter", "probability", "of", "particular", "words"); + + bool inside = false; + for (const std::string& line : lines) { + std::vector nonWhitespaceElements = 
splitString(line, " "); + std::cout << "nwe = \033[33m"; + for (const std::string& element : nonWhitespaceElements) { + std::cout << element << " "; + } + std::cout << "\033[0m" << std::endl; + + if (!nonWhitespaceElements.empty() && nonWhitespaceElements[0] == "struct") { + inside = true; + } + + if (nonWhitespaceElements.size() > 2 && inside) { + // Note: cannot use nwe[0] because types do not generate unique keys and so overwrite + // Here we deliberately add back the key so we can manually change it when it is different + parameters[nonWhitespaceElements[1]] = nonWhitespaceElements; + + // Remove spurious entry caused by eccentric status of logit_bias + if (parameters.count("float>") && parameters["float>"][1] == "logit_bias") { + parameters.erase("float>"); + } + } + + // Terminate the harvest + if (nonWhitespaceElements.size() > 2 && nonWhitespaceElements[1] == "infill") { + inside = false; + break; + } + } + + for (const auto& pair : parameters) { + const std::string& key = pair.first; + const std::vector& value = pair.second; + std::cout << "key: " << std::left << std::setw(20) << key << "; values: "; + for (const std::string& element : value) { + std::cout << element << " "; + } + std::cout << std::endl; + + std::string concatenatedElement = ""; + for (std::size_t i = 0; i < value.size(); i++) { + if (value[i] == "//") { + concatenatedElement = std::accumulate(value.begin() + i, value.end(), std::string("")); + // break; + } + } + + std::cout << std::string(10, ' '); + std::cout << "parameter: \033[32m" << std::right << std::setw(40) << key << " \033[34mdefault: \033[30m" + << std::right << std::setw(5) << value[1] << " \033[34mcomment: \033[33m" + << std::left << std::setw(80) << concatenatedElement << "\033[0m" << std::endl; + } + + return parameters; +} + +int main() { + std::unordered_map> parameters = extractParameters(); + return 0; +} From 47675b920037b41fd67ee7c62d309b26bad84aa8 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 21:00:49 +0100 Subject: [PATCH 29/34] experiment in C --- examples/cmap-example/cmap-example.cpp | 46 +++++++------ examples/cmap-example/creadcommonh.cpp | 91 ++++++++++++++++++++++++++ 2 files changed, 113 insertions(+), 24 deletions(-) create mode 100644 examples/cmap-example/creadcommonh.cpp diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index 3fa90a726a326..0719fc95f4759 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -146,32 +146,31 @@ int main() { printf("\n"); } } - return 0; -} -std::vector splitString(const std::string& str, const std::string& delimiter) { - std::vector tokens; - std::size_t start = 0, end = 0; - while ((end = str.find(delimiter, start)) != std::string::npos) { - tokens.push_back(str.substr(start, end - start)); - start = end + delimiter.length(); + std::vector splitString(const std::string& str, const std::string& delimiter) { + std::vector tokens; + std::size_t start = 0, end = 0; + while ((end = str.find(delimiter, start)) != std::string::npos) { + tokens.push_back(str.substr(start, end - start)); + start = end + delimiter.length(); + } + tokens.push_back(str.substr(start)); + return tokens; } - tokens.push_back(str.substr(start)); - return tokens; -} -std::unordered_map> extractParameters() { - std::ifstream file("common/common.h"); - std::string line; - std::vector lines; - while (std::getline(file, line)) { - lines.push_back(line); - } + std::unordered_map> extractParameters() { + std::ifstream 
file("common/common.h"); + std::string line; + std::vector lines; + while (std::getline(file, line)) { + lines.push_back(line); + } std::unordered_map> parameters; parameters["logit_bias"] = {"logit_bias", "0", "//", "way", "to", "alter", "probability", "of", "particular", "words"); bool inside = false; + for (const std::string& line : lines) { std::vector nonWhitespaceElements = splitString(line, " "); std::cout << "nwe = \033[33m"; @@ -221,14 +220,13 @@ std::unordered_map> extractParameters() { std::cout << std::string(10, ' '); std::cout << "parameter: \033[32m" << std::right << std::setw(40) << key << " \033[34mdefault: \033[30m" - << std::right << std::setw(5) << value[1] << " \033[34mcomment: \033[33m" - << std::left << std::setw(80) << concatenatedElement << "\033[0m" << std::endl; + << std::right << std::setw(5) << value[1] << " \033[34mcomment: \033[33m" + << std::left << std::setw(80) << concatenatedElement << "\033[0m" << std::endl; + } + return parameters; } - return parameters; -} - -int main() { std::unordered_map> parameters = extractParameters(); + return 0; } diff --git a/examples/cmap-example/creadcommonh.cpp b/examples/cmap-example/creadcommonh.cpp new file mode 100644 index 0000000000000..7e8ac67f4d5e9 --- /dev/null +++ b/examples/cmap-example/creadcommonh.cpp @@ -0,0 +1,91 @@ +#include +#include +#include +#include +#include +#include + +std::vector splitString(const std::string& str, const std::string& delimiter) { + std::vector tokens; + std::size_t start = 0, end = 0; + while ((end = str.find(delimiter, start)) != std::string::npos) { + tokens.push_back(str.substr(start, end - start)); + start = end + delimiter.length(); + } + tokens.push_back(str.substr(start)); + return tokens; +} + +std::unordered_map> extractParameters() { + std::ifstream file("common/common.h"); + std::string line; + std::vector lines; + while (std::getline(file, line)) { + lines.push_back(line); + } + + std::unordered_map> parameters; + parameters["logit_bias"] = {"logit_bias", "0", "//", "way", "to", "alter", "prob", "of", "word", "being", "chosen"); + + bool inside = false; + for (const std::string& line : lines) { + std::vector nonWhitespaceElements = splitString(line, " "); + std::cout << "nwe = \033[33m"; + for (const std::string& element : nonWhitespaceElements) { + std::cout << element << " "; + } + std::cout << "\033[0m" << std::endl; + + if (!nonWhitespaceElements.empty() && nonWhitespaceElements[0] == "struct") { + inside = true; + } + + if (nonWhitespaceElements.size() > 2 && inside) { + // Note: cannot use nwe[0] because types do not generate unique keys and so overwrite + // Here we deliberately add back the key so we can manually change it when it is different + parameters[nonWhitespaceElements[1]] = nonWhitespaceElements; + + // Remove spurious entry caused by eccentric status of logit_bias + if (parameters.count("float>") && parameters["float>"][1] == "logit_bias") { + parameters.erase("float>"); + } + } + + // Terminate the harvest + if (nonWhitespaceElements.size() > 2 && nonWhitespaceElements[1] == "infill") { + inside = false; + break; + } + } + + for (const auto& pair : parameters) { + const std::string& key = pair.first; + const std::vector& value = pair.second; + std::cout << "key: " << std::left << std::setw(20) << key << "; values: "; + for (const std::string& element : value) { + std::cout << element << " "; + } + std::cout << std::endl; + + std::string concatenatedElement = ""; + for (std::size_t i = 0; i < value.size(); i++) { + if (value[i] == "//") { + 
concatenatedElement = std::accumulate(value.begin() + i, value.end(), std::string("")); + // break; + } + } + + std::cout << std::string(10, ' '); + std::cout << "parameter: \033[32m" << std::right << std::setw(40) << key << " \033[34mdefault: \033[30m" + << std::right << std::setw(5) << value[1] << " \033[34mcomment: \033[33m" + << std::left << std::setw(80) << concatenatedElement << "\033[0m" << std::endl; + } + + return parameters; +} + +// everything above is called from here as 'extractParameters()' +int main() { + std::unordered_map> parameters = extractParameters(); + return 0; +} From bddd099edff632c4b80b94a3ff9a70bf9b498e70 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 22:17:17 +0100 Subject: [PATCH 30/34] experimental --- examples/cmap-example/cmap-example.cpp | 160 +++++++++++++------------ examples/cmap-example/creadcommonh.cpp | 3 +- 2 files changed, 83 insertions(+), 80 deletions(-) diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index 0719fc95f4759..1487a61abe808 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -12,6 +12,86 @@ #include #include #include +#include + +std::vector splitString(const std::string& str, const std::string& delimiter) { + std::vector tokens; + std::size_t start = 0, end = 0; + while ((end = str.find(delimiter, start)) != std::string::npos) { + tokens.push_back(str.substr(start, end - start)); + start = end + delimiter.length(); + } + tokens.push_back(str.substr(start)); + return tokens; +} + +std::unordered_map> extractParameters() { + std::ifstream file("common/common.h"); + std::string line; + std::vector lines; + while (std::getline(file, line)) { + lines.push_back(line); + } + + std::unordered_map> parameters; + parameters["logit_bias"] = {"logit_bias", "0", "//", "way", "to", "alter", "prob", "of", "word", "being", "chosen"}; + + bool inside = false; + for (const std::string& line : lines) { + std::vector nonWhitespaceElements = splitString(line, " "); + std::cout << "nwe = \033[33m"; + for (const std::string& element : nonWhitespaceElements) { + std::cout << element << " "; + } + std::cout << "\033[0m" << std::endl; + + if (!nonWhitespaceElements.empty() && nonWhitespaceElements[0] == "struct") { + inside = true; + } + + if (nonWhitespaceElements.size() > 2 && inside) { + // Note: cannot use nwe[0] because types do not generate unique keys and so overwrite + // Here we deliberately add back the key so we can manually change it when it is different + parameters[nonWhitespaceElements[1]] = nonWhitespaceElements; + + // Remove spurious entry caused by eccentric status of logit_bias + if (parameters.count("float>") && parameters["float>"][1] == "logit_bias") { + parameters.erase("float>"); + } + } + + // Terminate the harvest + if (nonWhitespaceElements.size() > 2 && nonWhitespaceElements[1] == "infill") { + inside = false; + break; + } + } + + for (const auto& pair : parameters) { + const std::string& key = pair.first; + const std::vector& value = pair.second; + std::cout << "key: " << std::left << std::setw(20) << key << "; values: "; + for (const std::string& element : value) { + std::cout << element << " "; + } + std::cout << std::endl; + + std::string concatenatedElement = ""; + for (std::size_t i = 0; i < value.size(); i++) { + if (value[i] == "//") { + concatenatedElement = std::accumulate(value.begin() + i, value.end(), std::string("")); + // break; + } + } + + std::cout << std::string(10, ' '); + std::cout << "parameter: 
\033[32m" << std::right << std::setw(40) << key << " \033[34mdefault: \033[30m" + << std::right << std::setw(5) << value[1] << " \033[34mcomment: \033[33m" + << std::left << std::setw(80) << concatenatedElement << "\033[0m" << std::endl; + } + + return parameters; +} int main() { std::map dict; @@ -146,85 +226,7 @@ int main() { printf("\n"); } } - - std::vector splitString(const std::string& str, const std::string& delimiter) { - std::vector tokens; - std::size_t start = 0, end = 0; - while ((end = str.find(delimiter, start)) != std::string::npos) { - tokens.push_back(str.substr(start, end - start)); - start = end + delimiter.length(); - } - tokens.push_back(str.substr(start)); - return tokens; - } - - std::unordered_map> extractParameters() { - std::ifstream file("common/common.h"); - std::string line; - std::vector lines; - while (std::getline(file, line)) { - lines.push_back(line); - } - - std::unordered_map> parameters; - parameters["logit_bias"] = {"logit_bias", "0", "//", "way", "to", "alter", "probability", "of", "particular", "words"); - - bool inside = false; - - for (const std::string& line : lines) { - std::vector nonWhitespaceElements = splitString(line, " "); - std::cout << "nwe = \033[33m"; - for (const std::string& element : nonWhitespaceElements) { - std::cout << element << " "; - } - std::cout << "\033[0m" << std::endl; - - if (!nonWhitespaceElements.empty() && nonWhitespaceElements[0] == "struct") { - inside = true; - } - - if (nonWhitespaceElements.size() > 2 && inside) { - // Note: cannot use nwe[0] because types do not generate unique keys and so overwrite - // Here we deliberately add back the key so we can manually change it when it is different - parameters[nonWhitespaceElements[1]] = nonWhitespaceElements; - - // Remove spurious entry caused by eccentric status of logit_bias - if (parameters.count("float>") && parameters["float>"][1] == "logit_bias") { - parameters.erase("float>"); - } - } - - // Terminate the harvest - if (nonWhitespaceElements.size() > 2 && nonWhitespaceElements[1] == "infill") { - inside = false; - break; - } - } - - for (const auto& pair : parameters) { - const std::string& key = pair.first; - const std::vector& value = pair.second; - std::cout << "key: " << std::left << std::setw(20) << key << "; values: "; - for (const std::string& element : value) { - std::cout << element << " "; - } - std::cout << std::endl; - - std::string concatenatedElement = ""; - for (std::size_t i = 0; i < value.size(); i++) { - if (value[i] == "//") { - concatenatedElement = std::accumulate(value.begin() + i, value.end(), std::string("")); - // break; - } - } - - std::cout << std::string(10, ' '); - std::cout << "parameter: \033[32m" << std::right << std::setw(40) << key << " \033[34mdefault: \033[30m" - << std::right << std::setw(5) << value[1] << " \033[34mcomment: \033[33m" - << std::left << std::setw(80) << concatenatedElement << "\033[0m" << std::endl; - } - return parameters; - } + // now process the extra bit inserted to replicate readcommonh.py std::unordered_map> parameters = extractParameters(); diff --git a/examples/cmap-example/creadcommonh.cpp b/examples/cmap-example/creadcommonh.cpp index 7e8ac67f4d5e9..59e1e94ab6600 100644 --- a/examples/cmap-example/creadcommonh.cpp +++ b/examples/cmap-example/creadcommonh.cpp @@ -4,6 +4,7 @@ #include #include #include +#include std::vector splitString(const std::string& str, const std::string& delimiter) { std::vector tokens; @@ -25,7 +26,7 @@ std::unordered_map> extractParameters() { } std::unordered_map> 
parameters; - parameters["logit_bias"] = {"logit_bias", "0", "//", "way", "to", "alter", "prob", "of", "word", "being", "chosen"); + parameters["logit_bias"] = {"logit_bias", "0", "//", "way", "to", "alter", "prob", "of", "word", "being", "chosen"}; bool inside = false; for (const std::string& line : lines) { From 301f13cf9b37a12f8b9f7c810733f9ad652c77b4 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 22:49:57 +0100 Subject: [PATCH 31/34] Compile new cmap-example --- examples/CMakeLists.txt | 1 + examples/cmap-example/cmap-example.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index de4cf7a691768..b32706b0337e0 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -30,6 +30,7 @@ else() add_subdirectory(embd-input) add_subdirectory(llama-bench) add_subdirectory(beam-search) + add_subdirectory(cmap-example) if (LLAMA_METAL) add_subdirectory(metal) endif() diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index 1487a61abe808..5d2f39762b299 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -36,7 +36,7 @@ std::unordered_map> extractParameters() { std::unordered_map> parameters; parameters["logit_bias"] = {"logit_bias", "0", "//", "way", "to", "alter", "prob", "of", "word", "being", "chosen"}; - bool inside = false; + bool inside = false; for (const std::string& line : lines) { std::vector nonWhitespaceElements = splitString(line, " "); std::cout << "nwe = \033[33m"; From 759973be79ee1777314392ba5a9f33696edf842c Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Mon, 9 Oct 2023 23:11:43 +0100 Subject: [PATCH 32/34] printf format --- examples/cmap-example/cmap-example.cpp | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index 5d2f39762b299..cfac88860093d 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -39,11 +39,11 @@ std::unordered_map> extractParameters() { bool inside = false; for (const std::string& line : lines) { std::vector nonWhitespaceElements = splitString(line, " "); - std::cout << "nwe = \033[33m"; + printf("nwe = \033[33m"); for (const std::string& element : nonWhitespaceElements) { - std::cout << element << " "; + printf("%s ", element); } - std::cout << "\033[0m" << std::endl; + printf("\033[0m\n"); if (!nonWhitespaceElements.empty() && nonWhitespaceElements[0] == "struct") { inside = true; @@ -70,11 +70,11 @@ std::unordered_map> extractParameters() { for (const auto& pair : parameters) { const std::string& key = pair.first; const std::vector& value = pair.second; - std::cout << "key: " << std::left << std::setw(20) << key << "; values: "; + printf("key: %s; values: ", key); for (const std::string& element : value) { - std::cout << element << " "; + printf("%s ", element); } - std::cout << std::endl; + printf("\n"); std::string concatenatedElement = ""; for (std::size_t i = 0; i < value.size(); i++) { @@ -84,10 +84,7 @@ std::unordered_map> extractParameters() { } } - std::cout << std::string(10, ' '); - std::cout << "parameter: \033[32m" << std::right << std::setw(40) << key << " \033[34mdefault: \033[30m" - << std::right << std::setw(5) << value[1] << " \033[34mcomment: \033[33m" - << std::left << std::setw(80) << concatenatedElement << "\033[0m" << std::endl; + printf("parameter: \033[32m key: \033[34m%s default: \033[30m%s 
\033[34mcomment: \033[33m%s\033[0m\n", key, value[1], concatenatedElement); } return parameters; From 37050ba90a9fe47bbfa593278eeba3fa0303cc90 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Tue, 10 Oct 2023 13:33:12 +0100 Subject: [PATCH 33/34] Updated cmap-example --- examples/cmap-example/cmap-example.cpp | 245 +++++++------------------ examples/cmap-example/creadcommonh.cpp | 92 ---------- 2 files changed, 69 insertions(+), 268 deletions(-) delete mode 100644 examples/cmap-example/creadcommonh.cpp diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index cfac88860093d..1c39bd2bdbd52 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -1,31 +1,61 @@ // example of a C/C++ equivalent data structure to the python dict -// there are two: std::map automatically sorts on key; std::unordered_map does not #include #include #include #include #include - -#include +#include +#include #include #include #include -#include +// there may be good reasons not to sort the parameters, but here we use map +#include #include -std::vector splitString(const std::string& str, const std::string& delimiter) { +std::vector split_string(const std::string& str, const std::string& delimiter) { std::vector tokens; std::size_t start = 0, end = 0; + bool inside_tags = false; // flag to track if we are inside "<>" + while ((end = str.find(delimiter, start)) != std::string::npos) { - tokens.push_back(str.substr(start, end - start)); + std::string token = str.substr(start, end - start); + + // if (!token.empty()) { // Add condition to exclude empty substrings + // tokens.push_back(token); + + if (!inside_tags && !token.empty()) { // Add condition to exclude empty substrings and if not inside "<>" + tokens.push_back(token); + } + // deal with cases where the split character occurs inside <> + // Update inside_tags flag based on "<>" + size_t open_tag_pos = str.find("<", start); + size_t close_tag_pos = str.find(">", start); + if (open_tag_pos != std::string::npos && close_tag_pos != std::string::npos && open_tag_pos < end) { + inside_tags = true; + } else if (close_tag_pos != std::string::npos && close_tag_pos < end) { + inside_tags = false; + } start = end + delimiter.length(); } tokens.push_back(str.substr(start)); return tokens; } -std::unordered_map> extractParameters() { +void print_parameters(const std::map>& parameters) { + for (const auto& pair : parameters) { + const std::string& key = pair.first; + const std::vector& value = pair.second; // usually has multiple elements + printf("key: %25s: values: ", key.c_str()); + for (const std::string& element : value) { + printf("%s ", element.c_str()); + } + printf("\n"); + } +} + +std::map> extract_parameters() { std::ifstream file("common/common.h"); std::string line; std::vector lines; @@ -33,199 +63,62 @@ std::unordered_map> extractParameters() { lines.push_back(line); } - std::unordered_map> parameters; - parameters["logit_bias"] = {"logit_bias", "0", "//", "way", "to", "alter", "prob", "of", "word", "being", "chosen"}; + std::map> parameters; + // fix up failure to match logit_bias; may also need to add lora_adapter; now dealt with and ready for deletion + // parameters["logit_bias"] = {"std::unordered_map" "logit_bias", "=", "0", "//", "way", "to", "alter", "prob", "of", "word", "being", "chosen"}; + // parameters["lora_adapter"] = {"std::vector>", "lora_adapter", "=", "", "//", "lora", "adapter", "path", "with", "user-defined", "scale"}; + // are we inside gpt_params? 
+ // this for loop finds all the params inside struct gpt-params bool inside = false; for (const std::string& line : lines) { - std::vector nonWhitespaceElements = splitString(line, " "); - printf("nwe = \033[33m"); - for (const std::string& element : nonWhitespaceElements) { - printf("%s ", element); + std::vector nws_elements = split_string(line, " "); + printf("nwe = "); + for (const std::string& element : nws_elements) { + printf("%s ", element.c_str()); } - printf("\033[0m\n"); + printf("\n"); - if (!nonWhitespaceElements.empty() && nonWhitespaceElements[0] == "struct") { + if (!nws_elements.empty() && nws_elements[0] == "struct" && nws_elements[1] == "gpt_params") { inside = true; } - if (nonWhitespaceElements.size() > 2 && inside) { - // Note: cannot use nwe[0] because types do not generate unique keys and so overwrite - // Here we deliberately add back the key so we can manually change it when it is different - parameters[nonWhitespaceElements[1]] = nonWhitespaceElements; + if (nws_elements.size() > 2 && inside) { + // cannot use nwe[0] as key because types do not generate unique keys and so overwrite + // Here we deliberately add back the key so we can manually change it when it is different (remove eventually) + // parameters[nws_elements[1]] = nws_elements; + std::vector copy = nws_elements; // Create a copy of nws_elements + parameters[nws_elements[1]] = copy; // Assign the copy to parameters // Remove spurious entry caused by eccentric status of logit_bias - if (parameters.count("float>") && parameters["float>"][1] == "logit_bias") { + if (parameters.count("float>") && parameters["float>"][2] == "logit_bias;") { parameters.erase("float>"); } + // Remove spurious entry caused by eccentric status of lora_adapter + if (parameters.count("float>>") && parameters["float>>"][2] == "lora_adapter;") { + parameters.erase("float>>"); + } } - // Terminate the harvest - if (nonWhitespaceElements.size() > 2 && nonWhitespaceElements[1] == "infill") { + // Terminate the harvest; TODO: not robust; need better terminator; this just a crude hack for now + if (nws_elements.size() > 2 && nws_elements[1] == "infill") { inside = false; break; - } - } - - for (const auto& pair : parameters) { - const std::string& key = pair.first; - const std::vector& value = pair.second; - printf("key: %s; values: ", key); - for (const std::string& element : value) { - printf("%s ", element); - } - printf("\n"); - - std::string concatenatedElement = ""; - for (std::size_t i = 0; i < value.size(); i++) { - if (value[i] == "//") { - concatenatedElement = std::accumulate(value.begin() + i, value.end(), std::string("")); - // break; } } + // now display them (unnecessary operationally; here for development) + print_parameters(parameters); - printf("parameter: \033[32m key: \033[34m%s default: \033[30m%s \033[34mcomment: \033[33m%s\033[0m\n", key, value[1], concatenatedElement); - } - + // return the results (will eventually become a void function) return parameters; } int main() { - std::map dict; - std::map> helpdict; - - dict[std::string("apple")] = 5; - dict[std::string("banana")] = 2; - dict[std::string("orange")] = 7; - - - // Accessing elements in the map - printf("First kind of dictionary\n\nValue of apple: %d\n", dict[std::string("apple")]); - - for (const auto& pair : dict) { - printf("Key: %10s, Value: %4d\n", pair.first.c_str(), pair.second); - } - - // Now try the helpdict idea - - printf("Second kind of dictionary\n"); - - // Create a list of strings - std::list stringList = {"apple", "banana", "orange"}; - 
- // Add key-value pair to map - helpdict["fruits"] = stringList; - - // Access and modify the list of strings - std::list& fruitsList = helpdict["fruits"]; - fruitsList.push_back("grape"); - fruitsList.push_back("pineapple"); - - for (const auto& pair : helpdict) { - printf("helpdict contains a list of %s\n", pair.first.c_str()); - for (const auto& element : pair.second) { - printf(" %s", element.c_str()); - } - printf("\n"); - } - - // Create a binary key for each value consisting of a list of strings - - std::map> bitdict; - - // Example binary key - int binaryKey1 = 0b0000001; - int binaryKey2 = 0b0000010; - int binaryKey3 = 0b0000100; - int binaryKey4 = 0b0001000; - int binaryKey5 = 0b0010000; - - // Convert binary key to string - std::string keyString1 = std::bitset<8>(binaryKey1).to_string(); - std::string keyString2 = std::bitset<8>(binaryKey2).to_string(); - std::string keyString3 = std::bitset<8>(binaryKey3).to_string(); - std::string keyString4 = std::bitset<8>(binaryKey4).to_string(); - std::string keyString5 = std::bitset<8>(binaryKey5).to_string(); - - // Add key-value pair to map - bitdict[keyString1] = {"-h", "--help", "print this help list and exit"}; - bitdict[keyString2] = {"-f", "FNAME", "--file", "FNAME", "read the prompts from an external text file"}; - bitdict[keyString3] = {"-n", "N", "--n-predict", "N", "number of tokens to predict in generating a completion"}; - bitdict[keyString4] = {"-t", "N", "--threads", "N", "number of threads to use"}; - bitdict[keyString5] = {"-m", "MODELPATH", "--model", "MODELPATH", "path to llama model to use"}; - - - for (const auto& pair : bitdict) { - printf("help dictionary contains a list of arguments specific to this app %s\n", pair.first.substr(pair.first.size() - 5).c_str()); - for (const auto& element : pair.second) { - printf(" %5s", element.c_str()); - } - printf("\n"); - } - - printf("\nThis is the complete help file in this mock-up illustrative example:\n\n"); - for (const auto& pair: bitdict) { - printf("%s ",pair.first.c_str()); - for (const auto& element : pair.second) { - printf(" %5s", element.c_str()); - } - printf("\n"); - } - - // Now we try to use the appcode to select from the help available - // app1 has only -h and -f so 0b00011; app2 has only -h and -n so 0b00101 - - int app1code = 0b0001011; - int app2code = 0b0010111; - - printf("\nNow processing app with only -h, -t and -f implemented and appcode %3d\n", app1code); - if (app1code != 0) { - for (const auto& kvp : bitdict) { - if ((app1code & std::stoi(kvp.first)) != 0) { - printf("%s ",kvp.first.c_str()); - for (const auto& element : kvp.second) { - printf(" %5s", element.c_str()); - } - printf("\n"); - } - } - printf("\n"); - } - - printf("\nNow processing app with only -h, -f, -m and -n implemented and appcode %3d\n", app2code); - if (app2code != 0) { - for (const auto& kvp : bitdict) { - if ((app2code & std::stoi(kvp.first)) != 0) { - printf("%s ",kvp.first.c_str()); - for (const auto& element : kvp.second) { - printf(" %5s", element.c_str()); - } - printf("\n"); - } - } - printf("\n"); - } - - // This is more like the general way to do it - std::vector appcodes = {2, 5, 11, 17, 23, 31}; - for (size_t i = 0; i < appcodes.size(); ++i) { - int x = appcodes[i]; - if (x != 0) { - for (const auto& kvp : bitdict) { - if ((x & std::stoi(kvp.first)) != 0) { - printf("appcode %3d %s ", x, kvp.first.c_str()); - for (const auto& element : kvp.second) { - printf(" %5s", element.c_str()); - } - printf("\n"); - } - } - printf("\n"); - } - } - // now process the extra bit 
inserted to replicate readcommonh.py - std::unordered_map> parameters = extractParameters(); + // process the code inserted to replicate readcommonh.py + // this does not produce output but here is forced; it just collects the output into parameters and returns 0 + std::map> parameters = extract_parameters(); + print_parameters(parameters); return 0; } diff --git a/examples/cmap-example/creadcommonh.cpp b/examples/cmap-example/creadcommonh.cpp deleted file mode 100644 index 59e1e94ab6600..0000000000000 --- a/examples/cmap-example/creadcommonh.cpp +++ /dev/null @@ -1,92 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -std::vector splitString(const std::string& str, const std::string& delimiter) { - std::vector tokens; - std::size_t start = 0, end = 0; - while ((end = str.find(delimiter, start)) != std::string::npos) { - tokens.push_back(str.substr(start, end - start)); - start = end + delimiter.length(); - } - tokens.push_back(str.substr(start)); - return tokens; -} - -std::unordered_map> extractParameters() { - std::ifstream file("common/common.h"); - std::string line; - std::vector lines; - while (std::getline(file, line)) { - lines.push_back(line); - } - - std::unordered_map> parameters; - parameters["logit_bias"] = {"logit_bias", "0", "//", "way", "to", "alter", "prob", "of", "word", "being", "chosen"}; - - bool inside = false; - for (const std::string& line : lines) { - std::vector nonWhitespaceElements = splitString(line, " "); - std::cout << "nwe = \033[33m"; - for (const std::string& element : nonWhitespaceElements) { - std::cout << element << " "; - } - std::cout << "\033[0m" << std::endl; - - if (!nonWhitespaceElements.empty() && nonWhitespaceElements[0] == "struct") { - inside = true; - } - - if (nonWhitespaceElements.size() > 2 && inside) { - // Note: cannot use nwe[0] because types do not generate unique keys and so overwrite - // Here we deliberately add back the key so we can manually change it when it is different - parameters[nonWhitespaceElements[1]] = nonWhitespaceElements; - - // Remove spurious entry caused by eccentric status of logit_bias - if (parameters.count("float>") && parameters["float>"][1] == "logit_bias") { - parameters.erase("float>"); - } - } - - // Terminate the harvest - if (nonWhitespaceElements.size() > 2 && nonWhitespaceElements[1] == "infill") { - inside = false; - break; - } - } - - for (const auto& pair : parameters) { - const std::string& key = pair.first; - const std::vector& value = pair.second; - std::cout << "key: " << std::left << std::setw(20) << key << "; values: "; - for (const std::string& element : value) { - std::cout << element << " "; - } - std::cout << std::endl; - - std::string concatenatedElement = ""; - for (std::size_t i = 0; i < value.size(); i++) { - if (value[i] == "//") { - concatenatedElement = std::accumulate(value.begin() + i, value.end(), std::string("")); - // break; - } - } - - std::cout << std::string(10, ' '); - std::cout << "parameter: \033[32m" << std::right << std::setw(40) << key << " \033[34mdefault: \033[30m" - << std::right << std::setw(5) << value[1] << " \033[34mcomment: \033[33m" - << std::left << std::setw(80) << concatenatedElement << "\033[0m" << std::endl; - } - - return parameters; -} - -// everything above is called from here as 'extractParameters()' -int main() { - std::unordered_map> parameters = extractParameters(); - return 0; -} From 37ae96c712954b92420da6ebbbadc54761e3f42f Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Tue, 10 Oct 2023 13:50:28 +0100 Subject: [PATCH 
34/34] Correct bug --- examples/cmap-example/cmap-example.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index 1c39bd2bdbd52..d06699c864b94 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -1,4 +1,4 @@ -// example of a C/C++ equivalent data structure to the python dict +// example of a C/C++ equivalent data structure to the python dict in readcommonh.py #include #include @@ -101,7 +101,7 @@ std::map<std::string, std::vector<std::string>> extract_parameters() { } // Terminate the harvest; TODO: not robust; need better terminator; this just a crude hack for now - if (nws_elements.size() > 2 && nws_elements[1] == "infill") { + if (nws_elements.size() > 2 && nws_elements[2] == "infill") { inside = false; break; }
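
Note on the data structures used across these patches: the sketch below is a minimal, standalone illustration of the two dict-like containers (std::map, which keeps keys sorted, and std::unordered_map, which does not) together with a plain whitespace splitter of the kind used by extract_parameters(). The headers, template arguments, and the sample gpt_params declaration line are assumptions inferred from how the code in the patches is used, not copies of the committed files, and the splitter shown is the simple form without the later "<>"-aware handling.

// Minimal sketch (illustrative only): C++ counterparts of a Python dict and the
// whitespace splitter used to tokenise declaration lines from common/common.h.
// Headers and template arguments here are inferred from usage in the patches above.
#include <cstddef>
#include <cstdio>
#include <map>
#include <string>
#include <unordered_map>
#include <vector>

// Split `str` on `delimiter`, dropping empty tokens (simple form, without the
// later "<>"-aware handling for template-typed members).
static std::vector<std::string> split_string(const std::string & str, const std::string & delimiter) {
    std::vector<std::string> tokens;
    std::size_t start = 0, end = 0;
    while ((end = str.find(delimiter, start)) != std::string::npos) {
        std::string token = str.substr(start, end - start);
        if (!token.empty()) {
            tokens.push_back(token);
        }
        start = end + delimiter.length();
    }
    if (start < str.size()) {
        tokens.push_back(str.substr(start));
    }
    return tokens;
}

int main() {
    // std::map iterates in key order; std::unordered_map iterates in an unspecified order.
    std::map<std::string, int> sorted_dict = { {"orange", 7}, {"apple", 5}, {"banana", 2} };
    std::unordered_map<std::string, int> hashed_dict(sorted_dict.begin(), sorted_dict.end());

    printf("std::map (keys come out sorted):\n");
    for (const auto & kv : sorted_dict) {
        printf("  %-8s %d\n", kv.first.c_str(), kv.second);
    }
    printf("std::unordered_map (iteration order unspecified):\n");
    for (const auto & kv : hashed_dict) {
        printf("  %-8s %d\n", kv.first.c_str(), kv.second);
    }

    // Same shape as the gpt_params table: member name -> tokenised declaration line.
    // The line below is a made-up example of the kind of declaration being parsed.
    std::map<std::string, std::vector<std::string>> parameters;
    const std::string line = "    int32_t n_predict = -1; // number of tokens to predict";
    std::vector<std::string> tokens = split_string(line, " ");
    if (tokens.size() > 1) {
        parameters[tokens[1]] = tokens; // tokens[1] is the member name, as in the patches
    }
    for (const auto & kv : parameters) {
        printf("key: %-12s tokens:", kv.first.c_str());
        for (const auto & t : kv.second) {
            printf(" %s", t.c_str());
        }
        printf("\n");
    }
    return 0;
}

Keeping the table as std::map<std::string, std::vector<std::string>> means the parameter dump comes out sorted by member name, which matches the "here we use map" comment in patch 33; an unordered_map would avoid the ordering cost when sorted output is not needed.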