Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 7 additions & 11 deletions convert_hf_to_gguf_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import re

import requests
import sys
import json
import shutil
import argparse
Expand Down Expand Up @@ -69,8 +68,7 @@ class TOKENIZER_TYPE(IntEnum):
hf_token = args.hf_token if args.hf_token is not None else hf_token

if hf_token is None:
logger.error("HF token is required. Please provide it as an argument or set it in ~/.cache/huggingface/token")
sys.exit(1)
logger.warning("HF token not found. You can provide it as an argument or set it in ~/.cache/huggingface/token")

# TODO: this string has to exercise as much pre-tokenizer functionality as possible
# will be updated with time - contributions welcome
Expand Down Expand Up @@ -151,7 +149,7 @@ class TOKENIZER_TYPE(IntEnum):


def download_file_with_auth(url, token, save_path):
headers = {"Authorization": f"Bearer {token}"}
headers = {"Authorization": f"Bearer {token}"} if token else None
response = sess.get(url, headers=headers)
response.raise_for_status()
os.makedirs(os.path.dirname(save_path), exist_ok=True)
Expand Down Expand Up @@ -250,20 +248,18 @@ def get_existing_models(convert_py):
else:
# otherwise, compute the hash of the tokenizer

# Skip if the tokenizer folder does not exist or there are other download issues previously
if not os.path.exists(f"models/tokenizers/{name}"):
logger.warning(f"Directory for tokenizer {name} not found. Skipping...")
continue
# Fail if the tokenizer folder with config does not exist or there are other download issues previously
if not os.path.isfile(f"models/tokenizers/{name}/tokenizer_config.json"):
raise OSError(f"Config for tokenizer {name} not found. The model may not exist or is not accessible with the provided token.")

try:
logger.info(f"Loading tokenizer from {f'models/tokenizers/{name}'}...")
if name == "t5":
tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}", use_fast=False)
else:
tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
except OSError as e:
logger.error(f"Error loading tokenizer for model {name}. The model may not exist or is not accessible with the provided token. Error: {e}")
continue # Skip to the next model if the tokenizer can't be loaded
except Exception as e:
raise OSError(f"Error loading tokenizer for model {name}.") from e

chktok = tokenizer.encode(CHK_TXT)
chkhsh = sha256(str(chktok).encode()).hexdigest()
Expand Down