You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
logger.info("Using default batch size of %d for text. Adjust with --batch-size if you encounter memory issues or want to speed up processing.", batch_size)
logger.info("Running embedding for %d images...", len(images))
182
+
# Set default batch size if not provided
183
+
if batch_size is None:
184
+
batch_size=16
185
+
logger.info("Using default batch size of %d for images. Adjust with --batch-size if you encounter memory issues or want to speed up processing.", batch_size)
186
+
187
+
logger.info("Running embedding for %d images with batch size %d...", len(images), batch_size)
174
188
tensors= []
175
-
batch_size=16
176
189
177
190
current_batch= []
178
191
@@ -207,6 +220,7 @@ def compute_text_projection(
207
220
neighbors: str|None="neighbors",
208
221
model: str|None=None,
209
222
trust_remote_code: bool=False,
223
+
batch_size: int|None=None,
210
224
umap_args: dict= {},
211
225
):
212
226
"""
@@ -225,6 +239,8 @@ def compute_text_projection(
225
239
model: str, name or path of the SentenceTransformer model to use for embedding.
226
240
trust_remote_code: bool, whether to trust and execute remote code when loading
227
241
the model from HuggingFace Hub. Default is False.
242
+
batch_size: int, batch size for processing embeddings. Larger values use more
243
+
memory but may be faster. Default is 32.
228
244
umap_args: dict, additional keyword arguments to pass to the UMAP algorithm
229
245
(e.g., n_neighbors, min_dist, metric).
230
246
@@ -237,6 +253,7 @@ def compute_text_projection(
237
253
list(text_series),
238
254
model=model,
239
255
trust_remote_code=trust_remote_code,
256
+
batch_size=batch_size,
240
257
umap_args=umap_args,
241
258
)
242
259
data_frame[x] =proj.projection[:, 0]
@@ -314,6 +331,7 @@ def compute_image_projection(
314
331
neighbors: str|None="neighbors",
315
332
model: str|None=None,
316
333
trust_remote_code: bool=False,
334
+
batch_size: int|None=None,
317
335
umap_args: dict= {},
318
336
):
319
337
"""
@@ -332,6 +350,8 @@ def compute_image_projection(
332
350
model: str, name or path of the model to use for embedding.
333
351
trust_remote_code: bool, whether to trust and execute remote code when loading
334
352
the model from HuggingFace Hub. Default is False.
353
+
batch_size: int, batch size for processing images. Larger values use more
354
+
memory but may be faster. Default is 16.
335
355
umap_args: dict, additional keyword arguments to pass to the UMAP algorithm
0 commit comments