Skip to content

Commit bb1df84

Browse files
elronbandel authored and dafnapension committed
Added test and fix
Signed-off-by: elronbandel <[email protected]>
1 parent 38757ec commit bb1df84

File tree

2,524 files changed

+2901
-25101
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

2,524 files changed

+2901
-25101
lines changed

.github/workflows/catalog_preparation.yml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ jobs:
1414
preparation:
1515

1616
runs-on: ubuntu-latest
17-
timeout-minutes: 45
17+
timeout-minutes: 30
1818
env:
1919
OS: ubuntu-latest
2020
UNITXT_DEFAULT_VERBOSITY: error
@@ -27,7 +27,7 @@ jobs:
2727

2828
strategy:
2929
matrix:
30-
modulo: [0,1,2,3,4,5,6,7,8,9,10,11]
30+
modulo: [0,1,2,3,4,5,6,7]
3131

3232
steps:
3333
- uses: actions/checkout@v5
@@ -53,7 +53,7 @@ jobs:
5353
run: |
5454
modulo="${{ matrix.modulo }}"
5555
echo "modulo=${modulo}" >> $GITHUB_STEP_SUMMARY
56-
echo "sed -i 's/^num_par = 1 /num_par = 12 /' tests/catalog/test_preparation.py" > sedit.sh
56+
echo "sed -i 's/^num_par = 1 /num_par = 8 /' tests/catalog/test_preparation.py" > sedit.sh
5757
echo "sed -i 's/^modulo = 0/modulo = ${modulo}/' tests/catalog/test_preparation.py" >> sedit.sh
5858
sh sedit.sh
5959
python -m unittest tests.catalog.test_preparation

src/unitxt/artifact.py

Lines changed: 12 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -446,6 +446,14 @@ class Artifact(Dataclass):
446446
default=None, required=False, also_positional=False
447447
)
448448

449+
def __init_subclass__(cls, **kwargs):
450+
super().__init_subclass__(**kwargs)
451+
module = inspect.getmodule(cls)
452+
# standardize module name
453+
module_name = getattr(module, "__name__", None)
454+
if not is_library_module(module_name):
455+
cls.register_class()
456+
449457
@classmethod
450458
def is_possible_identifier(cls, obj):
451459
return isinstance(obj, str) or is_artifact_dict(obj)
@@ -458,18 +466,15 @@ def get_artifact_type(cls):
458466
if not is_library_module(module_name):
459467
non_library_module_warning = f"module named {module_name} is not importable. Class {cls} is thus registered into Artifact.class_register, indexed by {cls.__name__}, accessible there as long as this class_register lives."
460468
warnings.warn(non_library_module_warning, ImportWarning, stacklevel=2)
461-
cls.register_class(cls)
469+
cls.register_class()
462470
return {"module": "class_register", "name": cls.__name__}
463471
if hasattr(cls, "__qualname__") and "." in cls.__qualname__:
464472
return {"module": module_name, "name": cls.__qualname__}
465473
return {"module": module_name, "name": cls.__name__}
466474

467475
@classmethod
468-
def register_class(cls, artifact_class):
469-
Artifact._class_register[artifact_class.__name__] = artifact_class
470-
471-
def __init_subclass__(cls, **kwargs):
472-
super().__init_subclass__(**kwargs)
476+
def register_class(cls):
477+
Artifact._class_register[cls.__name__] = cls
473478

474479
@classmethod
475480
def is_artifact_file(cls, path):
@@ -603,7 +608,7 @@ def maybe_fix_type_to_ensure_instantiation_ability(self):
603608
not is_library_module(self.__type__["module"])
604609
or "<locals>" in self.__type__["name"]
605610
):
606-
self.__class__.register_class(self.__class__)
611+
self.__class__.register_class()
607612
self.__type__ = {
608613
"module": "class_register",
609614
"name": self.__class__.__name__,

src/unitxt/catalog/cards/banking77.json

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,12 @@
88
"module": "unitxt.loaders",
99
"name": "LoadHF"
1010
},
11-
"path": "PolyAI/banking77"
11+
"path": "PolyAI/banking77",
12+
"revision": "refs/convert/parquet",
13+
"splits": [
14+
"train",
15+
"test"
16+
]
1217
},
1318
"preprocess_steps": [
1419
{

src/unitxt/catalog/cards/biggen_bench/results/human_eval.json

Lines changed: 36 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,13 @@
11
{
2-
"__type__": "task_card",
2+
"__type__": {
3+
"module": "unitxt.card",
4+
"name": "TaskCard"
5+
},
36
"loader": {
4-
"__type__": "load_hf",
7+
"__type__": {
8+
"module": "unitxt.loaders",
9+
"name": "LoadHF"
10+
},
511
"path": "prometheus-eval/BiGGen-Bench-Results",
612
"splits": [
713
"human_eval",
@@ -10,7 +16,10 @@
1016
},
1117
"preprocess_steps": [
1218
{
13-
"__type__": "merge_streams",
19+
"__type__": {
20+
"module": "unitxt.operators",
21+
"name": "MergeStreams"
22+
},
1423
"streams_to_merge": [
1524
"human_eval",
1625
"multilingual_human_eval"
@@ -19,7 +28,10 @@
1928
"add_origin_stream_name": true
2029
},
2130
{
22-
"__type__": "set",
31+
"__type__": {
32+
"module": "unitxt.operators",
33+
"name": "Set"
34+
},
2335
"fields": {
2436
"criteria": {
2537
"name": "",
@@ -63,17 +75,26 @@
6375
}
6476
},
6577
{
66-
"__type__": "cast",
78+
"__type__": {
79+
"module": "unitxt.operators",
80+
"name": "Cast"
81+
},
6782
"field": "human_score",
6883
"to": "float"
6984
},
7085
{
71-
"__type__": "format_text",
86+
"__type__": {
87+
"module": "unitxt.string_operators",
88+
"name": "FormatText"
89+
},
7290
"text": "{capability}-{task}",
7391
"to_field": "criteria_name"
7492
},
7593
{
76-
"__type__": "copy",
94+
"__type__": {
95+
"module": "unitxt.operators",
96+
"name": "Copy"
97+
},
7798
"field_to_field": {
7899
"criteria_name": "criteria/name",
79100
"score_rubric/criteria": "criteria/description",
@@ -85,12 +106,18 @@
85106
}
86107
},
87108
{
88-
"__type__": "create_criteria_with_options_from_dict",
109+
"__type__": {
110+
"module": "unitxt.llm_as_judge_operators",
111+
"name": "CreateCriteriaWithOptionsFromDict"
112+
},
89113
"field": "criteria"
90114
}
91115
],
92116
"task": {
93-
"__type__": "task",
117+
"__type__": {
118+
"module": "unitxt.task",
119+
"name": "Task"
120+
},
94121
"input_fields": {
95122
"system_prompt": "str",
96123
"input": "str",

src/unitxt/catalog/cards/coedit/preference.json

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -29,17 +29,26 @@
2929
"by": ": "
3030
},
3131
{
32-
"__type__": "copy",
32+
"__type__": {
33+
"module": "unitxt.operators",
34+
"name": "Copy"
35+
},
3336
"field": "src/0",
3437
"to_field": "instance_instruction"
3538
},
3639
{
37-
"__type__": "slice",
40+
"__type__": {
41+
"module": "unitxt.collections_operators",
42+
"name": "Slice"
43+
},
3844
"field": "src",
3945
"start": 1
4046
},
4147
{
42-
"__type__": "join",
48+
"__type__": {
49+
"module": "unitxt.string_operators",
50+
"name": "Join"
51+
},
4352
"field": "src",
4453
"by": ": "
4554
},

src/unitxt/catalog/cards/dart.json

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,15 @@
88
"module": "unitxt.loaders",
99
"name": "LoadHF"
1010
},
11-
"path": "dart"
11+
"path": "Yale-LILY/dart",
12+
"revision": "refs/convert/parquet",
13+
"splits": [
14+
"train",
15+
"validation"
16+
],
17+
"data_classification_policy": [
18+
"public"
19+
]
1220
},
1321
"preprocess_steps": [
1422
"splitters.small_no_test",

src/unitxt/catalog/cards/fin_qa.json

Lines changed: 37 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,10 @@
44
"name": "TaskCard"
55
},
66
"loader": {
7-
"__type__": "load_json_file",
7+
"__type__": {
8+
"module": "unitxt.loaders",
9+
"name": "LoadJsonFile"
10+
},
811
"files": {
912
"train": "https://raw.githubusercontent.com/czyssrs/FinQA/0f16e2867befa6840783e58be38c9efb9229d742/dataset/train.json",
1013
"validation": "https://raw.githubusercontent.com/czyssrs/FinQA/0f16e2867befa6840783e58be38c9efb9229d742/dataset/dev.json",
@@ -17,21 +20,28 @@
1720
"preprocess_steps": [
1821
{
1922
"__type__": {
20-
"module": "unitxt.collections_operators",
21-
"name": "GetLength"
23+
"module": "unitxt.operators",
24+
"name": "Copy"
2225
},
23-
"field": "table",
24-
"to_field": "table_length"
26+
"field": "qa/question",
27+
"to_field": "question"
2528
},
2629
{
2730
"__type__": {
2831
"module": "unitxt.operators",
29-
"name": "FilterByCondition"
32+
"name": "Copy"
3033
},
31-
"values": {
32-
"table_length": 1
34+
"field": "qa/answer",
35+
"to_field": "answer"
36+
},
37+
{
38+
"__type__": {
39+
"module": "unitxt.operators",
40+
"name": "Cast"
3341
},
34-
"condition": "gt"
42+
"field": "qa/program",
43+
"to": "str",
44+
"to_field": "program_re"
3545
},
3646
{
3747
"__type__": {
@@ -41,6 +51,24 @@
4151
"field": "pre_text/0",
4252
"to_field": "pre_text"
4353
},
54+
{
55+
"__type__": {
56+
"module": "unitxt.collections_operators",
57+
"name": "GetLength"
58+
},
59+
"field": "table",
60+
"to_field": "table_length"
61+
},
62+
{
63+
"__type__": {
64+
"module": "unitxt.operators",
65+
"name": "FilterByCondition"
66+
},
67+
"values": {
68+
"table_length": 1
69+
},
70+
"condition": "gt"
71+
},
4472
{
4573
"__type__": {
4674
"module": "unitxt.operators",

src/unitxt/catalog/cards/global_mmlu/am/abstract_algebra.json

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -13,16 +13,6 @@
1313
"filtering_lambda": "lambda x: x['subject'] == 'abstract_algebra'"
1414
},
1515
"preprocess_steps": [
16-
{
17-
"__type__": {
18-
"module": "unitxt.operators",
19-
"name": "FilterByCondition"
20-
},
21-
"values": {
22-
"subject": "abstract_algebra"
23-
},
24-
"condition": "eq"
25-
},
2616
{
2717
"__type__": {
2818
"module": "unitxt.operators",

src/unitxt/catalog/cards/global_mmlu/am/anatomy.json

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -13,16 +13,6 @@
1313
"filtering_lambda": "lambda x: x['subject'] == 'anatomy'"
1414
},
1515
"preprocess_steps": [
16-
{
17-
"__type__": {
18-
"module": "unitxt.operators",
19-
"name": "FilterByCondition"
20-
},
21-
"values": {
22-
"subject": "anatomy"
23-
},
24-
"condition": "eq"
25-
},
2616
{
2717
"__type__": {
2818
"module": "unitxt.operators",

src/unitxt/catalog/cards/global_mmlu/am/astronomy.json

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -13,16 +13,6 @@
1313
"filtering_lambda": "lambda x: x['subject'] == 'astronomy'"
1414
},
1515
"preprocess_steps": [
16-
{
17-
"__type__": {
18-
"module": "unitxt.operators",
19-
"name": "FilterByCondition"
20-
},
21-
"values": {
22-
"subject": "astronomy"
23-
},
24-
"condition": "eq"
25-
},
2616
{
2717
"__type__": {
2818
"module": "unitxt.operators",

0 commit comments

Comments
 (0)