@@ -92,6 +92,7 @@
 from ddtrace.llmobs._experiment import Experiment
 from ddtrace.llmobs._experiment import ExperimentConfigType
 from ddtrace.llmobs._experiment import JSONType
+from ddtrace.llmobs._experiment import Project
 from ddtrace.llmobs._utils import AnnotationContext
 from ddtrace.llmobs._utils import LinkTracker
 from ddtrace.llmobs._utils import _get_ml_app
@@ -212,6 +213,7 @@ def __init__(
             interval=float(os.getenv("_DD_LLMOBS_WRITER_INTERVAL", 1.0)),
             timeout=float(os.getenv("_DD_LLMOBS_WRITER_TIMEOUT", 5.0)),
             _app_key=self._app_key,
+            _default_project=Project(name=self._project_name, _id=""),
             is_agentless=True,  # agent proxy doesn't seem to work for experiments
         )
 
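As an aside, the flush cadence configured here is driven by the two private environment variables named in the hunk above; a minimal sketch of overriding them (values in seconds, set before the writer is constructed; these are private knobs and may change without notice):

```python
import os

# Names and defaults are taken from the diff above.
os.environ["_DD_LLMOBS_WRITER_INTERVAL"] = "0.5"   # flush every 0.5s (default 1.0)
os.environ["_DD_LLMOBS_WRITER_TIMEOUT"] = "10.0"   # request timeout (default 5.0)
```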
@@ -645,15 +647,21 @@ def enable(
         )
 
     @classmethod
-    def pull_dataset(cls, name: str) -> Dataset:
-        ds = cls._instance._dne_client.dataset_get_with_records(name)
+    def pull_dataset(cls, dataset_name: str, project_name: Optional[str] = None) -> Dataset:
+        ds = cls._instance._dne_client.dataset_get_with_records(dataset_name, (project_name or cls._project_name))
         return ds
 
     @classmethod
-    def create_dataset(cls, name: str, description: str = "", records: Optional[List[DatasetRecord]] = None) -> Dataset:
+    def create_dataset(
+        cls,
+        dataset_name: str,
+        project_name: Optional[str] = None,
+        description: str = "",
+        records: Optional[List[DatasetRecord]] = None,
+    ) -> Dataset:
         if records is None:
             records = []
-        ds = cls._instance._dne_client.dataset_create(name, description)
+        ds = cls._instance._dne_client.dataset_create(dataset_name, project_name, description)
         for r in records:
             ds.append(r)
         if len(records) > 0:
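For callers, the `name` → `dataset_name` rename is breaking for keyword arguments, and `project_name` now slots in before `description`. A minimal usage sketch, assuming these classmethods are exposed on the `LLMObs` class (dataset and project names below are placeholders):

```python
from ddtrace.llmobs import LLMObs

# Pull a dataset from an explicit project; when project_name is omitted,
# the lookup falls back to the globally configured cls._project_name.
ds = LLMObs.pull_dataset("qa-eval-set", project_name="my-project")

# Create a dataset under a project. Keyword arguments sidestep the new
# positional order (dataset_name, project_name, description, records).
ds = LLMObs.create_dataset(
    "qa-eval-set",
    project_name="my-project",
    description="QA evaluation records",
)
```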
@@ -669,19 +677,20 @@ def create_dataset_from_csv(
         expected_output_columns: Optional[List[str]] = None,
         metadata_columns: Optional[List[str]] = None,
         csv_delimiter: str = ",",
-        description="",
+        description: str = "",
+        project_name: Optional[str] = None,
     ) -> Dataset:
         if expected_output_columns is None:
             expected_output_columns = []
         if metadata_columns is None:
             metadata_columns = []
-        ds = cls._instance._dne_client.dataset_create(dataset_name, description)
 
         # Store the original field size limit to restore it later
         original_field_size_limit = csv.field_size_limit()
 
         csv.field_size_limit(EXPERIMENT_CSV_FIELD_MAX_SIZE)  # 10mb
 
+        records = []
         try:
             with open(csv_path, mode="r") as csvfile:
                 content = csvfile.readline().strip()
@@ -708,7 +717,7 @@ def create_dataset_from_csv(
                     raise ValueError(f"Metadata columns not found in CSV header: {missing_metadata_columns}")
 
                 for row in rows:
-                    ds.append(
+                    records.append(
                         DatasetRecord(
                             input_data={col: row[col] for col in input_data_columns},
                             expected_output={col: row[col] for col in expected_output_columns},
@@ -721,6 +730,9 @@ def create_dataset_from_csv(
             # Always restore the original field size limit
             csv.field_size_limit(original_field_size_limit)
 
+        ds = cls._instance._dne_client.dataset_create(dataset_name, project_name, description)
+        for r in records:
+            ds.append(r)
         if len(ds) > 0:
             cls._instance._dne_client.dataset_bulk_upload(ds._id, ds._records)
         return ds
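Taken together, the last three hunks buffer parsed rows in a local `records` list and defer the `dataset_create` call until the CSV has been read successfully, so a malformed file or missing column no longer leaves an empty remote dataset behind. A usage sketch under the same `LLMObs` assumption, with placeholder file and column names (the full signature is not shown in this diff, so keyword arguments are used throughout):

```python
ds = LLMObs.create_dataset_from_csv(
    csv_path="qa_records.csv",          # placeholder path
    dataset_name="qa-eval-set",
    input_data_columns=["question"],
    expected_output_columns=["answer"],
    project_name="my-project",          # new in this change
)
```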