You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
:param procs_cpu_bound: Number of cores used for CPU bound tasks
506
507
:param procs_io_bound: Number of cores used for I/O bound tasks
508
+
:param inplace: True is cheapest (CPU and Memory) but False leaves your DataFrame intact
507
509
:return: List of objects written on S3
508
510
"""
509
511
ifserdenotinPandas.VALID_CSV_SERDES:
@@ -522,7 +524,8 @@ def to_csv(
522
524
compression=None,
523
525
procs_cpu_bound=procs_cpu_bound,
524
526
procs_io_bound=procs_io_bound,
525
-
extra_args=extra_args)
527
+
extra_args=extra_args,
528
+
inplace=inplace)
526
529
527
530
defto_parquet(self,
528
531
dataframe,
@@ -535,7 +538,8 @@ def to_parquet(self,
535
538
compression="snappy",
536
539
procs_cpu_bound=None,
537
540
procs_io_bound=None,
538
-
cast_columns=None):
541
+
cast_columns=None,
542
+
inplace=True):
539
543
"""
540
544
Write a Pandas Dataframe as parquet files on S3
541
545
Optionally writes metadata on AWS Glue.
@@ -550,7 +554,8 @@ def to_parquet(self,
550
554
:param compression: None, snappy, gzip, lzo
551
555
:param procs_cpu_bound: Number of cores used for CPU bound tasks
552
556
:param procs_io_bound: Number of cores used for I/O bound tasks
553
-
:param cast_columns: Dictionary of columns names and Athena/Glue types to be casted. (E.g. {"col name": "bigint", "col2 name": "int"})
557
+
:param cast_columns: Dictionary of columns names and Athena/Glue types to be casted (E.g. {"col name": "bigint", "col2 name": "int"})
558
+
:param inplace: True is cheapest (CPU and Memory) but False leaves your DataFrame intact
554
559
:return: List of objects written on S3
555
560
"""
556
561
returnself.to_s3(dataframe=dataframe,
@@ -564,7 +569,8 @@ def to_parquet(self,
564
569
compression=compression,
565
570
procs_cpu_bound=procs_cpu_bound,
566
571
procs_io_bound=procs_io_bound,
567
-
cast_columns=cast_columns)
572
+
cast_columns=cast_columns,
573
+
inplace=inplace)
568
574
569
575
defto_s3(self,
570
576
dataframe,
@@ -579,7 +585,8 @@ def to_s3(self,
579
585
procs_cpu_bound=None,
580
586
procs_io_bound=None,
581
587
cast_columns=None,
582
-
extra_args=None):
588
+
extra_args=None,
589
+
inplace=True):
583
590
"""
584
591
Write a Pandas Dataframe on S3
585
592
Optionally writes metadata on AWS Glue.
@@ -597,9 +604,13 @@ def to_s3(self,
597
604
:param procs_io_bound: Number of cores used for I/O bound tasks
598
605
:param cast_columns: Dictionary of columns names and Athena/Glue types to be casted. (E.g. {"col name": "bigint", "col2 name": "int"}) (Only for "parquet" file_format)
599
606
:param extra_args: Extra arguments specific for each file formats (E.g. "sep" for CSV)
607
+
:param inplace: True is cheapest (CPU and Memory) but False leaves your DataFrame intact
0 commit comments