Skip to content

Commit 56241df

Browse files
committed
(fix): JSON compression is passed twice when dataset=True
1 parent 7c4f20c commit 56241df

File tree

2 files changed

+11
-16
lines changed

2 files changed

+11
-16
lines changed

awswrangler/s3/_write_text.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -941,7 +941,7 @@ def to_json( # pylint: disable=too-many-arguments,too-many-locals,too-many-stat
941941
**pandas_kwargs,
942942
)
943943

944-
compression: Optional[str] = pandas_kwargs.get("compression", None)
944+
compression: Optional[str] = pandas_kwargs.pop("compression", None)
945945
df = df[columns] if columns else df
946946

947947
columns_types: Dict[str, str] = {}
@@ -980,7 +980,7 @@ def to_json( # pylint: disable=too-many-arguments,too-many-locals,too-many-stat
980980
projection_storage_location_template=None,
981981
catalog_table_input=catalog_table_input,
982982
catalog_id=catalog_id,
983-
compression=pandas_kwargs.get("compression"),
983+
compression=compression,
984984
serde_library=None,
985985
serde_parameters=None,
986986
)
@@ -1047,7 +1047,7 @@ def to_json( # pylint: disable=too-many-arguments,too-many-locals,too-many-stat
10471047
projection_storage_location_template=None,
10481048
catalog_table_input=catalog_table_input,
10491049
catalog_id=catalog_id,
1050-
compression=pandas_kwargs.get("compression"),
1050+
compression=compression,
10511051
serde_library=serde_library,
10521052
serde_parameters=serde_parameters,
10531053
)
@@ -1063,7 +1063,7 @@ def to_json( # pylint: disable=too-many-arguments,too-many-locals,too-many-stat
10631063
serde_parameters=serde_parameters,
10641064
catalog_id=catalog_id,
10651065
columns_types=columns_types,
1066-
compression=pandas_kwargs.get("compression"),
1066+
compression=compression,
10671067
)
10681068
if commit_trans:
10691069
lakeformation.commit_transaction(

tests/test_s3_text_compressed.py

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,7 @@ def test_csv_write(path, compression):
7272
assert df.shape == df2.shape == df3.shape
7373

7474

75-
# @pytest.mark.parametrize("compression", ["gzip", "bz2", "xz", "zip", None]) # Removed due to a Pandas bug
76-
@pytest.mark.parametrize("compression", [None])
75+
@pytest.mark.parametrize("compression", ["gzip", "bz2", "xz", "zip", None])
7776
def test_json(path, compression):
7877
path_file = f"{path}test.json{EXT.get(compression, '')}"
7978
df = pd.DataFrame({"id": [1, 2, 3]})
@@ -88,22 +87,18 @@ def test_json(path, compression):
8887

8988

9089
@pytest.mark.parametrize("chunksize", [None, 1])
91-
# @pytest.mark.parametrize("compression", ["gzip", "bz2", "xz", "zip", None]) # Removed due to a Pandas bug
92-
@pytest.mark.parametrize("compression", [None])
90+
@pytest.mark.parametrize("compression", ["gzip", "bz2", "xz", "zip", None])
9391
def test_partitioned_json(path, compression, chunksize):
94-
df = pd.DataFrame({"c0": [0, 1], "c1": ["foo", "boo"]})
95-
paths = [f"{path}year={y}/month={m}/0.json{EXT.get(compression, '')}" for y, m in [(2020, 1), (2020, 2), (2021, 1)]]
92+
df = pd.DataFrame({"c0": [0, 1, 2, 3], "c1": ["foo", "boo", "bar", "baz"], "year": [2020, 2020, 2021, 2021], "month": [1, 2, 1, 2]})
9693
if version_info < (3, 7) and compression:
9794
with pytest.raises(wr.exceptions.InvalidArgument):
98-
for p in paths:
99-
wr.s3.to_json(df, p, orient="records", lines=True, compression=compression)
95+
wr.s3.to_json(df, path, orient="records", lines=True, compression=compression, dataset=True, partition_cols=["year", "month"])
10096
else:
101-
for p in paths:
102-
wr.s3.to_json(df, p, orient="records", lines=True, compression=compression)
97+
wr.s3.to_json(df, path, orient="records", lines=True, compression=compression, dataset=True, partition_cols=["year", "month"])
10398
df2 = wr.s3.read_json(path, dataset=True, chunksize=chunksize)
10499
if chunksize is None:
105-
assert df2.shape == (6, 4)
106-
assert df2.c0.sum() == 3
100+
assert df2.shape == (4, 4)
101+
assert df2.c0.sum() == 6
107102
else:
108103
for d in df2:
109104
assert d.shape == (1, 4)

0 commit comments

Comments
 (0)