@@ -72,8 +72,7 @@ def test_csv_write(path, compression):
72
72
assert df .shape == df2 .shape == df3 .shape
73
73
74
74
75
- # @pytest.mark.parametrize("compression", ["gzip", "bz2", "xz", "zip", None]) # Removed due a Pandas bug
76
- @pytest .mark .parametrize ("compression" , [None ])
75
+ @pytest .mark .parametrize ("compression" , ["gzip" , "bz2" , "xz" , "zip" , None ])
77
76
def test_json (path , compression ):
78
77
path_file = f"{ path } test.json{ EXT .get (compression , '' )} "
79
78
df = pd .DataFrame ({"id" : [1 , 2 , 3 ]})
@@ -88,22 +87,41 @@ def test_json(path, compression):
88
87
89
88
90
89
@pytest .mark .parametrize ("chunksize" , [None , 1 ])
91
- # @pytest.mark.parametrize("compression", ["gzip", "bz2", "xz", "zip", None]) # Removed due a Pandas bug
92
- @pytest .mark .parametrize ("compression" , [None ])
90
+ @pytest .mark .parametrize ("compression" , ["gzip" , "bz2" , "xz" , "zip" , None ])
93
91
def test_partitioned_json (path , compression , chunksize ):
94
- df = pd .DataFrame ({"c0" : [0 , 1 ], "c1" : ["foo" , "boo" ]})
95
- paths = [f"{ path } year={ y } /month={ m } /0.json{ EXT .get (compression , '' )} " for y , m in [(2020 , 1 ), (2020 , 2 ), (2021 , 1 )]]
92
+ df = pd .DataFrame (
93
+ {
94
+ "c0" : [0 , 1 , 2 , 3 ],
95
+ "c1" : ["foo" , "boo" , "bar" , "baz" ],
96
+ "year" : [2020 , 2020 , 2021 , 2021 ],
97
+ "month" : [1 , 2 , 1 , 2 ],
98
+ }
99
+ )
96
100
if version_info < (3 , 7 ) and compression :
97
101
with pytest .raises (wr .exceptions .InvalidArgument ):
98
- for p in paths :
99
- wr .s3 .to_json (df , p , orient = "records" , lines = True , compression = compression )
102
+ wr .s3 .to_json (
103
+ df ,
104
+ path ,
105
+ orient = "records" ,
106
+ lines = True ,
107
+ compression = compression ,
108
+ dataset = True ,
109
+ partition_cols = ["year" , "month" ],
110
+ )
100
111
else :
101
- for p in paths :
102
- wr .s3 .to_json (df , p , orient = "records" , lines = True , compression = compression )
112
+ wr .s3 .to_json (
113
+ df ,
114
+ path ,
115
+ orient = "records" ,
116
+ lines = True ,
117
+ compression = compression ,
118
+ dataset = True ,
119
+ partition_cols = ["year" , "month" ],
120
+ )
103
121
df2 = wr .s3 .read_json (path , dataset = True , chunksize = chunksize )
104
122
if chunksize is None :
105
- assert df2 .shape == (6 , 4 )
106
- assert df2 .c0 .sum () == 3
123
+ assert df2 .shape == (4 , 4 )
124
+ assert df2 .c0 .sum () == 6
107
125
else :
108
126
for d in df2 :
109
127
assert d .shape == (1 , 4 )
0 commit comments