@@ -39,6 +39,29 @@ def test_csv_encoding(path, encoding, strings, wrong_encoding, exception, line_t
39
39
assert df .equals (df2 )
40
40
41
41
42
@pytest.mark.parametrize(
    "encoding,strings,wrong_encoding",
    [
        ("utf-8", ["漢字", "ãóú", "г, д, ж, з, к, л"], "ascii"),
        ("ISO-8859-15", ["Ö, ö, Ü, ü", "ãóú", "øe"], "ascii"),
    ],
)
def test_csv_ignore_encoding_errors(path, encoding, strings, wrong_encoding):
    """Verify ``encoding_errors="ignore"`` suppresses decode failures on read.

    Writes a CSV with non-ASCII strings under *encoding*, then:
    1. asserts that reading it back with a mismatched codec raises
       ``UnicodeDecodeError``;
    2. asserts that the same read succeeds when decode errors are ignored.

    Parameters come from the parametrize decorator; *path* is an S3 prefix
    fixture supplied by the test suite.
    """
    file_path = f"{path}0.csv"
    df = pd.DataFrame({"c0": [1, 2, 3], "c1": strings})
    wr.s3.to_csv(df, file_path, index=False, encoding=encoding)
    # The mismatched codec must fail loudly when errors are not ignored.
    # No assignment here: the call raises, so a bound result would never
    # exist anyway (the original's `df2 =` inside this block was dead code).
    with pytest.raises(UnicodeDecodeError):
        wr.s3.read_csv(file_path, encoding=wrong_encoding)
    df2 = wr.s3.read_csv(file_path, encoding=wrong_encoding, encoding_errors="ignore")
    if not isinstance(df2, pd.DataFrame):
        # read_csv may return an iterator of chunks; flatten before checking.
        df2 = pd.concat(df2, ignore_index=True)
    # NOTE(review): (3, 4) presumably reflects how ignored bytes mangle the
    # quoting/delimiters of the non-ASCII column — confirm against the writer.
    assert df2.shape == (3, 4)
64
+
42
65
@pytest .mark .parametrize ("use_threads" , [True , False , 2 ])
43
66
@pytest .mark .parametrize ("chunksize" , [None , 1 ])
44
67
def test_read_partitioned_json_paths (path , use_threads , chunksize ):
0 commit comments