Skip to content

Commit 39bcd61

Browse files
committed
read_csv: add test case for encoding_errors pandas argument
1 parent 0c3de78 commit 39bcd61

File tree

1 file changed

+23
-0
lines changed

1 file changed

+23
-0
lines changed

tests/test_s3_text.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,29 @@ def test_csv_encoding(path, encoding, strings, wrong_encoding, exception, line_t
3939
assert df.equals(df2)
4040

4141

42+
@pytest.mark.parametrize(
    "encoding,strings,wrong_encoding",
    [
        ("utf-8", ["漢字", "ãóú", "г, д, ж, з, к, л"], "ascii"),
        ("ISO-8859-15", ["Ö, ö, Ü, ü", "ãóú", "øe"], "ascii"),
    ],
)
def test_csv_ignore_encoding_errors(path, encoding, strings, wrong_encoding):
    """Check that ``encoding_errors="ignore"`` lets a mis-encoded CSV be read.

    A three-row frame is written to ``{path}0.csv`` using ``encoding``.
    Reading it back with ``wrong_encoding`` must raise ``UnicodeDecodeError``;
    reading it again with ``encoding_errors="ignore"`` must succeed and yield
    a frame with the same shape as the one that was written.
    """
    file_path = f"{path}0.csv"
    df = pd.DataFrame({"c0": [1, 2, 3], "c1": strings})
    wr.s3.to_csv(df, file_path, index=False, encoding=encoding)

    # Without an error policy the wrong codec must fail loudly. The result is
    # not bound to a name because the call is expected to raise.
    with pytest.raises(UnicodeDecodeError):
        wr.s3.read_csv(file_path, encoding=wrong_encoding)

    df2 = wr.s3.read_csv(file_path, encoding=wrong_encoding, encoding_errors="ignore")
    # NOTE(review): presumably read_csv can return an iterable of frames
    # (e.g. when chunked) -- confirm; in that case stitch them back together.
    if not isinstance(df2, pd.DataFrame):
        df2 = pd.concat(df2, ignore_index=True)

    # Two columns were written without the index, so the frame read back must
    # have the same (3, 2) shape. This assumes to_csv quotes fields containing
    # the delimiter (pandas default) so embedded commas do not add columns.
    assert df2.shape == df.shape
63+
64+
4265
@pytest.mark.parametrize("use_threads", [True, False, 2])
4366
@pytest.mark.parametrize("chunksize", [None, 1])
4467
def test_read_partitioned_json_paths(path, use_threads, chunksize):

0 commit comments

Comments
 (0)