Skip to content

Commit 16410de

Browse files
committed
read_csv: read file as binary when encoding_errors is set to ignore
1 parent d2d4481 commit 16410de

File tree

2 files changed

+4
-4
lines changed

2 files changed

+4
-4
lines changed

awswrangler/s3/_fs.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -555,7 +555,7 @@ def open_s3_object(
555 555       newline: Optional[str] = "\n",
556 556       encoding: Optional[str] = "utf-8",
557 557   ) -> Iterator[Union[_S3ObjectBase, io.TextIOWrapper]]:
558     -     """Return a _S3Object or TextIOWrapper based in the received mode."""
    558 +     """Return a _S3Object or TextIOWrapper based on the received mode."""
559 559       s3obj: Optional[_S3ObjectBase] = None
560 560       text_s3obj: Optional[io.TextIOWrapper] = None
561 561       try:

awswrangler/s3/_read_text.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@
 28  28   def _get_read_details(path: str, pandas_kwargs: Dict[str, Any]) -> Tuple[str, Optional[str], Optional[str]]:
 29  29       if pandas_kwargs.get("compression", "infer") == "infer":
 30  30           pandas_kwargs["compression"] = infer_compression(path, compression="infer")
 31     -     mode: str = "r" if pandas_kwargs.get("compression") is None else "rb"
     31 +     mode: str = "r" if pandas_kwargs.get("compression") is None and pandas_kwargs.get("encoding_errors") != 'ignore' else "rb"
 32  32       encoding: Optional[str] = pandas_kwargs.get("encoding", "utf-8")
 33  33       newline: Optional[str] = pandas_kwargs.get("lineterminator", None)
 34  34       return mode, encoding, newline
@@ -249,7 +249,7 @@ def read_csv(
249 249           E.g ``lambda x: True if x["year"] == "2020" and x["month"] == "1" else False``
250 250           https://aws-sdk-pandas.readthedocs.io/en/2.17.0/tutorials/023%20-%20Flexible%20Partitions%20Filter.html
251 251       pandas_kwargs :
252     -         KEYWORD arguments forwarded to pandas.read_csv(). You can NOT pass `pandas_kwargs` explicit, just add valid
    252 +         KEYWORD arguments forwarded to pandas.read_csv(). You can NOT pass `pandas_kwargs` explicitly, just add valid
253 253           Pandas arguments in the function call and awswrangler will accept it.
254 254           e.g. wr.s3.read_csv('s3://bucket/prefix/', sep='|', na_values=['null', 'none'], skip_blank_lines=True)
255 255           https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html
@@ -292,7 +292,7 @@ def read_csv(
292 292       """
293 293       if "pandas_kwargs" in pandas_kwargs:
294 294           raise exceptions.InvalidArgument(
295     -             "You can NOT pass `pandas_kwargs` explicit, just add valid "
    295 +             "You can NOT pass `pandas_kwargs` explicitly, just add valid "
296 296               "Pandas arguments in the function call and awswrangler will accept it."
297 297               "e.g. wr.s3.read_csv('s3://bucket/prefix/', sep='|', skip_blank_lines=True)"
298 298           )

0 commit comments

Comments
 (0)