Skip to content

Commit ddeee46 — Merge branch 'main' into fix/iceberg-new-cols
(merge commit with 2 parents: 5f561c5 + e939741)

File tree: 8 files changed (+138 lines added, -139 lines removed)

awswrangler/distributed/ray/_executor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def __init__(self, max_concurrency: int) -> None:
3939
super().__init__()
4040

4141
_logger.debug("Initializing Ray Actor with maximum concurrency %d", max_concurrency)
42-
self._actor: ray.actor.ActorHandle = AsyncActor.options(max_concurrency=max_concurrency).remote() # type: ignore[attr-defined]
42+
self._actor: "ray.actor.ActorHandle[AsyncActor]" = AsyncActor.options(max_concurrency=max_concurrency).remote() # type: ignore[attr-defined]
4343

4444
def map(self, func: Callable[..., MapOutputType], _: "BaseClient" | None, *args: Any) -> list[MapOutputType]:
4545
"""Map func and return ray futures."""

awswrangler/distributed/ray/datasources/arrow_parquet_datasource.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,6 @@ def get_read_tasks(self, parallelism: int) -> list[ReadTask]:
344344

345345
meta = self._meta_provider(
346346
paths, # type: ignore[arg-type]
347-
self._inferred_schema,
348347
num_fragments=len(fragments),
349348
prefetched_metadata=metadata,
350349
)
@@ -566,7 +565,7 @@ def compute_batch_size_rows(sample_info: _SampleInfo) -> int:
566565
if sample_info.actual_bytes_per_row is None:
567566
return PARQUET_READER_ROW_BATCH_SIZE
568567
else:
569-
max_parquet_reader_row_batch_size_bytes = DataContext.get_current().target_max_block_size // 10
568+
max_parquet_reader_row_batch_size_bytes = DataContext.get_current().target_max_block_size // 10 # type: ignore[operator]
570569
return max(
571570
1,
572571
min(

awswrangler/distributed/ray/datasources/file_datasink.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ def _write_block(write_path: str, block: pd.DataFrame) -> str:
7878
write_path = self.path
7979

8080
if write_path.endswith("/"):
81-
filename = self.filename_provider.get_filename_for_block(block, ctx.task_idx, 0)
81+
filename = self.filename_provider.get_filename_for_block(block, "", ctx.task_idx, 0)
8282
write_path = posixpath.join(self.path, filename)
8383

8484
return _write_block(write_path, block)

awswrangler/distributed/ray/datasources/filename_provider.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,16 @@ def __init__(
2626
def get_filename_for_block(
2727
self,
2828
block: Block,
29+
write_uuid: str,
2930
task_index: int,
3031
block_index: int,
3132
) -> str:
3233
file_id = f"{task_index:06}_{block_index:06}"
3334
return self._generate_filename(file_id)
3435

35-
def get_filename_for_row(self, row: dict[str, Any], task_index: int, block_index: int, row_index: int) -> str:
36+
def get_filename_for_row(
37+
self, row: dict[str, Any], write_uuid: str, task_index: int, block_index: int, row_index: int
38+
) -> str:
3639
file_id = f"{task_index:06}_{block_index:06}_{row_index:06}"
3740
return self._generate_filename(file_id)
3841

awswrangler/mysql.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ def connect(
168168
attrs: _db_utils.ConnectionAttributes = _db_utils.get_connection_attributes(
169169
connection=connection, secret_id=secret_id, catalog_id=catalog_id, dbname=dbname, boto3_session=boto3_session
170170
)
171-
if attrs.kind != "mysql":
171+
if attrs.kind not in ("mysql", "aurora-mysql"):
172172
raise exceptions.InvalidDatabaseType(f"Invalid connection type ({attrs.kind}. It must be a MySQL connection.)")
173173
return pymysql.connect(
174174
user=attrs.user,

awswrangler/postgresql.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ def connect(
225225
attrs: _db_utils.ConnectionAttributes = _db_utils.get_connection_attributes(
226226
connection=connection, secret_id=secret_id, catalog_id=catalog_id, dbname=dbname, boto3_session=boto3_session
227227
)
228-
if attrs.kind not in ("postgresql", "postgres"):
228+
if attrs.kind not in ("postgresql", "postgres", "aurora-postgresql"):
229229
raise exceptions.InvalidDatabaseType(
230230
f"Invalid connection type ({attrs.kind}. It must be a postgresql connection.)"
231231
)

pyproject.toml

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,7 @@ dependencies = [
2323
"botocore>=1.23.32,<2",
2424
"pandas>=1.2.0,<3.0.0",
2525
"numpy>=1.26,<3.0",
26-
"pyarrow>=18.0.0,<21.0.0 ; sys_platform != 'darwin' or platform_machine != 'x86_64'",
27-
# pyarrow 18 causes macos build failures
28-
# https://github.com/ray-project/ray/pull/48446
29-
"pyarrow>=8.0.0,<18.0.0 ; sys_platform == 'darwin' and platform_machine == 'x86_64'",
26+
"pyarrow>=8.0.0,<21.0.0",
3027
"typing-extensions>=4.4.0,<5",
3128
"packaging>=21.1,<26.0",
3229
"setuptools ; python_version >= '3.12'",
@@ -41,7 +38,7 @@ oracle = ["oracledb>=1,<4"]
4138
gremlin = [
4239
"gremlinpython>=3.7.1,<4",
4340
"requests>=2.0.0,<3",
44-
"aiohttp>=3.9.0,<4",
41+
"aiohttp>=3.12.14,<4",
4542
"async-timeout>=4.0.3,<6.0.0",
4643
]
4744
sparql = [
@@ -58,8 +55,8 @@ openpyxl = ["openpyxl>=3.0.0,<4"]
5855
progressbar = ["progressbar2>=4.0.0,<5"]
5956
deltalake = ["deltalake>=0.18.0,<1.2.0"]
6057
geopandas = ["geopandas>=1.0.0,<2"]
61-
modin = ["modin>=0.31,<0.35"]
62-
ray = ["ray[default, data]>=2.45.0,<3"]
58+
modin = ["modin>=0.31,<0.36"]
59+
ray = ["ray[default, data]>=2.49.0,<3"]
6360

6461
[project.urls]
6562
Homepage = "https://aws-sdk-pandas.readthedocs.io/"

Comments: 0