33
33
# Module-level logger, obtained from ``logging.getLogger`` using this module's ``__name__``.
_logger : logging .Logger = logging .getLogger (__name__ )
34
34
35
35
36
+ def _get_version_id_for (version_id : Optional [Union [str , Dict [str , str ]]], path : str ) -> Optional [str ]:
37
+ if isinstance (version_id , dict ):
38
+ return version_id .get (path , None )
39
+
40
+ return version_id
41
+
42
+
36
43
def _get_read_details (path : str , pandas_kwargs : Dict [str , Any ]) -> Tuple [str , Optional [str ], Optional [str ]]:
37
44
if pandas_kwargs .get ("compression" , "infer" ) == "infer" :
38
45
pandas_kwargs ["compression" ] = infer_compression (path , compression = "infer" )
@@ -52,7 +59,7 @@ def _read_text_chunked(
52
59
s3_additional_kwargs : Optional [Dict [str , str ]],
53
60
dataset : bool ,
54
61
use_threads : Union [bool , int ],
55
- version_ids : Optional [Dict [str , str ]] = None ,
62
+ version_ids : Optional [Dict [str , Optional [ str ] ]] = None ,
56
63
) -> Iterator [pd .DataFrame ]:
57
64
for path in paths :
58
65
_logger .debug ("path: %s" , path )
@@ -157,19 +164,21 @@ def _read_text(
157
164
}
158
165
_logger .debug ("args:\n %s" , pprint .pformat (args ))
159
166
160
- if chunksize is not None :
161
- return _read_text_chunked (
162
- paths = paths , version_ids = version_id if isinstance ( version_id , dict ) else None , chunksize = chunksize , ** args
167
+ if len ( paths ) > 1 and version_id is not None and not isinstance ( version_id , dict ) :
168
+ raise exceptions . InvalidArgumentCombination (
169
+ "If multiple paths are provided along with a file version ID, the version ID parameter must be a dict."
163
170
)
171
+ version_id_dict = {path : _get_version_id_for (version_id , path ) for path in paths }
164
172
165
- version_id = version_id if isinstance (version_id , dict ) else None
173
+ if chunksize is not None :
174
+ return _read_text_chunked (paths = paths , version_ids = version_id_dict , chunksize = chunksize , ** args )
166
175
167
176
executor = _get_executor (use_threads = use_threads )
168
177
tables = executor .map (
169
178
_read_text_file ,
170
179
session ,
171
180
paths ,
172
- itertools . repeat ( version_id ) ,
181
+ [ version_id_dict [ path ] for path in paths ] ,
173
182
itertools .repeat (parser_func ),
174
183
itertools .repeat (path_root ),
175
184
itertools .repeat (pandas_kwargs ),
0 commit comments