15
15
// specific language governing permissions and limitations
16
16
// under the License.
17
17
18
+ use std:: collections:: HashSet ;
18
19
use std:: sync:: Arc ;
19
20
20
21
use futures:: channel:: mpsc:: Sender ;
21
22
use futures:: { SinkExt , TryFutureExt } ;
23
+ use itertools:: Itertools ;
22
24
23
25
use crate :: delete_file_index:: DeleteFileIndex ;
24
26
use crate :: expr:: { Bind , BoundPredicate , Predicate } ;
@@ -28,11 +30,12 @@ use crate::scan::{
28
30
PartitionFilterCache ,
29
31
} ;
30
32
use crate :: spec:: {
31
- ManifestContentType , ManifestEntryRef , ManifestFile , ManifestList , SchemaRef , SnapshotRef ,
32
- TableMetadataRef ,
33
+ DataContentType , ManifestContentType , ManifestEntryRef , ManifestFile , ManifestList ,
34
+ ManifestStatus , Operation , SchemaRef , SnapshotRef , TableMetadataRef ,
33
35
} ;
34
36
use crate :: { Error , ErrorKind , Result } ;
35
37
38
/// Predicate over a manifest entry: returns `true` for entries that should be kept.
/// The `Send + Sync` bounds allow one predicate to be shared behind an `Arc`.
+ type ManifestEntryFilterFn = dyn Fn ( & ManifestEntryRef ) -> bool + Send + Sync ;
36
39
/// Wraps a [`ManifestFile`] alongside the objects that are needed
37
40
/// to process it in a thread-safe manner
38
41
pub ( crate ) struct ManifestFileContext {
@@ -46,6 +49,10 @@ pub(crate) struct ManifestFileContext {
46
49
snapshot_schema : SchemaRef ,
47
50
expression_evaluator_cache : Arc < ExpressionEvaluatorCache > ,
48
51
delete_file_index : DeleteFileIndex ,
52
+
53
+ /// filter manifest entries.
54
+ /// Used for different kinds of scans, e.g., scanning only newly added data files and skipping delete files.
55
+ filter_fn : Option < Arc < ManifestEntryFilterFn > > ,
49
56
}
50
57
51
58
/// Wraps a [`ManifestEntryRef`] alongside the objects that are needed
@@ -74,12 +81,13 @@ impl ManifestFileContext {
74
81
mut sender,
75
82
expression_evaluator_cache,
76
83
delete_file_index,
77
- ..
84
+ filter_fn ,
78
85
} = self ;
86
+ let filter_fn = filter_fn. unwrap_or_else ( || Arc :: new ( |_| true ) ) ;
79
87
80
88
let manifest = object_cache. get_manifest ( & manifest_file) . await ?;
81
89
82
- for manifest_entry in manifest. entries ( ) {
90
+ for manifest_entry in manifest. entries ( ) . iter ( ) . filter ( |e| filter_fn ( e ) ) {
83
91
let manifest_entry_context = ManifestEntryContext {
84
92
// TODO: refactor to avoid the expensive ManifestEntry clone
85
93
manifest_entry : manifest_entry. clone ( ) ,
@@ -149,6 +157,11 @@ pub(crate) struct PlanContext {
149
157
pub partition_filter_cache : Arc < PartitionFilterCache > ,
150
158
pub manifest_evaluator_cache : Arc < ManifestEvaluatorCache > ,
151
159
pub expression_evaluator_cache : Arc < ExpressionEvaluatorCache > ,
160
+
161
+ // for incremental scan.
162
+ // If `to_snapshot_id` is set, it means incremental scan. `from_snapshot_id` can be `None`.
163
+ pub from_snapshot_id : Option < i64 > ,
164
+ pub to_snapshot_id : Option < i64 > ,
152
165
}
153
166
154
167
impl PlanContext {
@@ -180,19 +193,72 @@ impl PlanContext {
180
193
Ok ( partition_filter)
181
194
}
182
195
183
- pub ( crate ) fn build_manifest_file_contexts (
196
+ pub ( crate ) async fn build_manifest_file_contexts (
184
197
& self ,
185
198
manifest_list : Arc < ManifestList > ,
186
199
tx_data : Sender < ManifestEntryContext > ,
187
200
delete_file_idx : DeleteFileIndex ,
188
201
delete_file_tx : Sender < ManifestEntryContext > ,
189
202
) -> Result < Box < impl Iterator < Item = Result < ManifestFileContext > > + ' static > > {
190
- let manifest_files = manifest_list. entries ( ) . iter ( ) ;
203
+ let mut filter_fn: Option < Arc < ManifestEntryFilterFn > > = None ;
204
+ let manifest_files = {
205
+ if let Some ( to_snapshot_id) = self . to_snapshot_id {
206
+ // Incremental scan mode:
207
+ // Get all added files between two snapshots.
208
+ // - data files in `Append` and `Overwrite` snapshots are included.
209
+ // - delete files are ignored
210
+ // - `Replace` snapshots (e.g., compaction) are ignored.
211
+ //
212
+ // `to_snapshot_id` is inclusive, `from_snapshot_id` is exclusive.
213
+
214
+ let snapshots =
215
+ ancestors_between ( & self . table_metadata , to_snapshot_id, self . from_snapshot_id )
216
+ . filter ( |snapshot| {
217
+ matches ! (
218
+ snapshot. summary( ) . operation,
219
+ Operation :: Append | Operation :: Overwrite
220
+ )
221
+ } )
222
+ . collect_vec ( ) ;
223
+ let snapshot_ids: HashSet < i64 > = snapshots
224
+ . iter ( )
225
+ . map ( |snapshot| snapshot. snapshot_id ( ) )
226
+ . collect ( ) ;
227
+
228
+ let mut manifest_files = vec ! [ ] ;
229
+ for snapshot in snapshots {
230
+ let manifest_list = self
231
+ . object_cache
232
+ . get_manifest_list ( & snapshot, & self . table_metadata )
233
+ . await ?;
234
+ for entry in manifest_list. entries ( ) {
235
+ if !snapshot_ids. contains ( & entry. added_snapshot_id ) {
236
+ continue ;
237
+ }
238
+ manifest_files. push ( entry. clone ( ) ) ;
239
+ }
240
+ }
241
+
242
+ filter_fn = Some ( Arc :: new ( move |entry : & ManifestEntryRef | {
243
+ matches ! ( entry. status( ) , ManifestStatus :: Added )
244
+ && matches ! ( entry. data_file( ) . content_type( ) , DataContentType :: Data )
245
+ && (
246
+ // Is it possible that the snapshot id here is not contained?
247
+ entry. snapshot_id ( ) . is_none ( )
248
+ || snapshot_ids. contains ( & entry. snapshot_id ( ) . unwrap ( ) )
249
+ )
250
+ } ) ) ;
251
+
252
+ manifest_files
253
+ } else {
254
+ manifest_list. entries ( ) . to_vec ( )
255
+ }
256
+ } ;
191
257
192
258
// TODO: Ideally we could ditch this intermediate Vec as we return an iterator.
193
259
let mut filtered_mfcs = vec ! [ ] ;
194
260
195
- for manifest_file in manifest_files {
261
+ for manifest_file in & manifest_files {
196
262
let tx = if manifest_file. content == ManifestContentType :: Deletes {
197
263
delete_file_tx. clone ( )
198
264
} else {
@@ -225,6 +291,7 @@ impl PlanContext {
225
291
partition_bound_predicate,
226
292
tx,
227
293
delete_file_idx. clone ( ) ,
294
+ filter_fn. clone ( ) ,
228
295
) ;
229
296
230
297
filtered_mfcs. push ( Ok ( mfc) ) ;
@@ -239,6 +306,7 @@ impl PlanContext {
239
306
partition_filter : Option < Arc < BoundPredicate > > ,
240
307
sender : Sender < ManifestEntryContext > ,
241
308
delete_file_index : DeleteFileIndex ,
309
+ filter_fn : Option < Arc < ManifestEntryFilterFn > > ,
242
310
) -> ManifestFileContext {
243
311
let bound_predicates =
244
312
if let ( Some ( ref partition_bound_predicate) , Some ( snapshot_bound_predicate) ) =
@@ -261,6 +329,61 @@ impl PlanContext {
261
329
field_ids : self . field_ids . clone ( ) ,
262
330
expression_evaluator_cache : self . expression_evaluator_cache . clone ( ) ,
263
331
delete_file_index,
332
+ filter_fn,
264
333
}
265
334
}
266
335
}
336
+
337
+ struct Ancestors {
338
+ next : Option < SnapshotRef > ,
339
+ get_snapshot : Box < dyn Fn ( i64 ) -> Option < SnapshotRef > + Send > ,
340
+ }
341
+
342
+ impl Iterator for Ancestors {
343
+ type Item = SnapshotRef ;
344
+
345
+ fn next ( & mut self ) -> Option < Self :: Item > {
346
+ let snapshot = self . next . take ( ) ?;
347
+ let result = snapshot. clone ( ) ;
348
+ self . next = snapshot
349
+ . parent_snapshot_id ( )
350
+ . and_then ( |id| ( self . get_snapshot ) ( id) ) ;
351
+ Some ( result)
352
+ }
353
+ }
354
+
355
+ /// Iterate starting from `snapshot` (inclusive) to the root snapshot.
356
+ fn ancestors_of (
357
+ table_metadata : & TableMetadataRef ,
358
+ snapshot : i64 ,
359
+ ) -> Box < dyn Iterator < Item = SnapshotRef > + Send > {
360
+ if let Some ( snapshot) = table_metadata. snapshot_by_id ( snapshot) {
361
+ let table_metadata = table_metadata. clone ( ) ;
362
+ Box :: new ( Ancestors {
363
+ next : Some ( snapshot. clone ( ) ) ,
364
+ get_snapshot : Box :: new ( move |id| table_metadata. snapshot_by_id ( id) . cloned ( ) ) ,
365
+ } )
366
+ } else {
367
+ Box :: new ( std:: iter:: empty ( ) )
368
+ }
369
+ }
370
+
371
+ /// Iterate starting from `snapshot` (inclusive) to `oldest_snapshot_id` (exclusive).
372
+ fn ancestors_between (
373
+ table_metadata : & TableMetadataRef ,
374
+ latest_snapshot_id : i64 ,
375
+ oldest_snapshot_id : Option < i64 > ,
376
+ ) -> Box < dyn Iterator < Item = SnapshotRef > + Send > {
377
+ let Some ( oldest_snapshot_id) = oldest_snapshot_id else {
378
+ return Box :: new ( ancestors_of ( table_metadata, latest_snapshot_id) ) ;
379
+ } ;
380
+
381
+ if latest_snapshot_id == oldest_snapshot_id {
382
+ return Box :: new ( std:: iter:: empty ( ) ) ;
383
+ }
384
+
385
+ Box :: new (
386
+ ancestors_of ( table_metadata, latest_snapshot_id)
387
+ . take_while ( move |snapshot| snapshot. snapshot_id ( ) != oldest_snapshot_id) ,
388
+ )
389
+ }
0 commit comments