@@ -27,6 +27,7 @@ class _PartialUdf:
27
27
"""Helper class to provide typing overloads for using `daft.func` as a decorator."""
28
28
29
29
return_dtype : DataTypeLike | None
30
+ unnest : bool
30
31
31
32
@overload
32
33
def __call__ (self , fn : Callable [P , Iterator [T ]]) -> GeneratorUdf [P , T ]: ... # type: ignore[overload-overlap]
@@ -35,9 +36,9 @@ def __call__(self, fn: Callable[P, T]) -> RowWiseUdf[P, T]: ...
35
36
36
37
def __call__ (self , fn : Callable [P , Any ]) -> GeneratorUdf [P , Any ] | RowWiseUdf [P , Any ]:
37
38
if isgeneratorfunction (fn ):
38
- return GeneratorUdf (fn , return_dtype = self .return_dtype )
39
+ return GeneratorUdf (fn , return_dtype = self .return_dtype , unnest = self . unnest )
39
40
else :
40
- return RowWiseUdf (fn , return_dtype = self .return_dtype )
41
+ return RowWiseUdf (fn , return_dtype = self .return_dtype , unnest = self . unnest )
41
42
42
43
43
44
class _DaftFuncDecorator :
@@ -54,6 +55,7 @@ class _DaftFuncDecorator:
54
55
55
56
Args:
56
57
return_dtype: The data type that this function should return or yield. If not specified, it is derived from the function's return type hint.
58
+ unnest: Whether to unnest/flatten out return type fields into columns. Return dtype must be `DataType.struct` when this is set to true. Defaults to false.
57
59
58
60
Examples:
59
61
Basic Example
@@ -184,21 +186,46 @@ class _DaftFuncDecorator:
184
186
╰───────┴─────────╯
185
187
<BLANKLINE>
186
188
(Showing first 7 of 7 rows)
189
+
190
+ Unnesting multiple return fields
191
+
192
+ >>> import daft
193
+ >>> from daft import DataType
194
+ >>> @daft.func(return_dtype=DataType.struct({"int": DataType.int64(), "str": DataType.string()}), unnest=True)
195
+ ... def my_multi_return(val: int):
196
+ ... return {"int": val * 2, "str": str(val) * 2}
197
+ >>> df = daft.from_pydict({"x": [1, 2, 3]})
198
+ >>> df.select(my_multi_return(df["x"])).collect()
199
+ ╭───────┬──────╮
200
+ │ int ┆ str │
201
+ │ --- ┆ --- │
202
+ │ Int64 ┆ Utf8 │
203
+ ╞═══════╪══════╡
204
+ │ 2 ┆ 11 │
205
+ ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
206
+ │ 4 ┆ 22 │
207
+ ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
208
+ │ 6 ┆ 33 │
209
+ ╰───────┴──────╯
210
+ <BLANKLINE>
211
+ (Showing first 3 of 3 rows)
187
212
"""
188
213
189
214
@overload
190
- def __new__ (cls , * , return_dtype : DataTypeLike | None = None ) -> _PartialUdf : ... # type: ignore[misc]
215
+ def __new__ (cls , * , return_dtype : DataTypeLike | None = None , unnest : bool = False ) -> _PartialUdf : ... # type: ignore[misc]
191
216
@overload
192
217
def __new__ ( # type: ignore[misc]
193
- cls , fn : Callable [P , Iterator [T ]], * , return_dtype : DataTypeLike | None = None
218
+ cls , fn : Callable [P , Iterator [T ]], * , return_dtype : DataTypeLike | None = None , unnest : bool = False
194
219
) -> GeneratorUdf [P , T ]: ...
195
220
@overload
196
- def __new__ (cls , fn : Callable [P , T ], * , return_dtype : DataTypeLike | None = None ) -> RowWiseUdf [P , T ]: ... # type: ignore[misc]
221
+ def __new__ ( # type: ignore[misc]
222
+ cls , fn : Callable [P , T ], * , return_dtype : DataTypeLike | None = None , unnest : bool = False
223
+ ) -> RowWiseUdf [P , T ]: ...
197
224
198
225
def __new__ ( # type: ignore[misc]
199
- cls , fn : Callable [P , Any ] | None = None , * , return_dtype : DataTypeLike | None = None
226
+ cls , fn : Callable [P , Any ] | None = None , * , return_dtype : DataTypeLike | None = None , unnest : bool = False
200
227
) -> _PartialUdf | GeneratorUdf [P , Any ] | RowWiseUdf [P , Any ]:
201
- partial_udf = _PartialUdf (return_dtype = return_dtype )
228
+ partial_udf = _PartialUdf (return_dtype = return_dtype , unnest = unnest )
202
229
return partial_udf if fn is None else partial_udf (fn )
203
230
204
231
0 commit comments