1
1
from __future__ import annotations
2
2
3
+ import os
4
+ import tempfile
5
+ from contextlib import contextmanager
3
6
from dataclasses import dataclass
4
7
from typing import TYPE_CHECKING , Any
5
8
from urllib .parse import urlparse
@@ -131,6 +134,7 @@ def schema(self) -> Schema:
131
134
)
132
135
133
136
def _list_frames (self , path : str , file : Any ) -> Generator [_VideoFrame ]:
137
+ container = None
134
138
try :
135
139
container = av .open (file )
136
140
@@ -147,8 +151,6 @@ def _list_frames(self, path: str, file: Any) -> Generator[_VideoFrame]:
147
151
frame = next (container .decode (stream ))
148
152
except StopIteration :
149
153
break
150
- except Exception :
151
- continue # skip decoding errors
152
154
153
155
if self .is_key_frame is not None and self .is_key_frame != frame .key_frame :
154
156
continue # skip based on is_key_frame filter
@@ -171,28 +173,70 @@ def _list_frames(self, path: str, file: Any) -> Generator[_VideoFrame]:
171
173
)
172
174
frame_index += 1
173
175
finally :
174
- container .close ()
176
+ if container :
177
+ container .close ()
175
178
176
179
def _open(self) -> Any:
    """Return a handle on the video located at ``self.path``.

    YouTube URLs are delegated to ``_open_youtube_file``; any other path is
    resolved with ``_infer_filesystem`` (using ``self.io_config``) and opened
    through the filesystem it returns.
    """
    if not _is_youtube_url(self.path):
        resolved_path, filesystem, _ = _infer_filesystem(self.path, io_config=self.io_config)
        return filesystem.open_input_file(resolved_path)

    return self._open_youtube_file()
195
185
186
@contextmanager
def _open_youtube_file(self) -> Any:
    """Download the video at ``self.path`` with yt-dlp and yield it as a binary file.

    The video is written into a private temporary directory. When the
    ``with`` block using this context manager exits (normally or through an
    exception) the yielded file is closed and the directory, including any
    partial download left inside it, is removed.

    Yields:
        A file object opened in ``"rb"`` mode on the downloaded video.

    Raises:
        Errors raised by yt-dlp during extraction/download propagate
        unchanged (``ignoreerrors`` is ``False``), as does any ``OSError``
        from opening the downloaded file.
    """
    import yt_dlp

    def fit(fmt):  # type: ignore
        # Distance between a format's dimensions and the requested frame size; lower is better.
        return abs(fmt["height"] - self.image_height) + abs(fmt["width"] - self.image_width)

    def selector(ctx):  # type: ignore
        # Only mp4 formats that report both dimensions can be ranked.
        candidates = [
            fmt
            for fmt in ctx["formats"]
            if fmt.get("ext") == "mp4" and fmt.get("height") is not None and fmt.get("width") is not None
        ]
        if not candidates:
            # Yield nothing instead of ``None``: an empty selection lets yt-dlp
            # report that no format matched rather than failing on a ``None``
            # format entry. NOTE(review): confirm the exact error yt-dlp raises here.
            return
        # ``min`` keeps the first of several equally good formats, so an exact
        # match (fit == 0) still wins as soon as one exists.
        yield min(candidates, key=fit)

    # Note:
    # Streaming youtube downloads requires deeper work and was error-prone.
    # The parsed youtube urls use m3u8 which requires decoding which ydl
    # will handle for us. We cannot reliably pass a file-like HTTP response
    # to PyAV; hence why this will download to a tempfile then open the file.

    # A dedicated temporary directory keeps the download out of the current
    # working directory and avoids relying on private ``tempfile`` helpers.
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_file = os.path.join(temp_dir, "video")

        params = {
            "format": selector,
            "quiet": True,
            "outtmpl": temp_file,
            "no_warnings": True,
            "extract_flat": False,
            # NOTE(review): yt-dlp documents this option as ``nocheckcertificate``;
            # this spelling is presumably ignored. Confirm intent before renaming,
            # since enabling it would disable TLS certificate verification.
            "no_check_certificate": True,
            "ignoreerrors": False,
            "consoletitle": False,
            "noprogress": True,
        }

        with yt_dlp.YoutubeDL(params) as ydl:
            ydl.download([self.path])

        # Close the handle before the directory is removed; deleting a file
        # that is still open fails on some platforms.
        with open(temp_file, mode="rb") as video_file:
            yield video_file
239
+
196
240
def get_micro_partitions (self ) -> Iterator [MicroPartition ]:
197
241
with self ._open () as file :
198
242
buffer = _VideoFramesBuffer (
0 commit comments