@@ -1171,12 +1171,11 @@ class InfoExtractor(object):
1171
1171
class YoutubeIE (InfoExtractor ):
1172
1172
"""Information extractor for youtube.com."""
1173
1173
1174
- _PREFIX = r'(?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)'
1175
- _VALID_URL = r'^(' + _PREFIX + r'(?!view_play_list|my_playlists|artist|playlist)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
1176
- _VALID_URL_WITH_AGE = r'^(' + _PREFIX + ')verify_age\?next_url=([^&]+)(?:.+)?$'
1174
+ _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?!view_play_list|my_playlists|artist|playlist)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
1177
1175
_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
1178
1176
_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
1179
1177
_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
1178
+ _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
1180
1179
_NETRC_MACHINE = 'youtube'
1181
1180
# Listed in order of quality
1182
1181
_available_formats = ['38' , '37' , '22' , '45' , '35' , '44' , '34' , '18' , '43' , '6' , '5' , '17' , '13' ]
@@ -1337,13 +1336,10 @@ class YoutubeIE(InfoExtractor):
1337
1336
return
1338
1337
1339
1338
def _real_extract (self , url ):
1340
- # Extract original video URL from URL with age verification, using next_url parameter
1341
- mobj = re .match (self ._VALID_URL_WITH_AGE , url )
1339
+ # For redirect URLs (e.g. age verification), recover the original video URL from the next_url query parameter
1340
+ mobj = re .search (self ._NEXT_URL_RE , url )
1342
1341
if mobj :
1343
- urldecode = lambda x : re .sub (r'%([0-9a-hA-H][0-9a-hA-H])' , lambda m : chr (int (m .group (1 ), 16 )), x )
1344
- # Keep original domain. We can probably change to www.youtube.com, but it should not hurt so keep it.
1345
- # We just make sure we do not have double //, in URL, so we strip starting slash in next_url.
1346
- url = mobj .group (1 ) + re .sub (r'^/' , '' , urldecode (mobj .group (2 )))
1342
+ url = 'http://www.youtube.com/' + urllib .unquote (mobj .group (1 )).lstrip ('/' )
1347
1343
1348
1344
# Extract video id from URL
1349
1345
mobj = re .match (self ._VALID_URL , url )
0 commit comments