Skip to content

Commit 0719406

Browse files
committed
Always extract original URL from next_url (#318)
1 parent 69d3b2d commit 0719406

File tree

1 file changed

+5
-9
lines changed

1 file changed

+5
-9
lines changed

youtube-dl

Lines changed: 5 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1171,12 +1171,11 @@ class InfoExtractor(object):
11711171
class YoutubeIE(InfoExtractor):
11721172
"""Information extractor for youtube.com."""
11731173

1174-
_PREFIX = r'(?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)'
1175-
_VALID_URL = r'^('+_PREFIX+r'(?!view_play_list|my_playlists|artist|playlist)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
1176-
_VALID_URL_WITH_AGE = r'^('+_PREFIX+')verify_age\?next_url=([^&]+)(?:.+)?$'
1174+
_VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?!view_play_list|my_playlists|artist|playlist)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
11771175
_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
11781176
_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
11791177
_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
1178+
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
11801179
_NETRC_MACHINE = 'youtube'
11811180
# Listed in order of quality
11821181
_available_formats = ['38', '37', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13']
@@ -1337,13 +1336,10 @@ class YoutubeIE(InfoExtractor):
13371336
return
13381337

13391338
def _real_extract(self, url):
1340-
# Extract original video URL from URL with age verification, using next_url parameter
1341-
mobj = re.match(self._VALID_URL_WITH_AGE, url)
1339+
# Extract original video URL from URL with redirection, like age verification, using next_url parameter
1340+
mobj = re.search(self._NEXT_URL_RE, url)
13421341
if mobj:
1343-
urldecode = lambda x: re.sub(r'%([0-9a-hA-H][0-9a-hA-H])', lambda m: chr(int(m.group(1), 16)), x)
1344-
# Keep original domain. We can probably change to www.youtube.com, but it should not hurt so keep it.
1345-
# We just make sure we do not have double //, in URL, so we strip starting slash in next_url.
1346-
url = mobj.group(1) + re.sub(r'^/', '', urldecode(mobj.group(2)))
1342+
url = 'http://www.youtube.com/' + urllib.unquote(mobj.group(1)).lstrip('/')
13471343

13481344
# Extract video id from URL
13491345
mobj = re.match(self._VALID_URL, url)

0 commit comments

Comments
 (0)