Kaynağa Gözat

Merge pull request #7599 from lalinsky/fix-youtube

[youtube] More explicit player config JSON extraction (fixes #7468)
Sergey M 10 yıl önce
ebeveyn
işleme
5ae17037a3
1 değiştirilmiş dosya ile 30 ekleme ve 8 silme
  1. 30 8
      youtube_dl/extractor/youtube.py

+ 30 - 8
youtube_dl/extractor/youtube.py

@@ -691,7 +691,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         {
             'url': 'http://vid.plus/FlRa-iH7PGw',
             'only_matching': True,
-        }
+        },
+        {
+            # Title with JS-like syntax "};"
+            'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
+            'info_dict': {
+                'id': 'lsguqyKfVQg',
+                'ext': 'mp4',
+                'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
+                'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
+                'upload_date': '20151119',
+                'uploader_id': 'IronSoulElf',
+                'uploader': 'IronSoulElf',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
     ]
 
     def __init__(self, *args, **kwargs):
@@ -875,16 +891,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             return {}
         return sub_lang_list
 
+    def _get_ytplayer_config(self, webpage):
+        patterns = [
+            r';ytplayer\.config\s*=\s*({.*?});ytplayer',
+            r';ytplayer\.config\s*=\s*({.*?});',
+        ]
+        config = self._search_regex(patterns, webpage, 'ytconfig.player', default=None)
+        if config is not None:
+            return json.loads(uppercase_escape(config))
+
     def _get_automatic_captions(self, video_id, webpage):
         """We need the webpage for getting the captions url, pass it as an
            argument to speed up the process."""
         self.to_screen('%s: Looking for automatic captions' % video_id)
-        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
+        player_config = self._get_ytplayer_config(webpage)
         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
-        if mobj is None:
+        if player_config is None:
             self._downloader.report_warning(err_msg)
             return {}
-        player_config = json.loads(mobj.group(1))
         try:
             args = player_config['args']
             caption_url = args['ttsurl']
@@ -1091,10 +1115,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             age_gate = False
             video_info = None
             # Try looking directly into the video webpage
-            mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
-            if mobj:
-                json_code = uppercase_escape(mobj.group(1))
-                ytplayer_config = json.loads(json_code)
+            ytplayer_config = self._get_ytplayer_config(video_webpage)
+            if ytplayer_config is not None:
                 args = ytplayer_config['args']
                 if args.get('url_encoded_fmt_stream_map'):
                     # Convert to the same format returned by compat_parse_qs