Kaynağa Gözat

[youtube] Fall back to the original regex for ytplayer.config

Lukáš Lalinský 10 yıl önce
ebeveyn
işleme 0e49d9a6b0
1 değiştirilmiş dosya ile 31 ekleme ve 8 silme
  1. +31 -8
      youtube_dl/extractor/youtube.py

+ 31 - 8
youtube_dl/extractor/youtube.py

@@ -674,7 +674,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         {
         {
             'url': 'http://vid.plus/FlRa-iH7PGw',
             'url': 'http://vid.plus/FlRa-iH7PGw',
             'only_matching': True,
             'only_matching': True,
-        }
+        },
+        {
+            # Title with JS-like syntax "};"
+            'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
+            'info_dict': {
+                'id': 'lsguqyKfVQg',
+                'ext': 'mp4',
+                'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
+                'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
+                'upload_date': '20151119',
+                'uploader_id': 'IronSoulElf',
+                'uploader': 'IronSoulElf',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
     ]
     ]
 
 
     def __init__(self, *args, **kwargs):
     def __init__(self, *args, **kwargs):
@@ -858,16 +874,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             return {}
             return {}
         return sub_lang_list
         return sub_lang_list
 
 
+    def _get_ytplayer_config(self, webpage):
+        patterns = [
+            r';ytplayer\.config\s*=\s*({.*?});ytplayer',
+            r';ytplayer\.config\s*=\s*({.*?});',
+        ]
+        for pattern in patterns:
+            config = self._search_regex(pattern, webpage, 'ytconfig.player', default=None)
+            if config is not None:
+                return json.loads(uppercase_escape(config))
+
     def _get_automatic_captions(self, video_id, webpage):
     def _get_automatic_captions(self, video_id, webpage):
         """We need the webpage for getting the captions url, pass it as an
         """We need the webpage for getting the captions url, pass it as an
            argument to speed up the process."""
            argument to speed up the process."""
         self.to_screen('%s: Looking for automatic captions' % video_id)
         self.to_screen('%s: Looking for automatic captions' % video_id)
-        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
+        player_config = self._get_ytplayer_config(webpage)
         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
-        if mobj is None:
+        if player_config is None:
             self._downloader.report_warning(err_msg)
             self._downloader.report_warning(err_msg)
             return {}
             return {}
-        player_config = json.loads(mobj.group(1))
         try:
         try:
             args = player_config['args']
             args = player_config['args']
             caption_url = args['ttsurl']
             caption_url = args['ttsurl']
@@ -1074,10 +1099,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             age_gate = False
             age_gate = False
             video_info = None
             video_info = None
             # Try looking directly into the video webpage
             # Try looking directly into the video webpage
-            mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});ytplayer', video_webpage)
-            if mobj:
-                json_code = uppercase_escape(mobj.group(1))
-                ytplayer_config = json.loads(json_code)
+            ytplayer_config = self._get_ytplayer_config(video_webpage)
+            if ytplayer_config is not None:
                 args = ytplayer_config['args']
                 args = ytplayer_config['args']
                 if args.get('url_encoded_fmt_stream_map'):
                 if args.get('url_encoded_fmt_stream_map'):
                     # Convert to the same format returned by compat_parse_qs
                     # Convert to the same format returned by compat_parse_qs