فهرست منبع

[youtube] Use a cookie for setting the language

This way, we don't have to make an additional request
Jaime Marquínez Ferrándiz 11 سال پیش
والد
کامیت
42939b6129
2 فایلهای تغییر یافته به همراه 11 افزوده شده و 18 حذف شده
  1. 6 0
      youtube_dl/extractor/common.py
  2. 5 18
      youtube_dl/extractor/youtube.py

+ 6 - 0
youtube_dl/extractor/common.py

@@ -13,6 +13,7 @@ import time
 import xml.etree.ElementTree
 import xml.etree.ElementTree
 
 
 from ..compat import (
 from ..compat import (
+    compat_cookiejar,
     compat_http_client,
     compat_http_client,
     compat_urllib_error,
     compat_urllib_error,
     compat_urllib_parse_urlparse,
     compat_urllib_parse_urlparse,
@@ -817,6 +818,11 @@ class InfoExtractor(object):
                 self._downloader.report_warning(msg)
                 self._downloader.report_warning(msg)
         return res
         return res
 
 
+    def _set_cookie(self, domain, name, value, expire_time=None):
+        cookie = compat_cookiejar.Cookie(0, name, value, None, None, domain, None,
+            None, '/', True, False, expire_time, '', None, None, None)
+        self._downloader.cookiejar.set_cookie(cookie)
+
 
 
 class SearchInfoExtractor(InfoExtractor):
 class SearchInfoExtractor(InfoExtractor):
     """
     """

+ 5 - 18
youtube_dl/extractor/youtube.py

@@ -7,6 +7,7 @@ import itertools
 import json
 import json
 import os.path
 import os.path
 import re
 import re
+import time
 import traceback
 import traceback
 
 
 from .common import InfoExtractor, SearchInfoExtractor
 from .common import InfoExtractor, SearchInfoExtractor
@@ -38,16 +39,14 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
     """Provide base functions for Youtube extractors"""
     """Provide base functions for Youtube extractors"""
     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
     _TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor'
     _TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor'
-    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
     _NETRC_MACHINE = 'youtube'
     _NETRC_MACHINE = 'youtube'
     # If True it will raise an error if no login info is provided
     # If True it will raise an error if no login info is provided
     _LOGIN_REQUIRED = False
     _LOGIN_REQUIRED = False
 
 
     def _set_language(self):
     def _set_language(self):
-        return bool(self._download_webpage(
-            self._LANG_URL, None,
-            note='Setting language', errnote='unable to set language',
-            fatal=False))
+        self._set_cookie('.youtube.com', 'PREF', 'f1=50000000&hl=en',
+            # YouTube sets the expire time to about two months
+            expire_time=time.time() + 60*24*3600)
 
 
     def _login(self):
     def _login(self):
         """
         """
@@ -178,9 +177,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
     def _real_initialize(self):
     def _real_initialize(self):
         if self._downloader is None:
         if self._downloader is None:
             return
             return
-        if self._get_login_info()[0] is not None:
-            if not self._set_language():
-                return
+        self._set_language()
         if not self._login():
         if not self._login():
             return
             return
 
 
@@ -667,16 +664,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
 
         # Get video webpage
         # Get video webpage
         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
-        pref_cookies = [
-            c for c in self._downloader.cookiejar
-            if c.domain == '.youtube.com' and c.name == 'PREF']
-        for pc in pref_cookies:
-            if 'hl=' in pc.value:
-                pc.value = re.sub(r'hl=[^&]+', 'hl=en', pc.value)
-            else:
-                if pc.value:
-                    pc.value += '&'
-                pc.value += 'hl=en'
         video_webpage = self._download_webpage(url, video_id)
         video_webpage = self._download_webpage(url, video_id)
 
 
         # Attempt to extract SWF player URL
         # Attempt to extract SWF player URL