Просмотр исходного кода

Merge remote-tracking branch 'yasoob/master'

Philipp Hagemeister 12 лет назад
Родитель
Коммит
d746cd88c2
3 изменённых файла: 43 добавлено и 0 удалено
  1. 9 0
      test/tests.json
  2. 2 0
      youtube_dl/extractor/__init__.py
  3. 32 0
      youtube_dl/extractor/tudou.py

+ 9 - 0
test/tests.json

@@ -686,5 +686,14 @@
         "upload_date": "20130624",
         "uploader": "Hurts"
     }
+  },
+  {
+    "name": "Tudou",
+    "url": "http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html",
+    "file": "159447792.f4v",
+    "md5": "ad7c358a01541e926a1e413612c6b10a",
+    "info_dict": {
+        "title": "卡马乔国足开大脚长传冲吊集锦"
+    }
   }
 ]

+ 2 - 0
youtube_dl/extractor/__init__.py

@@ -58,6 +58,7 @@ from .youku import YoukuIE
 from .youporn import YouPornIE
 from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
 from .zdf import ZDFIE
+from .tudou import TudouIE
 
 def gen_extractors():
     """ Return a list of an instance of every supported extractor.
@@ -129,6 +130,7 @@ def gen_extractors():
         BreakIE(),
         VevoIE(),
         JukeboxIE(),
+        TudouIE(),
         GenericIE()
     ]
 

+ 32 - 0
youtube_dl/extractor/tudou.py

@@ -0,0 +1,32 @@
+import re
+
+from .common import InfoExtractor
+
+
+class TudouIE(InfoExtractor):
+    _VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs)/(?:view|(.+?))/(?:([^/]+)|([^/]+)\.html)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group(2).replace('.html','')
+        webpage = self._download_webpage(url, video_id)
+        video_id = re.search('"k":(.+?),',webpage).group(1)
+        title = re.search(",kw:\"(.+)\"",webpage)
+        if title is None:
+            title = re.search(",kw: \'(.+)\'",webpage)
+        title = title.group(1)
+        thumbnail_url = re.search(",pic: \'(.+?)\'",webpage)
+        if thumbnail_url is None:
+            thumbnail_url = re.search(",pic:\"(.+?)\"",webpage)
+        thumbnail_url = thumbnail_url.group(1)
+        info_url = "http://v2.tudou.com/f?id="+str(video_id)
+        webpage = self._download_webpage(info_url, video_id, "Opening the info webpage")
+        final_url = re.search('\>(.+?)\<\/f\>',webpage).group(1)
+        ext = (final_url.split('?')[0]).split('.')[-1]
+        return [{
+            'id':        video_id,
+            'url':       final_url,
+            'ext':       ext,
+            'title':     title,
+            'thumbnail': thumbnail_url,
+        }]