dramafever.py 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import itertools
  4. from .common import InfoExtractor
  5. from ..compat import (
  6. compat_str,
  7. compat_urlparse,
  8. )
  9. from ..utils import (
  10. clean_html,
  11. ExtractorError,
  12. int_or_none,
  13. parse_age_limit,
  14. parse_duration,
  15. sanitized_Request,
  16. unified_timestamp,
  17. urlencode_postdata
  18. )
  19. class DramaFeverBaseIE(InfoExtractor):
  20. _LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
  21. _NETRC_MACHINE = 'dramafever'
  22. _CONSUMER_SECRET = 'DA59dtVXYLxajktV'
  23. _consumer_secret = None
  24. def _get_consumer_secret(self):
  25. mainjs = self._download_webpage(
  26. 'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js',
  27. None, 'Downloading main.js', fatal=False)
  28. if not mainjs:
  29. return self._CONSUMER_SECRET
  30. return self._search_regex(
  31. r"var\s+cs\s*=\s*'([^']+)'", mainjs,
  32. 'consumer secret', default=self._CONSUMER_SECRET)
  33. def _real_initialize(self):
  34. self._login()
  35. self._consumer_secret = self._get_consumer_secret()
  36. def _login(self):
  37. (username, password) = self._get_login_info()
  38. if username is None:
  39. return
  40. login_form = {
  41. 'username': username,
  42. 'password': password,
  43. }
  44. request = sanitized_Request(
  45. self._LOGIN_URL, urlencode_postdata(login_form))
  46. response = self._download_webpage(
  47. request, None, 'Logging in')
  48. if all(logout_pattern not in response
  49. for logout_pattern in ['href="/accounts/logout/"', '>Log out<']):
  50. error = self._html_search_regex(
  51. r'(?s)<h\d[^>]+\bclass="hidden-xs prompt"[^>]*>(.+?)</h\d',
  52. response, 'error message', default=None)
  53. if error:
  54. raise ExtractorError('Unable to login: %s' % error, expected=True)
  55. raise ExtractorError('Unable to log in')
  56. class DramaFeverIE(DramaFeverBaseIE):
  57. IE_NAME = 'dramafever'
  58. _VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
  59. _TESTS = [{
  60. 'url': 'https://www.dramafever.com/drama/4274/1/Heirs/',
  61. 'info_dict': {
  62. 'id': '4274.1',
  63. 'ext': 'wvm',
  64. 'title': 'Heirs - Episode 1',
  65. 'description': 'md5:362a24ba18209f6276e032a651c50bc2',
  66. 'thumbnail': r're:^https?://.*\.jpg',
  67. 'duration': 3783,
  68. 'timestamp': 1381354993,
  69. 'upload_date': '20131009',
  70. 'series': 'Heirs',
  71. 'season_number': 1,
  72. 'episode': 'Episode 1',
  73. 'episode_number': 1,
  74. },
  75. 'params': {
  76. # m3u8 download
  77. 'skip_download': True,
  78. },
  79. }, {
  80. 'url': 'http://www.dramafever.com/drama/4826/4/Mnet_Asian_Music_Awards_2015/?ap=1',
  81. 'info_dict': {
  82. 'id': '4826.4',
  83. 'ext': 'flv',
  84. 'title': 'Mnet Asian Music Awards 2015',
  85. 'description': 'md5:3ff2ee8fedaef86e076791c909cf2e91',
  86. 'episode': 'Mnet Asian Music Awards 2015 - Part 3',
  87. 'episode_number': 4,
  88. 'thumbnail': r're:^https?://.*\.jpg',
  89. 'timestamp': 1450213200,
  90. 'upload_date': '20151215',
  91. 'duration': 5359,
  92. },
  93. 'params': {
  94. # m3u8 download
  95. 'skip_download': True,
  96. },
  97. }, {
  98. 'url': 'https://www.dramafever.com/zh-cn/drama/4972/15/Doctor_Romantic/',
  99. 'only_matching': True,
  100. }]
  101. def _call_api(self, path, video_id, note, fatal=False):
  102. return self._download_json(
  103. 'https://www.dramafever.com/api/5/' + path,
  104. video_id, note=note, headers={
  105. 'x-consumer-key': self._consumer_secret,
  106. }, fatal=fatal)
  107. def _get_subtitles(self, video_id):
  108. subtitles = {}
  109. subs = self._call_api(
  110. 'video/%s/subtitles/webvtt/' % video_id, video_id,
  111. 'Downloading subtitles JSON', fatal=False)
  112. if not subs or not isinstance(subs, list):
  113. return subtitles
  114. for sub in subs:
  115. if not isinstance(sub, dict):
  116. continue
  117. sub_url = sub.get('url')
  118. if not sub_url or not isinstance(sub_url, compat_str):
  119. continue
  120. subtitles.setdefault(
  121. sub.get('code') or sub.get('language') or 'en', []).append({
  122. 'url': sub_url
  123. })
  124. return subtitles
  125. def _real_extract(self, url):
  126. video_id = self._match_id(url).replace('/', '.')
  127. series_id, episode_number = video_id.split('.')
  128. video = self._call_api(
  129. 'series/%s/episodes/%s/' % (series_id, episode_number), video_id,
  130. 'Downloading video JSON')
  131. formats = []
  132. download_assets = video.get('download_assets')
  133. if download_assets and isinstance(download_assets, dict):
  134. for format_id, format_dict in download_assets.items():
  135. if not isinstance(format_dict, dict):
  136. continue
  137. format_url = format_dict.get('url')
  138. if not format_url or not isinstance(format_url, compat_str):
  139. continue
  140. formats.append({
  141. 'url': format_url,
  142. 'format_id': format_id,
  143. 'filesize': int_or_none(video.get('filesize')),
  144. })
  145. stream = self._call_api(
  146. 'video/%s/stream/' % video_id, video_id, 'Downloading stream JSON',
  147. fatal=False)
  148. if stream:
  149. stream_url = stream.get('stream_url')
  150. if stream_url:
  151. formats.extend(self._extract_m3u8_formats(
  152. stream_url, video_id, 'mp4', entry_protocol='m3u8_native',
  153. m3u8_id='hls', fatal=False))
  154. self._sort_formats(formats)
  155. title = video.get('title') or 'Episode %s' % episode_number
  156. description = video.get('description')
  157. thumbnail = video.get('thumbnail')
  158. timestamp = unified_timestamp(video.get('release_date'))
  159. duration = parse_duration(video.get('duration'))
  160. age_limit = parse_age_limit(video.get('tv_rating'))
  161. series = video.get('series_title')
  162. season_number = int_or_none(video.get('season'))
  163. if series:
  164. title = '%s - %s' % (series, title)
  165. subtitles = self.extract_subtitles(video_id)
  166. return {
  167. 'id': video_id,
  168. 'title': title,
  169. 'description': description,
  170. 'thumbnail': thumbnail,
  171. 'duration': duration,
  172. 'timestamp': timestamp,
  173. 'age_limit': age_limit,
  174. 'series': series,
  175. 'season_number': season_number,
  176. 'episode_number': int_or_none(episode_number),
  177. 'formats': formats,
  178. 'subtitles': subtitles,
  179. }
  180. class DramaFeverSeriesIE(DramaFeverBaseIE):
  181. IE_NAME = 'dramafever:series'
  182. _VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$'
  183. _TESTS = [{
  184. 'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/',
  185. 'info_dict': {
  186. 'id': '4512',
  187. 'title': 'Cooking with Shin',
  188. 'description': 'md5:84a3f26e3cdc3fb7f500211b3593b5c1',
  189. },
  190. 'playlist_count': 4,
  191. }, {
  192. 'url': 'http://www.dramafever.com/drama/124/IRIS/',
  193. 'info_dict': {
  194. 'id': '124',
  195. 'title': 'IRIS',
  196. 'description': 'md5:b3a30e587cf20c59bd1c01ec0ee1b862',
  197. },
  198. 'playlist_count': 20,
  199. }]
  200. _PAGE_SIZE = 60 # max is 60 (see http://api.drama9.com/#get--api-4-episode-series-)
  201. def _real_extract(self, url):
  202. series_id = self._match_id(url)
  203. series = self._download_json(
  204. 'http://www.dramafever.com/api/4/series/query/?cs=%s&series_id=%s'
  205. % (self._consumer_secret, series_id),
  206. series_id, 'Downloading series JSON')['series'][series_id]
  207. title = clean_html(series['name'])
  208. description = clean_html(series.get('description') or series.get('description_short'))
  209. entries = []
  210. for page_num in itertools.count(1):
  211. episodes = self._download_json(
  212. 'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_size=%d&page_number=%d'
  213. % (self._consumer_secret, series_id, self._PAGE_SIZE, page_num),
  214. series_id, 'Downloading episodes JSON page #%d' % page_num)
  215. for episode in episodes.get('value', []):
  216. episode_url = episode.get('episode_url')
  217. if not episode_url:
  218. continue
  219. entries.append(self.url_result(
  220. compat_urlparse.urljoin(url, episode_url),
  221. 'DramaFever', episode.get('guid')))
  222. if page_num == episodes['num_pages']:
  223. break
  224. return self.playlist_result(entries, series_id, title, description)