crackle.py 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. # coding: utf-8
  2. from __future__ import unicode_literals, division
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import (
  6. compat_str,
  7. compat_HTTPError,
  8. )
  9. from ..utils import (
  10. determine_ext,
  11. float_or_none,
  12. int_or_none,
  13. parse_age_limit,
  14. parse_duration,
  15. ExtractorError
  16. )
  17. class CrackleIE(InfoExtractor):
  18. _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
  19. _TEST = {
  20. # geo restricted to CA
  21. 'url': 'https://www.crackle.com/andromeda/2502343',
  22. 'info_dict': {
  23. 'id': '2502343',
  24. 'ext': 'mp4',
  25. 'title': 'Under The Night',
  26. 'description': 'md5:d2b8ca816579ae8a7bf28bfff8cefc8a',
  27. 'duration': 2583,
  28. 'view_count': int,
  29. 'average_rating': 0,
  30. 'age_limit': 14,
  31. 'genre': 'Action, Sci-Fi',
  32. 'creator': 'Allan Kroeker',
  33. 'artist': 'Keith Hamilton Cobb, Kevin Sorbo, Lisa Ryder, Lexa Doig, Robert Hewitt Wolfe',
  34. 'release_year': 2000,
  35. 'series': 'Andromeda',
  36. 'episode': 'Under The Night',
  37. 'season_number': 1,
  38. 'episode_number': 1,
  39. },
  40. 'params': {
  41. # m3u8 download
  42. 'skip_download': True,
  43. }
  44. }
  45. def _real_extract(self, url):
  46. video_id = self._match_id(url)
  47. country_code = self._downloader.params.get('geo_bypass_country', None)
  48. countries = [country_code] if country_code else (
  49. 'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI')
  50. last_e = None
  51. for country in countries:
  52. try:
  53. media = self._download_json(
  54. 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s'
  55. % (video_id, country), video_id,
  56. 'Downloading media JSON as %s' % country,
  57. 'Unable to download media JSON', query={
  58. 'disableProtocols': 'true',
  59. 'format': 'json'
  60. })
  61. except ExtractorError as e:
  62. # 401 means geo restriction, trying next country
  63. if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
  64. last_e = e
  65. continue
  66. raise
  67. media_urls = media.get('MediaURLs')
  68. if not media_urls or not isinstance(media_urls, list):
  69. continue
  70. title = media['Title']
  71. formats = []
  72. for e in media['MediaURLs']:
  73. if e.get('UseDRM') is True:
  74. continue
  75. format_url = e.get('Path')
  76. if not format_url or not isinstance(format_url, compat_str):
  77. continue
  78. ext = determine_ext(format_url)
  79. if ext == 'm3u8':
  80. formats.extend(self._extract_m3u8_formats(
  81. format_url, video_id, 'mp4', entry_protocol='m3u8_native',
  82. m3u8_id='hls', fatal=False))
  83. elif ext == 'mpd':
  84. formats.extend(self._extract_mpd_formats(
  85. format_url, video_id, mpd_id='dash', fatal=False))
  86. self._sort_formats(formats)
  87. description = media.get('Description')
  88. duration = int_or_none(media.get(
  89. 'DurationInSeconds')) or parse_duration(media.get('Duration'))
  90. view_count = int_or_none(media.get('CountViews'))
  91. average_rating = float_or_none(media.get('UserRating'))
  92. age_limit = parse_age_limit(media.get('Rating'))
  93. genre = media.get('Genre')
  94. release_year = int_or_none(media.get('ReleaseYear'))
  95. creator = media.get('Directors')
  96. artist = media.get('Cast')
  97. if media.get('MediaTypeDisplayValue') == 'Full Episode':
  98. series = media.get('ShowName')
  99. episode = title
  100. season_number = int_or_none(media.get('Season'))
  101. episode_number = int_or_none(media.get('Episode'))
  102. else:
  103. series = episode = season_number = episode_number = None
  104. subtitles = {}
  105. cc_files = media.get('ClosedCaptionFiles')
  106. if isinstance(cc_files, list):
  107. for cc_file in cc_files:
  108. if not isinstance(cc_file, dict):
  109. continue
  110. cc_url = cc_file.get('Path')
  111. if not cc_url or not isinstance(cc_url, compat_str):
  112. continue
  113. lang = cc_file.get('Locale') or 'en'
  114. subtitles.setdefault(lang, []).append({'url': cc_url})
  115. thumbnails = []
  116. images = media.get('Images')
  117. if isinstance(images, list):
  118. for image_key, image_url in images.items():
  119. mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key)
  120. if not mobj:
  121. continue
  122. thumbnails.append({
  123. 'url': image_url,
  124. 'width': int(mobj.group(1)),
  125. 'height': int(mobj.group(2)),
  126. })
  127. return {
  128. 'id': video_id,
  129. 'title': title,
  130. 'description': description,
  131. 'duration': duration,
  132. 'view_count': view_count,
  133. 'average_rating': average_rating,
  134. 'age_limit': age_limit,
  135. 'genre': genre,
  136. 'creator': creator,
  137. 'artist': artist,
  138. 'release_year': release_year,
  139. 'series': series,
  140. 'episode': episode,
  141. 'season_number': season_number,
  142. 'episode_number': episode_number,
  143. 'thumbnails': thumbnails,
  144. 'subtitles': subtitles,
  145. 'formats': formats,
  146. }
  147. raise last_e