medialaan.py 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263
  1. from __future__ import unicode_literals
  2. import re
  3. from .common import InfoExtractor
  4. from ..compat import compat_str
  5. from ..utils import (
  6. ExtractorError,
  7. int_or_none,
  8. parse_duration,
  9. try_get,
  10. unified_timestamp,
  11. urlencode_postdata,
  12. )
  13. class MedialaanIE(InfoExtractor):
  14. _VALID_URL = r'''(?x)
  15. https?://
  16. (?:www\.)?
  17. (?:
  18. (?P<site_id>vtm|q2|vtmkzoom)\.be/
  19. (?:
  20. video(?:/[^/]+/id/|/?\?.*?\baid=)|
  21. (?:[^/]+/)*
  22. )
  23. )
  24. (?P<id>[^/?#&]+)
  25. '''
  26. _NETRC_MACHINE = 'medialaan'
  27. _APIKEY = '3_HZ0FtkMW_gOyKlqQzW5_0FHRC7Nd5XpXJZcDdXY4pk5eES2ZWmejRW5egwVm4ug-'
  28. _SITE_TO_APP_ID = {
  29. 'vtm': 'vtm_watch',
  30. 'q2': 'q2',
  31. 'vtmkzoom': 'vtmkzoom',
  32. }
  33. _TESTS = [{
  34. # vod
  35. 'url': 'http://vtm.be/video/volledige-afleveringen/id/vtm_20170219_VM0678361_vtmwatch',
  36. 'info_dict': {
  37. 'id': 'vtm_20170219_VM0678361_vtmwatch',
  38. 'ext': 'mp4',
  39. 'title': 'Allemaal Chris afl. 6',
  40. 'description': 'md5:4be86427521e7b07e0adb0c9c554ddb2',
  41. 'timestamp': 1487533280,
  42. 'upload_date': '20170219',
  43. 'duration': 2562,
  44. 'series': 'Allemaal Chris',
  45. 'season': 'Allemaal Chris',
  46. 'season_number': 1,
  47. 'season_id': '256936078124527',
  48. 'episode': 'Allemaal Chris afl. 6',
  49. 'episode_number': 6,
  50. 'episode_id': '256936078591527',
  51. },
  52. 'params': {
  53. 'skip_download': True,
  54. },
  55. 'skip': 'Requires account credentials',
  56. }, {
  57. # clip
  58. 'url': 'http://vtm.be/video?aid=168332',
  59. 'info_dict': {
  60. 'id': '168332',
  61. 'ext': 'mp4',
  62. 'title': '"Veronique liegt!"',
  63. 'description': 'md5:1385e2b743923afe54ba4adc38476155',
  64. 'timestamp': 1489002029,
  65. 'upload_date': '20170308',
  66. 'duration': 96,
  67. },
  68. }, {
  69. # vod
  70. 'url': 'http://vtm.be/video/volledige-afleveringen/id/257107153551000',
  71. 'only_matching': True,
  72. }, {
  73. # vod
  74. 'url': 'http://vtm.be/video?aid=163157',
  75. 'only_matching': True,
  76. }, {
  77. # vod
  78. 'url': 'http://www.q2.be/video/volledige-afleveringen/id/2be_20170301_VM0684442_q2',
  79. 'only_matching': True,
  80. }, {
  81. # clip
  82. 'url': 'http://vitaya.be/de-jurk/precies-je-hebt-geen-borsten',
  83. 'only_matching': True,
  84. }, {
  85. # clip
  86. 'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio',
  87. 'only_matching': True,
  88. }]
  89. def _real_initialize(self):
  90. self._logged_in = False
  91. def _login(self):
  92. username, password = self._get_login_info()
  93. if username is None:
  94. self.raise_login_required()
  95. auth_data = {
  96. 'APIKey': self._APIKEY,
  97. 'sdk': 'js_6.1',
  98. 'format': 'json',
  99. 'loginID': username,
  100. 'password': password,
  101. }
  102. auth_info = self._download_json(
  103. 'https://accounts.eu1.gigya.com/accounts.login', None,
  104. note='Logging in', errnote='Unable to log in',
  105. data=urlencode_postdata(auth_data))
  106. error_message = auth_info.get('errorDetails') or auth_info.get('errorMessage')
  107. if error_message:
  108. raise ExtractorError(
  109. 'Unable to login: %s' % error_message, expected=True)
  110. self._uid = auth_info['UID']
  111. self._uid_signature = auth_info['UIDSignature']
  112. self._signature_timestamp = auth_info['signatureTimestamp']
  113. self._logged_in = True
  114. def _real_extract(self, url):
  115. mobj = re.match(self._VALID_URL, url)
  116. video_id, site_id = mobj.group('id', 'site_id')
  117. webpage = self._download_webpage(url, video_id)
  118. config = self._parse_json(
  119. self._search_regex(
  120. r'videoJSConfig\s*=\s*JSON\.parse\(\'({.+?})\'\);',
  121. webpage, 'config', default='{}'), video_id,
  122. transform_source=lambda s: s.replace(
  123. '\\\\', '\\').replace(r'\"', '"').replace(r"\'", "'"))
  124. vod_id = config.get('vodId') or self._search_regex(
  125. (r'\\"vodId\\"\s*:\s*\\"(.+?)\\"',
  126. r'<[^>]+id=["\']vod-(\d+)'),
  127. webpage, 'video_id', default=None)
  128. # clip, no authentication required
  129. if not vod_id:
  130. player = self._parse_json(
  131. self._search_regex(
  132. r'vmmaplayer\(({.+?})\);', webpage, 'vmma player',
  133. default=''),
  134. video_id, transform_source=lambda s: '[%s]' % s, fatal=False)
  135. if player:
  136. video = player[-1]
  137. info = {
  138. 'id': video_id,
  139. 'url': video['videoUrl'],
  140. 'title': video['title'],
  141. 'thumbnail': video.get('imageUrl'),
  142. 'timestamp': int_or_none(video.get('createdDate')),
  143. 'duration': int_or_none(video.get('duration')),
  144. }
  145. else:
  146. info = self._parse_html5_media_entries(
  147. url, webpage, video_id, m3u8_id='hls')[0]
  148. info.update({
  149. 'id': video_id,
  150. 'title': self._html_search_meta('description', webpage),
  151. 'duration': parse_duration(self._html_search_meta('duration', webpage)),
  152. })
  153. # vod, authentication required
  154. else:
  155. if not self._logged_in:
  156. self._login()
  157. settings = self._parse_json(
  158. self._search_regex(
  159. r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
  160. webpage, 'drupal settings', default='{}'),
  161. video_id)
  162. def get(container, item):
  163. return try_get(
  164. settings, lambda x: x[container][item],
  165. compat_str) or self._search_regex(
  166. r'"%s"\s*:\s*"([^"]+)' % item, webpage, item,
  167. default=None)
  168. app_id = get('vod', 'app_id') or self._SITE_TO_APP_ID.get(site_id, 'vtm_watch')
  169. sso = get('vod', 'gigyaDatabase') or 'vtm-sso'
  170. data = self._download_json(
  171. 'http://vod.medialaan.io/api/1.0/item/%s/video' % vod_id,
  172. video_id, query={
  173. 'app_id': app_id,
  174. 'user_network': sso,
  175. 'UID': self._uid,
  176. 'UIDSignature': self._uid_signature,
  177. 'signatureTimestamp': self._signature_timestamp,
  178. })
  179. formats = self._extract_m3u8_formats(
  180. data['response']['uri'], video_id, entry_protocol='m3u8_native',
  181. ext='mp4', m3u8_id='hls')
  182. self._sort_formats(formats)
  183. info = {
  184. 'id': vod_id,
  185. 'formats': formats,
  186. }
  187. api_key = get('vod', 'apiKey')
  188. channel = get('medialaanGigya', 'channel')
  189. if api_key:
  190. videos = self._download_json(
  191. 'http://vod.medialaan.io/vod/v2/videos', video_id, fatal=False,
  192. query={
  193. 'channels': channel,
  194. 'ids': vod_id,
  195. 'limit': 1,
  196. 'apikey': api_key,
  197. })
  198. if videos:
  199. video = try_get(
  200. videos, lambda x: x['response']['videos'][0], dict)
  201. if video:
  202. def get(container, item, expected_type=None):
  203. return try_get(
  204. video, lambda x: x[container][item], expected_type)
  205. def get_string(container, item):
  206. return get(container, item, compat_str)
  207. info.update({
  208. 'series': get_string('program', 'title'),
  209. 'season': get_string('season', 'title'),
  210. 'season_number': int_or_none(get('season', 'number')),
  211. 'season_id': get_string('season', 'id'),
  212. 'episode': get_string('episode', 'title'),
  213. 'episode_number': int_or_none(get('episode', 'number')),
  214. 'episode_id': get_string('episode', 'id'),
  215. 'duration': int_or_none(
  216. video.get('duration')) or int_or_none(
  217. video.get('durationMillis'), scale=1000),
  218. 'title': get_string('episode', 'title'),
  219. 'description': get_string('episode', 'text'),
  220. 'timestamp': unified_timestamp(get_string(
  221. 'publication', 'begin')),
  222. })
  223. if not info.get('title'):
  224. info['title'] = try_get(
  225. config, lambda x: x['videoConfig']['title'],
  226. compat_str) or self._html_search_regex(
  227. r'\\"title\\"\s*:\s*\\"(.+?)\\"', webpage, 'title',
  228. default=None) or self._og_search_title(webpage)
  229. if not info.get('description'):
  230. info['description'] = self._html_search_regex(
  231. r'<div[^>]+class="field-item\s+even">\s*<p>(.+?)</p>',
  232. webpage, 'description', default=None)
  233. return info