iprima.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. import time
  5. from .common import InfoExtractor
  6. from ..utils import (
  7. determine_ext,
  8. js_to_json,
  9. )
  10. class IPrimaIE(InfoExtractor):
  11. _VALID_URL = r'https?://play\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)'
  12. _TESTS = [{
  13. 'url': 'http://play.iprima.cz/gondici-s-r-o-33',
  14. 'info_dict': {
  15. 'id': 'p136534',
  16. 'ext': 'mp4',
  17. 'title': 'Gondíci s. r. o. (34)',
  18. 'description': 'md5:16577c629d006aa91f59ca8d8e7f99bd',
  19. },
  20. 'params': {
  21. 'skip_download': True, # m3u8 download
  22. },
  23. }, {
  24. 'url': 'http://play.iprima.cz/particka/particka-92',
  25. 'only_matching': True,
  26. }]
  27. def _real_extract(self, url):
  28. video_id = self._match_id(url)
  29. webpage = self._download_webpage(url, video_id)
  30. video_id = self._search_regex(r'data-product="([^"]+)">', webpage, 'real id')
  31. playerpage = self._download_webpage(
  32. 'http://play.iprima.cz/prehravac/init',
  33. video_id, note='Downloading player', query={
  34. '_infuse': 1,
  35. '_ts': round(time.time()),
  36. 'productId': video_id,
  37. }, headers={'Referer': url})
  38. formats = []
  39. def extract_formats(format_url, format_key=None, lang=None):
  40. ext = determine_ext(format_url)
  41. new_formats = []
  42. if format_key == 'hls' or ext == 'm3u8':
  43. new_formats = self._extract_m3u8_formats(
  44. format_url, video_id, 'mp4', entry_protocol='m3u8_native',
  45. m3u8_id='hls', fatal=False)
  46. elif format_key == 'dash' or ext == 'mpd':
  47. return
  48. new_formats = self._extract_mpd_formats(
  49. format_url, video_id, mpd_id='dash', fatal=False)
  50. if lang:
  51. for f in new_formats:
  52. if not f.get('language'):
  53. f['language'] = lang
  54. formats.extend(new_formats)
  55. options = self._parse_json(
  56. self._search_regex(
  57. r'(?s)(?:TDIPlayerOptions|playerOptions)\s*=\s*({.+?});\s*\]\]',
  58. playerpage, 'player options', default='{}'),
  59. video_id, transform_source=js_to_json, fatal=False)
  60. if options:
  61. for key, tracks in options.get('tracks', {}).items():
  62. if not isinstance(tracks, list):
  63. continue
  64. for track in tracks:
  65. src = track.get('src')
  66. if src:
  67. extract_formats(src, key.lower(), track.get('lang'))
  68. if not formats:
  69. for _, src in re.findall(r'src["\']\s*:\s*(["\'])(.+?)\1', playerpage):
  70. extract_formats(src)
  71. if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
  72. self.raise_geo_restricted()
  73. self._sort_formats(formats)
  74. return {
  75. 'id': video_id,
  76. 'title': self._og_search_title(webpage),
  77. 'thumbnail': self._og_search_thumbnail(webpage),
  78. 'formats': formats,
  79. 'description': self._og_search_description(webpage),
  80. }