streamango.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. determine_ext,
  7. int_or_none,
  8. js_to_json,
  9. )
  10. class StreamangoIE(InfoExtractor):
  11. _VALID_URL = r'https?://(?:www\.)?streamango\.com/(?:f|embed)/(?P<id>[^/?#&]+)'
  12. _TESTS = [{
  13. 'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4',
  14. 'md5': 'e992787515a182f55e38fc97588d802a',
  15. 'info_dict': {
  16. 'id': 'clapasobsptpkdfe',
  17. 'ext': 'mp4',
  18. 'title': '20170315_150006.mp4',
  19. }
  20. }, {
  21. # no og:title
  22. 'url': 'https://streamango.com/embed/foqebrpftarclpob/asdf_asd_2_mp4',
  23. 'info_dict': {
  24. 'id': 'foqebrpftarclpob',
  25. 'ext': 'mp4',
  26. 'title': 'foqebrpftarclpob',
  27. },
  28. 'params': {
  29. 'skip_download': True,
  30. },
  31. }, {
  32. 'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
  33. 'only_matching': True,
  34. }]
  35. def _real_extract(self, url):
  36. def decrypt_src(str_, val):
  37. k = '=/+9876543210zyxwvutsrqponmlkjihgfedcbaZYXWVUTSRQPONMLKJIHGFEDCBA'
  38. str_ = re.sub(r'[^A-Za-z0-9+/=]', '', str_)
  39. src = ''
  40. sm = [None] * 4
  41. i = 0
  42. str_len = len(str_)
  43. while i < str_len:
  44. for j in range(4):
  45. sm[j % 4] = k.index(str_[i])
  46. i += 1
  47. charCode = ((sm[0] << 0x2) | (sm[1] >> 0x4)) ^ val
  48. src += chr(charCode)
  49. if (sm[2] != 0x40):
  50. charCode = ((sm[1] & 0xf) << 0x4) | (sm[2] >> 0x2)
  51. src += chr(charCode)
  52. if (sm[3] != 0x40):
  53. charCode = ((sm[2] & 0x3) << 0x6) | sm[3]
  54. src += chr(charCode)
  55. return src
  56. video_id = self._match_id(url)
  57. webpage = self._download_webpage(url, video_id)
  58. title = self._og_search_title(webpage, default=video_id)
  59. formats = []
  60. for format_ in re.findall(r'\(\s*({[^}]*\bsrc\s*:\s*[^}]*})', webpage):
  61. mobj = re.search(r'(src\s*:\s*[^(]\(([^)]*)\)[\s,]*)', format_)
  62. if mobj is None:
  63. continue
  64. format_ = format_.replace(mobj.group(0), '')
  65. video = self._parse_json(
  66. format_, video_id, transform_source=js_to_json, fatal=False)
  67. if not video:
  68. continue
  69. mobj = re.search(r'[\'"](?P<src>[^\'"]+)[\'"]\s*,\s*(?P<val>\d+)', mobj.group(1))
  70. if mobj is None:
  71. continue
  72. src = decrypt_src(mobj.group('src'), int_or_none(mobj.group('val')))
  73. ext = determine_ext(src, default_ext=None)
  74. if video.get('type') == 'application/dash+xml' or ext == 'mpd':
  75. formats.extend(self._extract_mpd_formats(
  76. src, video_id, mpd_id='dash', fatal=False))
  77. else:
  78. formats.append({
  79. 'url': src,
  80. 'ext': ext or 'mp4',
  81. 'width': int_or_none(video.get('width')),
  82. 'height': int_or_none(video.get('height')),
  83. 'tbr': int_or_none(video.get('bitrate')),
  84. })
  85. self._sort_formats(formats)
  86. return {
  87. 'id': video_id,
  88. 'url': url,
  89. 'title': title,
  90. 'formats': formats,
  91. }