indavideo.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. from .. import utils
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. int_or_none,
  7. parse_age_limit,
  8. parse_iso8601,
  9. )
  10. class IndavideoEmbedIE(InfoExtractor):
  11. _VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)'
  12. _TESTS = [{
  13. 'url': 'http://indavideo.hu/player/video/1bdc3c6d80/',
  14. 'md5': 'f79b009c66194acacd40712a6778acfa',
  15. 'info_dict': {
  16. 'id': '1837039',
  17. 'ext': 'mp4',
  18. 'title': 'Cicatánc',
  19. 'description': '',
  20. 'thumbnail': 're:^https?://.*\.jpg$',
  21. 'uploader': 'cukiajanlo',
  22. 'uploader_id': '83729',
  23. 'timestamp': 1439193826,
  24. 'upload_date': '20150810',
  25. 'duration': 72,
  26. 'age_limit': 0,
  27. 'tags': ['tánc', 'cica', 'cuki', 'cukiajanlo', 'newsroom'],
  28. },
  29. }, {
  30. 'url': 'http://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1',
  31. 'only_matching': True,
  32. }, {
  33. 'url': 'http://assets.indavideo.hu/swf/player.swf?v=fe25e500&vID=1bdc3c6d80&autostart=1&hide=1&i=1',
  34. 'only_matching': True,
  35. }]
  36. def _real_extract(self, url):
  37. video_id = self._match_id(url)
  38. video = self._download_json(
  39. 'http://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id,
  40. video_id)['data']
  41. video_id = video['id']
  42. title = video['title']
  43. video_urls = video.get('video_files', [])
  44. video_file = video.get('video_file')
  45. if video:
  46. video_urls.append(video_file)
  47. video_urls = list(set(video_urls))
  48. video_prefix = video_urls[0].rsplit('/', 1)[0]
  49. for flv_file in video.get('flv_files', []):
  50. flv_url = '%s/%s' % (video_prefix, flv_file)
  51. if flv_url not in video_urls:
  52. video_urls.append(flv_url)
  53. formats = [{
  54. 'url': video_url,
  55. 'height': self._search_regex(r'\.(\d{3,4})\.mp4$', video_url, 'height', default=None),
  56. } for video_url in video_urls]
  57. self._sort_formats(formats)
  58. timestamp = video.get('date')
  59. if timestamp:
  60. # upload date is in CEST
  61. timestamp = parse_iso8601(timestamp + ' +0200', ' ')
  62. thumbnails = [{
  63. 'url': self._proto_relative_url(thumbnail)
  64. } for thumbnail in video.get('thumbnails', [])]
  65. tags = [tag['title'] for tag in video.get('tags', [])]
  66. return {
  67. 'id': video_id,
  68. 'title': title,
  69. 'description': video.get('description'),
  70. 'thumbnails': thumbnails,
  71. 'uploader': video.get('user_name'),
  72. 'uploader_id': video.get('user_id'),
  73. 'timestamp': timestamp,
  74. 'duration': int_or_none(video.get('length')),
  75. 'age_limit': parse_age_limit(video.get('age_limit')),
  76. 'tags': tags,
  77. 'formats': formats,
  78. }
  79. class IndavideoIE(InfoExtractor):
  80. _VALID_URL = r'https?://(?:www\.)?indavideo\.hu/video/(?P<id>[^/#?]+)'
  81. _TEST = {
  82. 'url': 'http://indavideo.hu/video/Vicces_cica_1',
  83. 'md5': '8c82244ba85d2a2310275b318eb51eac',
  84. 'info_dict': {
  85. 'id': '1335611',
  86. 'display_id': 'Vicces_cica_1',
  87. 'ext': 'mp4',
  88. 'title': 'Vicces cica',
  89. 'description': 'Játszik a tablettel. :D',
  90. 'thumbnail': 're:^https?://.*\.jpg$',
  91. 'uploader': 'Jet_Pack',
  92. 'uploader_id': '491217',
  93. 'timestamp': 1390821212,
  94. 'upload_date': '20140127',
  95. 'duration': 7,
  96. 'age_limit': 0,
  97. 'tags': ['vicces', 'macska', 'cica', 'ügyes', 'nevetés', 'játszik', 'Cukiság', 'Jet_Pack'],
  98. },
  99. }
  100. def _real_extract(self, url):
  101. display_id = self._match_id(url)
  102. webpage = self._download_webpage(url, display_id)
  103. embed_url = self._search_regex(
  104. r'<link[^>]+rel="video_src"[^>]+href="(.+?)"', webpage, 'embed url')
  105. return {
  106. '_type': 'url_transparent',
  107. 'ie_key': 'IndavideoEmbed',
  108. 'url': embed_url,
  109. 'display_id': display_id,
  110. }