ustream.py 3.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. from __future__ import unicode_literals
  2. import json
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. compat_urlparse,
  7. get_meta_content,
  8. )
  9. class UstreamIE(InfoExtractor):
  10. _VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed)/(?P<videoID>\d+)'
  11. IE_NAME = 'ustream'
  12. _TESTS = [{
  13. 'url': 'http://www.ustream.tv/recorded/20274954',
  14. 'file': '20274954.flv',
  15. 'md5': '088f151799e8f572f84eb62f17d73e5c',
  16. 'info_dict': {
  17. "uploader": "Young Americans for Liberty",
  18. "title": "Young Americans for Liberty February 7, 2012 2:28 AM",
  19. },
  20. },
  21. {
  22. 'url': 'http://www.ustream.tv/embed/17357891',
  23. 'file': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman-45734260.flv',
  24. 'md5': '27b99cdb639c9b12a79bca876a073417',
  25. 'info_dict': {
  26. "uploader": "AU SPA: The NSA and Privacy",
  27. "title": "NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman"
  28. },
  29. }
  30. ]
  31. def _real_extract(self, url):
  32. m = re.match(self._VALID_URL, url)
  33. if m.group('type') == 'embed':
  34. video_id = m.group('videoID')
  35. webpage = self._download_webpage(url, video_id)
  36. desktop_video_id = self._html_search_regex(r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id')
  37. desktop_url = 'http://www.ustream.tv/recorded/' + desktop_video_id
  38. return self.url_result(desktop_url, 'Ustream')
  39. video_id = m.group('videoID')
  40. video_url = 'http://tcdn.ustream.tv/video/%s' % video_id
  41. webpage = self._download_webpage(url, video_id)
  42. self.report_extraction(video_id)
  43. video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
  44. webpage, 'title')
  45. uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
  46. webpage, 'uploader', fatal=False, flags=re.DOTALL)
  47. thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
  48. webpage, 'thumbnail', fatal=False)
  49. return {
  50. 'id': video_id,
  51. 'url': video_url,
  52. 'ext': 'flv',
  53. 'title': video_title,
  54. 'uploader': uploader,
  55. 'thumbnail': thumbnail,
  56. }
  57. class UstreamChannelIE(InfoExtractor):
  58. _VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
  59. IE_NAME = 'ustream:channel'
  60. def _real_extract(self, url):
  61. m = re.match(self._VALID_URL, url)
  62. slug = m.group('slug')
  63. webpage = self._download_webpage(url, slug)
  64. channel_id = get_meta_content('ustream:channel_id', webpage)
  65. BASE = 'http://www.ustream.tv'
  66. next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id
  67. video_ids = []
  68. while next_url:
  69. reply = json.loads(self._download_webpage(compat_urlparse.urljoin(BASE, next_url), channel_id))
  70. video_ids.extend(re.findall(r'data-content-id="(\d.*)"', reply['data']))
  71. next_url = reply['nextUrl']
  72. urls = ['http://www.ustream.tv/recorded/' + vid for vid in video_ids]
  73. url_entries = [self.url_result(eurl, 'Ustream') for eurl in urls]
  74. return self.playlist_result(url_entries, channel_id)