europa.py 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. compat_urlparse,
  6. xpath_text
  7. )
  8. class EuropaIE(InfoExtractor):
  9. _VALID_URL = r'https?://ec\.europa\.eu/avservices/video/player\.cfm\?(?:[^&]|&(?!ref))*ref=(?P<id>[A-Za-z0-9]+)'
  10. _TEST = {
  11. 'url': 'http://ec.europa.eu/avservices/video/player.cfm?ref=I107758',
  12. 'md5': '728cca2fd41d5aa7350cec1141fbe620',
  13. 'info_dict': {
  14. 'id': 'I107758',
  15. 'ext': 'mp4',
  16. 'title': 'TRADE - Wikileaks on TTIP',
  17. 'description': 'NEW LIVE EC Midday press briefing of 11/08/2015',
  18. 'thumbnail': 're:^http://defiris\.ec\.streamcloud\.be/findmedia/18/107758/THUMB_[0-9A-Z]+\.jpg$'
  19. }
  20. }
  21. def _real_extract(self, url):
  22. video_id = self._match_id(url)
  23. query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
  24. lang = query.get('sitelang', ['en'])[0]
  25. playlist = self._download_xml('http://ec.europa.eu/avservices/video/player/playlist.cfm?ID=' + video_id, video_id)
  26. videos = {}
  27. formats = []
  28. for item in playlist.findall('info/title/item'):
  29. videos[xpath_text(item, 'lg')] = {'title': xpath_text(item, 'label').strip()}
  30. for item in playlist.findall('info/description/item'):
  31. videos[xpath_text(item, 'lg')]['description'] = xpath_text(item, 'label').strip()
  32. for item in playlist.findall('files/file'):
  33. lg = xpath_text(item, 'lg')
  34. vid = videos[lg]
  35. vid['format_note'] = xpath_text(item, 'lglabel')
  36. vid['url'] = xpath_text(item, 'url')
  37. if lg == lang:
  38. vid['language_preference'] = 10
  39. formats.append(vid)
  40. formats.reverse()
  41. def_video = videos.get(lang, videos['int'])
  42. return {
  43. 'id': video_id,
  44. 'title': def_video['title'],
  45. 'description': def_video['description'],
  46. 'thumbnail': xpath_text(playlist, 'info/thumburl', 'thumburl'),
  47. 'formats': formats
  48. }