playvid.py 2.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
    determine_ext,
    ExtractorError,
)
  8. class PlayvidIE(InfoExtractor):
  9. _VALID_URL = r'^(?:https?://)?www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(#|$)'
  10. _TEST = {
  11. 'url': 'http://www.playvid.com/watch/agbDDi7WZTV',
  12. 'file': 'agbDDi7WZTV.mp4',
  13. 'md5': '44930f8afa616efdf9482daf4fe53e1e',
  14. 'info_dict': {
  15. 'title': 'Michelle Lewin in Miami Beach',
  16. 'duration': 240,
  17. 'age_limit': 18,
  18. }
  19. }
  20. def _real_extract(self, url):
  21. mobj = re.match(self._VALID_URL, url)
  22. video_id = mobj.group('id')
  23. webpage = self._download_webpage(url, video_id)
  24. self.report_extraction(video_id)
  25. video_title = None
  26. duration = None
  27. video_thumbnail = None
  28. formats = []
  29. # most of the information is stored in the flashvars
  30. flashvars_match = re.search(r'flashvars="(.+?)"',webpage)
  31. if flashvars_match:
  32. infos = compat_urllib_parse.unquote(flashvars_match.group(1)).split(r'&amp;')
  33. for info in infos:
  34. videovars_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$',info)
  35. if videovars_match:
  36. key = videovars_match.group(1)
  37. val = videovars_match.group(2)
  38. if key == 'title':
  39. video_title = val.replace('+',' ')
  40. if key == 'duration':
  41. try:
  42. duration = val
  43. except ValueError:
  44. duration = None
  45. if key == 'big_thumb':
  46. video_thumbnail = val
  47. videourl_match = re.match(r'^video_urls\]\[(?P<resolution>\d+)p',key)
  48. if videourl_match:
  49. resolution = int(videourl_match.group('resolution'))
  50. formats.append({
  51. 'resolution': resolution, # 360, 480, ...
  52. 'ext': determine_ext(val),
  53. 'url': val
  54. })
  55. # fatal error, if no download url is found
  56. if len(formats) == 0:
  57. raise ExtractorError,'no video url found'
  58. # Extract title - should be in the flashvars; if not, look elsewhere
  59. if video_title is None:
  60. video_title = self._html_search_regex(
  61. r'<title>(.*?)</title', webpage, 'title')
  62. return {
  63. 'id': video_id,
  64. 'formats': formats,
  65. 'title': video_title,
  66. 'thumbnail': video_thumbnail,
  67. 'duration': duration,
  68. 'description': None,
  69. 'age_limit': 18
  70. }