empflix.py 1.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546
  1. import re
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. ExtractorError,
  5. )
  6. class EmpflixIE(InfoExtractor):
  7. _VALID_URL = r'^https?://www\.empflix\.com/videos/(?P<videoid>[^\.]+)\.html'
  8. _TEST = {
  9. u'url': u'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
  10. u'file': u'Amateur-Finger-Fuck-33051.flv',
  11. u'md5': u'5e5cc160f38ca9857f318eb97146e13e',
  12. u'info_dict': {
  13. u"title": u"Amateur Finger Fuck",
  14. u"age_limit": 18,
  15. }
  16. }
  17. def _real_extract(self, url):
  18. mobj = re.match(self._VALID_URL, url)
  19. video_id = mobj.group('videoid')
  20. # Get webpage content
  21. webpage = self._download_webpage(url, video_id)
  22. age_limit = self._rta_search(webpage)
  23. # Get the video title
  24. video_title = self._html_search_regex(r'name="title" value="(?P<title>[^"]*)"',
  25. webpage, u'title').strip()
  26. cfg_url = self._html_search_regex(r'flashvars\.config = escape\("([^"]+)"',
  27. webpage, u'flashvars.config').strip()
  28. cfg_xml = self._download_xml(cfg_url, video_id, note=u'Downloading metadata')
  29. video_url = cfg_xml.find('videoLink').text
  30. info = {'id': video_id,
  31. 'url': video_url,
  32. 'title': video_title,
  33. 'ext': 'flv',
  34. 'age_limit': age_limit}
  35. return [info]