sexykarma.py 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. unified_strdate,
  7. parse_duration,
  8. int_or_none,
  9. )
  10. class SexyKarmaIE(InfoExtractor):
  11. _VALID_URL = r'https?://(?:www\.)?sexykarma\.com/gonewild/video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html'
  12. _TESTS = [{
  13. 'url': 'http://www.sexykarma.com/gonewild/video/taking-a-quick-pee-yHI70cOyIHt.html',
  14. 'md5': 'b9798e7d1ef1765116a8f516c8091dbd',
  15. 'info_dict': {
  16. 'id': 'yHI70cOyIHt',
  17. 'display_id': 'taking-a-quick-pee',
  18. 'ext': 'mp4',
  19. 'title': 'Taking a quick pee.',
  20. 'description': '',
  21. 'thumbnail': 're:^https?://.*\.jpg$',
  22. 'uploader': 'wildginger7',
  23. 'upload_date': '20141007',
  24. 'duration': 81,
  25. 'view_count': int,
  26. 'comment_count': int,
  27. 'categories': list,
  28. }
  29. }, {
  30. 'url': 'http://www.sexykarma.com/gonewild/video/pot-pixie-tribute-8Id6EZPbuHf.html',
  31. 'md5': 'dd216c68d29b49b12842b9babe762a5d',
  32. 'info_dict': {
  33. 'id': '8Id6EZPbuHf',
  34. 'display_id': 'pot-pixie-tribute',
  35. 'ext': 'mp4',
  36. 'title': 'pot_pixie tribute',
  37. 'description': 'tribute',
  38. 'thumbnail': 're:^https?://.*\.jpg$',
  39. 'uploader': 'banffite',
  40. 'upload_date': '20141013',
  41. 'duration': 16,
  42. 'view_count': int,
  43. 'comment_count': int,
  44. 'categories': list,
  45. }
  46. }]
  47. def _real_extract(self, url):
  48. mobj = re.match(self._VALID_URL, url)
  49. video_id = mobj.group('id')
  50. display_id = mobj.group('display_id')
  51. webpage = self._download_webpage(url, display_id)
  52. video_url = self._html_search_regex(
  53. r'<p>Save this video to your computer: </p><p><a href="([^"]+)"',
  54. webpage, 'url')
  55. title = self._html_search_regex(
  56. r'<h2 class="he2"><span>(.*?)</span>',
  57. webpage, 'title')
  58. description = self._html_search_meta(
  59. 'description', webpage, 'description', fatal=False, default='')
  60. thumbnail = self._html_search_regex(
  61. r'<span id="container"><img\s+src="([^"]+)"',
  62. webpage, 'thumbnail', fatal=False)
  63. uploader = self._html_search_regex(
  64. r'class="aupa">\s*(.*?)</a>',
  65. webpage, 'uploader')
  66. upload_date = unified_strdate(self._html_search_regex(
  67. r'Added: <strong>(.+?)</strong>', webpage, 'upload date', fatal=False))
  68. duration = parse_duration(self._search_regex(
  69. r'<td>Time:\s*</td>\s*<td align="right"><span>\s*(.+?)\s*</span>',
  70. webpage, 'duration', fatal=False))
  71. view_count = int_or_none(self._search_regex(
  72. r'<td>Views:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',
  73. webpage, 'view count', fatal=False))
  74. comment_count = int_or_none(self._search_regex(
  75. r'<td>Comments:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',
  76. webpage, 'comment count', fatal=False))
  77. categories = self._html_search_meta(
  78. 'keywords', webpage, 'categories',
  79. fatal=False, default='').split(',')
  80. return {
  81. 'id': video_id,
  82. 'display_id': display_id,
  83. 'url': video_url,
  84. 'title': title,
  85. 'description': description,
  86. 'thumbnail': thumbnail,
  87. 'uploader': uploader,
  88. 'upload_date': upload_date,
  89. 'duration': duration,
  90. 'view_count': view_count,
  91. 'comment_count': comment_count,
  92. 'categories': categories,
  93. }