pornhub.py 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import functools
  4. import itertools
  5. import operator
  6. import re
  7. from .common import InfoExtractor
  8. from ..compat import (
  9. compat_HTTPError,
  10. compat_str,
  11. compat_urllib_request,
  12. )
  13. from .openload import PhantomJSwrapper
  14. from ..utils import (
  15. determine_ext,
  16. ExtractorError,
  17. int_or_none,
  18. merge_dicts,
  19. NO_DEFAULT,
  20. orderedSet,
  21. remove_quotes,
  22. str_to_int,
  23. update_url_query,
  24. urlencode_postdata,
  25. url_or_none,
  26. )
  27. class PornHubBaseIE(InfoExtractor):
  28. _NETRC_MACHINE = 'pornhub'
  29. def _download_webpage_handle(self, *args, **kwargs):
  30. def dl(*args, **kwargs):
  31. return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
  32. ret = dl(*args, **kwargs)
  33. if not ret:
  34. return ret
  35. webpage, urlh = ret
  36. if any(re.search(p, webpage) for p in (
  37. r'<body\b[^>]+\bonload=["\']go\(\)',
  38. r'document\.cookie\s*=\s*["\']RNKEY=',
  39. r'document\.location\.reload\(true\)')):
  40. url_or_request = args[0]
  41. url = (url_or_request.get_full_url()
  42. if isinstance(url_or_request, compat_urllib_request.Request)
  43. else url_or_request)
  44. phantom = PhantomJSwrapper(self, required_version='2.0')
  45. phantom.get(url, html=webpage)
  46. webpage, urlh = dl(*args, **kwargs)
  47. return webpage, urlh
  48. def _real_initialize(self):
  49. self._logged_in = False
  50. def _login(self, host):
  51. if self._logged_in:
  52. return
  53. site = host.split('.')[0]
  54. # Both sites pornhub and pornhubpremium have separate accounts
  55. # so there should be an option to provide credentials for both.
  56. # At the same time some videos are available under the same video id
  57. # on both sites so that we have to identify them as the same video.
  58. # For that purpose we have to keep both in the same extractor
  59. # but under different netrc machines.
  60. username, password = self._get_login_info(netrc_machine=site)
  61. if username is None:
  62. return
  63. login_url = 'https://www.%s/%slogin' % (host, 'premium/' if 'premium' in host else '')
  64. login_page = self._download_webpage(
  65. login_url, None, 'Downloading %s login page' % site)
  66. def is_logged(webpage):
  67. return any(re.search(p, webpage) for p in (
  68. r'class=["\']signOut',
  69. r'>Sign\s+[Oo]ut\s*<'))
  70. if is_logged(login_page):
  71. self._logged_in = True
  72. return
  73. login_form = self._hidden_inputs(login_page)
  74. login_form.update({
  75. 'username': username,
  76. 'password': password,
  77. })
  78. response = self._download_json(
  79. 'https://www.%s/front/authenticate' % host, None,
  80. 'Logging in to %s' % site,
  81. data=urlencode_postdata(login_form),
  82. headers={
  83. 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
  84. 'Referer': login_url,
  85. 'X-Requested-With': 'XMLHttpRequest',
  86. })
  87. if response.get('success') == '1':
  88. self._logged_in = True
  89. return
  90. message = response.get('message')
  91. if message is not None:
  92. raise ExtractorError(
  93. 'Unable to login: %s' % message, expected=True)
  94. raise ExtractorError('Unable to log in')
  95. class PornHubIE(PornHubBaseIE):
  96. IE_DESC = 'PornHub and Thumbzilla'
  97. _VALID_URL = r'''(?x)
  98. https?://
  99. (?:
  100. (?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
  101. (?:www\.)?thumbzilla\.com/video/
  102. )
  103. (?P<id>[\da-z]+)
  104. '''
  105. _TESTS = [{
  106. 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
  107. 'md5': 'a6391306d050e4547f62b3f485dd9ba9',
  108. 'info_dict': {
  109. 'id': '648719015',
  110. 'ext': 'mp4',
  111. 'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
  112. 'uploader': 'Babes',
  113. 'upload_date': '20130628',
  114. 'timestamp': 1372447216,
  115. 'duration': 361,
  116. 'view_count': int,
  117. 'like_count': int,
  118. 'dislike_count': int,
  119. 'comment_count': int,
  120. 'age_limit': 18,
  121. 'tags': list,
  122. 'categories': list,
  123. },
  124. }, {
  125. # non-ASCII title
  126. 'url': 'http://www.pornhub.com/view_video.php?viewkey=1331683002',
  127. 'info_dict': {
  128. 'id': '1331683002',
  129. 'ext': 'mp4',
  130. 'title': '重庆婷婷女王足交',
  131. 'upload_date': '20150213',
  132. 'timestamp': 1423804862,
  133. 'duration': 1753,
  134. 'view_count': int,
  135. 'like_count': int,
  136. 'dislike_count': int,
  137. 'comment_count': int,
  138. 'age_limit': 18,
  139. 'tags': list,
  140. 'categories': list,
  141. },
  142. 'params': {
  143. 'skip_download': True,
  144. },
  145. }, {
  146. # subtitles
  147. 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
  148. 'info_dict': {
  149. 'id': 'ph5af5fef7c2aa7',
  150. 'ext': 'mp4',
  151. 'title': 'BFFS - Cute Teen Girls Share Cock On the Floor',
  152. 'uploader': 'BFFs',
  153. 'duration': 622,
  154. 'view_count': int,
  155. 'like_count': int,
  156. 'dislike_count': int,
  157. 'comment_count': int,
  158. 'age_limit': 18,
  159. 'tags': list,
  160. 'categories': list,
  161. 'subtitles': {
  162. 'en': [{
  163. "ext": 'srt'
  164. }]
  165. },
  166. },
  167. 'params': {
  168. 'skip_download': True,
  169. },
  170. 'skip': 'This video has been disabled',
  171. }, {
  172. 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
  173. 'only_matching': True,
  174. }, {
  175. # removed at the request of cam4.com
  176. 'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862',
  177. 'only_matching': True,
  178. }, {
  179. # removed at the request of the copyright owner
  180. 'url': 'http://www.pornhub.com/view_video.php?viewkey=788152859',
  181. 'only_matching': True,
  182. }, {
  183. # removed by uploader
  184. 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph572716d15a111',
  185. 'only_matching': True,
  186. }, {
  187. # private video
  188. 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7',
  189. 'only_matching': True,
  190. }, {
  191. 'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex',
  192. 'only_matching': True,
  193. }, {
  194. 'url': 'http://www.pornhub.com/video/show?viewkey=648719015',
  195. 'only_matching': True,
  196. }, {
  197. 'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933',
  198. 'only_matching': True,
  199. }, {
  200. 'url': 'https://www.pornhub.org/view_video.php?viewkey=203640933',
  201. 'only_matching': True,
  202. }, {
  203. 'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
  204. 'only_matching': True,
  205. }, {
  206. # Some videos are available with the same id on both premium
  207. # and non-premium sites (e.g. this and the following test)
  208. 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5f75b0f4b18e3',
  209. 'only_matching': True,
  210. }, {
  211. 'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3',
  212. 'only_matching': True,
  213. }]
  214. @staticmethod
  215. def _extract_urls(webpage):
  216. return re.findall(
  217. r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)',
  218. webpage)
  219. def _extract_count(self, pattern, webpage, name):
  220. return str_to_int(self._search_regex(
  221. pattern, webpage, '%s count' % name, fatal=False))
  222. def _real_extract(self, url):
  223. mobj = re.match(self._VALID_URL, url)
  224. host = mobj.group('host') or 'pornhub.com'
  225. video_id = mobj.group('id')
  226. self._login(host)
  227. self._set_cookie(host, 'age_verified', '1')
  228. def dl_webpage(platform):
  229. self._set_cookie(host, 'platform', platform)
  230. return self._download_webpage(
  231. 'https://www.%s/view_video.php?viewkey=%s' % (host, video_id),
  232. video_id, 'Downloading %s webpage' % platform)
  233. webpage = dl_webpage('pc')
  234. error_msg = self._html_search_regex(
  235. r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
  236. webpage, 'error message', default=None, group='error')
  237. if error_msg:
  238. error_msg = re.sub(r'\s+', ' ', error_msg)
  239. raise ExtractorError(
  240. 'PornHub said: %s' % error_msg,
  241. expected=True, video_id=video_id)
  242. # video_title from flashvars contains whitespace instead of non-ASCII (see
  243. # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
  244. # on that anymore.
  245. title = self._html_search_meta(
  246. 'twitter:title', webpage, default=None) or self._html_search_regex(
  247. (r'(?s)<h1[^>]+class=["\']title["\'][^>]*>(?P<title>.+?)</h1>',
  248. r'<div[^>]+data-video-title=(["\'])(?P<title>(?:(?!\1).)+)\1',
  249. r'shareTitle["\']\s*[=:]\s*(["\'])(?P<title>(?:(?!\1).)+)\1'),
  250. webpage, 'title', group='title')
  251. video_urls = []
  252. video_urls_set = set()
  253. subtitles = {}
  254. flashvars = self._parse_json(
  255. self._search_regex(
  256. r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
  257. video_id)
  258. if flashvars:
  259. subtitle_url = url_or_none(flashvars.get('closedCaptionsFile'))
  260. if subtitle_url:
  261. subtitles.setdefault('en', []).append({
  262. 'url': subtitle_url,
  263. 'ext': 'srt',
  264. })
  265. thumbnail = flashvars.get('image_url')
  266. duration = int_or_none(flashvars.get('video_duration'))
  267. media_definitions = flashvars.get('mediaDefinitions')
  268. if isinstance(media_definitions, list):
  269. for definition in media_definitions:
  270. if not isinstance(definition, dict):
  271. continue
  272. video_url = definition.get('videoUrl')
  273. if not video_url or not isinstance(video_url, compat_str):
  274. continue
  275. if video_url in video_urls_set:
  276. continue
  277. video_urls_set.add(video_url)
  278. video_urls.append(
  279. (video_url, int_or_none(definition.get('quality'))))
  280. else:
  281. thumbnail, duration = [None] * 2
  282. def extract_js_vars(webpage, pattern, default=NO_DEFAULT):
  283. assignments = self._search_regex(
  284. pattern, webpage, 'encoded url', default=default)
  285. if not assignments:
  286. return {}
  287. assignments = assignments.split(';')
  288. js_vars = {}
  289. def parse_js_value(inp):
  290. inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp)
  291. if '+' in inp:
  292. inps = inp.split('+')
  293. return functools.reduce(
  294. operator.concat, map(parse_js_value, inps))
  295. inp = inp.strip()
  296. if inp in js_vars:
  297. return js_vars[inp]
  298. return remove_quotes(inp)
  299. for assn in assignments:
  300. assn = assn.strip()
  301. if not assn:
  302. continue
  303. assn = re.sub(r'var\s+', '', assn)
  304. vname, value = assn.split('=', 1)
  305. js_vars[vname] = parse_js_value(value)
  306. return js_vars
  307. def add_video_url(video_url):
  308. v_url = url_or_none(video_url)
  309. if not v_url:
  310. return
  311. if v_url in video_urls_set:
  312. return
  313. video_urls.append((v_url, None))
  314. video_urls_set.add(v_url)
  315. def parse_quality_items(quality_items):
  316. q_items = self._parse_json(quality_items, video_id, fatal=False)
  317. if not isinstance(q_items, list):
  318. return
  319. for item in q_items:
  320. if isinstance(item, dict):
  321. add_video_url(item.get('url'))
  322. if not video_urls:
  323. FORMAT_PREFIXES = ('media', 'quality', 'qualityItems')
  324. js_vars = extract_js_vars(
  325. webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES),
  326. default=None)
  327. if js_vars:
  328. for key, format_url in js_vars.items():
  329. if key.startswith(FORMAT_PREFIXES[-1]):
  330. parse_quality_items(format_url)
  331. elif any(key.startswith(p) for p in FORMAT_PREFIXES[:2]):
  332. add_video_url(format_url)
  333. if not video_urls and re.search(
  334. r'<[^>]+\bid=["\']lockedPlayer', webpage):
  335. raise ExtractorError(
  336. 'Video %s is locked' % video_id, expected=True)
  337. if not video_urls:
  338. js_vars = extract_js_vars(
  339. dl_webpage('tv'), r'(var.+?mediastring.+?)</script>')
  340. add_video_url(js_vars['mediastring'])
  341. for mobj in re.finditer(
  342. r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1',
  343. webpage):
  344. video_url = mobj.group('url')
  345. if video_url not in video_urls_set:
  346. video_urls.append((video_url, None))
  347. video_urls_set.add(video_url)
  348. upload_date = None
  349. formats = []
  350. def add_format(format_url, height=None):
  351. tbr = None
  352. mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', format_url)
  353. if mobj:
  354. if not height:
  355. height = int(mobj.group('height'))
  356. tbr = int(mobj.group('tbr'))
  357. formats.append({
  358. 'url': format_url,
  359. 'format_id': '%dp' % height if height else None,
  360. 'height': height,
  361. 'tbr': tbr,
  362. })
  363. for video_url, height in video_urls:
  364. if not upload_date:
  365. upload_date = self._search_regex(
  366. r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None)
  367. if upload_date:
  368. upload_date = upload_date.replace('/', '')
  369. ext = determine_ext(video_url)
  370. if ext == 'mpd':
  371. formats.extend(self._extract_mpd_formats(
  372. video_url, video_id, mpd_id='dash', fatal=False))
  373. continue
  374. elif ext == 'm3u8':
  375. formats.extend(self._extract_m3u8_formats(
  376. video_url, video_id, 'mp4', entry_protocol='m3u8_native',
  377. m3u8_id='hls', fatal=False))
  378. continue
  379. if '/video/get_media' in video_url:
  380. medias = self._download_json(video_url, video_id, fatal=False)
  381. if isinstance(medias, list):
  382. for media in medias:
  383. if not isinstance(media, dict):
  384. continue
  385. video_url = url_or_none(media.get('videoUrl'))
  386. if not video_url:
  387. continue
  388. height = int_or_none(media.get('quality'))
  389. add_format(video_url, height)
  390. continue
  391. add_format(video_url)
  392. self._sort_formats(formats)
  393. video_uploader = self._html_search_regex(
  394. r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
  395. webpage, 'uploader', default=None)
  396. def extract_vote_count(kind, name):
  397. return self._extract_count(
  398. (r'<span[^>]+\bclass="votes%s"[^>]*>([\d,\.]+)</span>' % kind,
  399. r'<span[^>]+\bclass=["\']votes%s["\'][^>]*\bdata-rating=["\'](\d+)' % kind),
  400. webpage, name)
  401. view_count = self._extract_count(
  402. r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view')
  403. like_count = extract_vote_count('Up', 'like')
  404. dislike_count = extract_vote_count('Down', 'dislike')
  405. comment_count = self._extract_count(
  406. r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
  407. def extract_list(meta_key):
  408. div = self._search_regex(
  409. r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>'
  410. % meta_key, webpage, meta_key, default=None)
  411. if div:
  412. return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div)
  413. info = self._search_json_ld(webpage, video_id, default={})
  414. # description provided in JSON-LD is irrelevant
  415. info['description'] = None
  416. return merge_dicts({
  417. 'id': video_id,
  418. 'uploader': video_uploader,
  419. 'upload_date': upload_date,
  420. 'title': title,
  421. 'thumbnail': thumbnail,
  422. 'duration': duration,
  423. 'view_count': view_count,
  424. 'like_count': like_count,
  425. 'dislike_count': dislike_count,
  426. 'comment_count': comment_count,
  427. 'formats': formats,
  428. 'age_limit': 18,
  429. 'tags': extract_list('tags'),
  430. 'categories': extract_list('categories'),
  431. 'subtitles': subtitles,
  432. }, info)
  433. class PornHubPlaylistBaseIE(PornHubBaseIE):
  434. def _extract_page(self, url):
  435. return int_or_none(self._search_regex(
  436. r'\bpage=(\d+)', url, 'page', default=None))
  437. def _extract_entries(self, webpage, host):
  438. # Only process container div with main playlist content skipping
  439. # drop-down menu that uses similar pattern for videos (see
  440. # https://github.com/ytdl-org/youtube-dl/issues/11594).
  441. container = self._search_regex(
  442. r'(?s)(<div[^>]+class=["\']container.+)', webpage,
  443. 'container', default=webpage)
  444. return [
  445. self.url_result(
  446. 'http://www.%s/%s' % (host, video_url),
  447. PornHubIE.ie_key(), video_title=title)
  448. for video_url, title in orderedSet(re.findall(
  449. r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
  450. container))
  451. ]
  452. class PornHubUserIE(PornHubPlaylistBaseIE):
  453. _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
  454. _TESTS = [{
  455. 'url': 'https://www.pornhub.com/model/zoe_ph',
  456. 'playlist_mincount': 118,
  457. }, {
  458. 'url': 'https://www.pornhub.com/pornstar/liz-vicious',
  459. 'info_dict': {
  460. 'id': 'liz-vicious',
  461. },
  462. 'playlist_mincount': 118,
  463. }, {
  464. 'url': 'https://www.pornhub.com/users/russianveet69',
  465. 'only_matching': True,
  466. }, {
  467. 'url': 'https://www.pornhub.com/channels/povd',
  468. 'only_matching': True,
  469. }, {
  470. 'url': 'https://www.pornhub.com/model/zoe_ph?abc=1',
  471. 'only_matching': True,
  472. }, {
  473. # Unavailable via /videos page, but available with direct pagination
  474. # on pornstar page (see [1]), requires premium
  475. # 1. https://github.com/ytdl-org/youtube-dl/issues/27853
  476. 'url': 'https://www.pornhubpremium.com/pornstar/sienna-west',
  477. 'only_matching': True,
  478. }, {
  479. # Same as before, multi page
  480. 'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
  481. 'only_matching': True,
  482. }]
  483. def _real_extract(self, url):
  484. mobj = re.match(self._VALID_URL, url)
  485. user_id = mobj.group('id')
  486. videos_url = '%s/videos' % mobj.group('url')
  487. page = self._extract_page(url)
  488. if page:
  489. videos_url = update_url_query(videos_url, {'page': page})
  490. return self.url_result(
  491. videos_url, ie=PornHubPagedVideoListIE.ie_key(), video_id=user_id)
  492. class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
  493. @staticmethod
  494. def _has_more(webpage):
  495. return re.search(
  496. r'''(?x)
  497. <li[^>]+\bclass=["\']page_next|
  498. <link[^>]+\brel=["\']next|
  499. <button[^>]+\bid=["\']moreDataBtn
  500. ''', webpage) is not None
  501. def _entries(self, url, host, item_id):
  502. page = self._extract_page(url)
  503. VIDEOS = '/videos'
  504. def download_page(base_url, num, fallback=False):
  505. note = 'Downloading page %d%s' % (num, ' (switch to fallback)' if fallback else '')
  506. return self._download_webpage(
  507. base_url, item_id, note, query={'page': num})
  508. def is_404(e):
  509. return isinstance(e.cause, compat_HTTPError) and e.cause.code == 404
  510. base_url = url
  511. has_page = page is not None
  512. first_page = page if has_page else 1
  513. for page_num in (first_page, ) if has_page else itertools.count(first_page):
  514. try:
  515. try:
  516. webpage = download_page(base_url, page_num)
  517. except ExtractorError as e:
  518. # Some sources may not be available via /videos page,
  519. # trying to fallback to main page pagination (see [1])
  520. # 1. https://github.com/ytdl-org/youtube-dl/issues/27853
  521. if is_404(e) and page_num == first_page and VIDEOS in base_url:
  522. base_url = base_url.replace(VIDEOS, '')
  523. webpage = download_page(base_url, page_num, fallback=True)
  524. else:
  525. raise
  526. except ExtractorError as e:
  527. if is_404(e) and page_num != first_page:
  528. break
  529. raise
  530. page_entries = self._extract_entries(webpage, host)
  531. if not page_entries:
  532. break
  533. for e in page_entries:
  534. yield e
  535. if not self._has_more(webpage):
  536. break
  537. def _real_extract(self, url):
  538. mobj = re.match(self._VALID_URL, url)
  539. host = mobj.group('host')
  540. item_id = mobj.group('id')
  541. self._login(host)
  542. return self.playlist_result(self._entries(url, host, item_id), item_id)
  543. class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
  544. _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
  545. _TESTS = [{
  546. 'url': 'https://www.pornhub.com/model/zoe_ph/videos',
  547. 'only_matching': True,
  548. }, {
  549. 'url': 'http://www.pornhub.com/users/rushandlia/videos',
  550. 'only_matching': True,
  551. }, {
  552. 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos',
  553. 'info_dict': {
  554. 'id': 'pornstar/jenny-blighe/videos',
  555. },
  556. 'playlist_mincount': 149,
  557. }, {
  558. 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos?page=3',
  559. 'info_dict': {
  560. 'id': 'pornstar/jenny-blighe/videos',
  561. },
  562. 'playlist_mincount': 40,
  563. }, {
  564. # default sorting as Top Rated Videos
  565. 'url': 'https://www.pornhub.com/channels/povd/videos',
  566. 'info_dict': {
  567. 'id': 'channels/povd/videos',
  568. },
  569. 'playlist_mincount': 293,
  570. }, {
  571. # Top Rated Videos
  572. 'url': 'https://www.pornhub.com/channels/povd/videos?o=ra',
  573. 'only_matching': True,
  574. }, {
  575. # Most Recent Videos
  576. 'url': 'https://www.pornhub.com/channels/povd/videos?o=da',
  577. 'only_matching': True,
  578. }, {
  579. # Most Viewed Videos
  580. 'url': 'https://www.pornhub.com/channels/povd/videos?o=vi',
  581. 'only_matching': True,
  582. }, {
  583. 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
  584. 'only_matching': True,
  585. }, {
  586. # Most Viewed Videos
  587. 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=mv',
  588. 'only_matching': True,
  589. }, {
  590. # Top Rated Videos
  591. 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=tr',
  592. 'only_matching': True,
  593. }, {
  594. # Longest Videos
  595. 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=lg',
  596. 'only_matching': True,
  597. }, {
  598. # Newest Videos
  599. 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=cm',
  600. 'only_matching': True,
  601. }, {
  602. 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/paid',
  603. 'only_matching': True,
  604. }, {
  605. 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/fanonly',
  606. 'only_matching': True,
  607. }, {
  608. 'url': 'https://www.pornhub.com/video',
  609. 'only_matching': True,
  610. }, {
  611. 'url': 'https://www.pornhub.com/video?page=3',
  612. 'only_matching': True,
  613. }, {
  614. 'url': 'https://www.pornhub.com/video/search?search=123',
  615. 'only_matching': True,
  616. }, {
  617. 'url': 'https://www.pornhub.com/categories/teen',
  618. 'only_matching': True,
  619. }, {
  620. 'url': 'https://www.pornhub.com/categories/teen?page=3',
  621. 'only_matching': True,
  622. }, {
  623. 'url': 'https://www.pornhub.com/hd',
  624. 'only_matching': True,
  625. }, {
  626. 'url': 'https://www.pornhub.com/hd?page=3',
  627. 'only_matching': True,
  628. }, {
  629. 'url': 'https://www.pornhub.com/described-video',
  630. 'only_matching': True,
  631. }, {
  632. 'url': 'https://www.pornhub.com/described-video?page=2',
  633. 'only_matching': True,
  634. }, {
  635. 'url': 'https://www.pornhub.com/video/incategories/60fps-1/hd-porn',
  636. 'only_matching': True,
  637. }, {
  638. 'url': 'https://www.pornhub.com/playlist/44121572',
  639. 'info_dict': {
  640. 'id': 'playlist/44121572',
  641. },
  642. 'playlist_mincount': 132,
  643. }, {
  644. 'url': 'https://www.pornhub.com/playlist/4667351',
  645. 'only_matching': True,
  646. }, {
  647. 'url': 'https://de.pornhub.com/playlist/4667351',
  648. 'only_matching': True,
  649. }]
  650. @classmethod
  651. def suitable(cls, url):
  652. return (False
  653. if PornHubIE.suitable(url) or PornHubUserIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url)
  654. else super(PornHubPagedVideoListIE, cls).suitable(url))
  655. class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
  656. _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
  657. _TESTS = [{
  658. 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
  659. 'info_dict': {
  660. 'id': 'jenny-blighe',
  661. },
  662. 'playlist_mincount': 129,
  663. }, {
  664. 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
  665. 'only_matching': True,
  666. }]