1
0

pornhub.py 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import functools
  4. import itertools
  5. import operator
  6. import re
  7. from .common import InfoExtractor
  8. from ..compat import (
  9. compat_HTTPError,
  10. compat_str,
  11. compat_urllib_request,
  12. )
  13. from .openload import PhantomJSwrapper
  14. from ..utils import (
  15. determine_ext,
  16. ExtractorError,
  17. int_or_none,
  18. merge_dicts,
  19. NO_DEFAULT,
  20. orderedSet,
  21. remove_quotes,
  22. str_to_int,
  23. update_url_query,
  24. urlencode_postdata,
  25. url_or_none,
  26. )
  27. class PornHubBaseIE(InfoExtractor):
  28. _NETRC_MACHINE = 'pornhub'
  29. def _download_webpage_handle(self, *args, **kwargs):
  30. def dl(*args, **kwargs):
  31. return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
  32. ret = dl(*args, **kwargs)
  33. if not ret:
  34. return ret
  35. webpage, urlh = ret
  36. if any(re.search(p, webpage) for p in (
  37. r'<body\b[^>]+\bonload=["\']go\(\)',
  38. r'document\.cookie\s*=\s*["\']RNKEY=',
  39. r'document\.location\.reload\(true\)')):
  40. url_or_request = args[0]
  41. url = (url_or_request.get_full_url()
  42. if isinstance(url_or_request, compat_urllib_request.Request)
  43. else url_or_request)
  44. phantom = PhantomJSwrapper(self, required_version='2.0')
  45. phantom.get(url, html=webpage)
  46. webpage, urlh = dl(*args, **kwargs)
  47. return webpage, urlh
  48. def _real_initialize(self):
  49. self._logged_in = False
  50. def _login(self, host):
  51. if self._logged_in:
  52. return
  53. site = host.split('.')[0]
  54. # Both sites pornhub and pornhubpremium have separate accounts
  55. # so there should be an option to provide credentials for both.
  56. # At the same time some videos are available under the same video id
  57. # on both sites so that we have to identify them as the same video.
  58. # For that purpose we have to keep both in the same extractor
  59. # but under different netrc machines.
  60. username, password = self._get_login_info(netrc_machine=site)
  61. if username is None:
  62. return
  63. login_url = 'https://www.%s/%slogin' % (host, 'premium/' if 'premium' in host else '')
  64. login_page = self._download_webpage(
  65. login_url, None, 'Downloading %s login page' % site)
  66. def is_logged(webpage):
  67. return any(re.search(p, webpage) for p in (
  68. r'class=["\']signOut',
  69. r'>Sign\s+[Oo]ut\s*<'))
  70. if is_logged(login_page):
  71. self._logged_in = True
  72. return
  73. login_form = self._hidden_inputs(login_page)
  74. login_form.update({
  75. 'username': username,
  76. 'password': password,
  77. })
  78. response = self._download_json(
  79. 'https://www.%s/front/authenticate' % host, None,
  80. 'Logging in to %s' % site,
  81. data=urlencode_postdata(login_form),
  82. headers={
  83. 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
  84. 'Referer': login_url,
  85. 'X-Requested-With': 'XMLHttpRequest',
  86. })
  87. if response.get('success') == '1':
  88. self._logged_in = True
  89. return
  90. message = response.get('message')
  91. if message is not None:
  92. raise ExtractorError(
  93. 'Unable to login: %s' % message, expected=True)
  94. raise ExtractorError('Unable to log in')
  95. class PornHubIE(PornHubBaseIE):
  96. IE_DESC = 'PornHub and Thumbzilla'
  97. _VALID_URL = r'''(?x)
  98. https?://
  99. (?:
  100. (?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
  101. (?:www\.)?thumbzilla\.com/video/
  102. )
  103. (?P<id>[\da-z]+)
  104. '''
  105. _TESTS = [{
  106. 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
  107. 'md5': 'a6391306d050e4547f62b3f485dd9ba9',
  108. 'info_dict': {
  109. 'id': '648719015',
  110. 'ext': 'mp4',
  111. 'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
  112. 'uploader': 'Babes',
  113. 'upload_date': '20130628',
  114. 'timestamp': 1372447216,
  115. 'duration': 361,
  116. 'view_count': int,
  117. 'like_count': int,
  118. 'dislike_count': int,
  119. 'comment_count': int,
  120. 'age_limit': 18,
  121. 'tags': list,
  122. 'categories': list,
  123. },
  124. }, {
  125. # non-ASCII title
  126. 'url': 'http://www.pornhub.com/view_video.php?viewkey=1331683002',
  127. 'info_dict': {
  128. 'id': '1331683002',
  129. 'ext': 'mp4',
  130. 'title': '重庆婷婷女王足交',
  131. 'upload_date': '20150213',
  132. 'timestamp': 1423804862,
  133. 'duration': 1753,
  134. 'view_count': int,
  135. 'like_count': int,
  136. 'dislike_count': int,
  137. 'comment_count': int,
  138. 'age_limit': 18,
  139. 'tags': list,
  140. 'categories': list,
  141. },
  142. 'params': {
  143. 'skip_download': True,
  144. },
  145. 'skip': 'Video has been flagged for verification in accordance with our trust and safety policy',
  146. }, {
  147. # subtitles
  148. 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
  149. 'info_dict': {
  150. 'id': 'ph5af5fef7c2aa7',
  151. 'ext': 'mp4',
  152. 'title': 'BFFS - Cute Teen Girls Share Cock On the Floor',
  153. 'uploader': 'BFFs',
  154. 'duration': 622,
  155. 'view_count': int,
  156. 'like_count': int,
  157. 'dislike_count': int,
  158. 'comment_count': int,
  159. 'age_limit': 18,
  160. 'tags': list,
  161. 'categories': list,
  162. 'subtitles': {
  163. 'en': [{
  164. "ext": 'srt'
  165. }]
  166. },
  167. },
  168. 'params': {
  169. 'skip_download': True,
  170. },
  171. 'skip': 'This video has been disabled',
  172. }, {
  173. 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
  174. 'only_matching': True,
  175. }, {
  176. # removed at the request of cam4.com
  177. 'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862',
  178. 'only_matching': True,
  179. }, {
  180. # removed at the request of the copyright owner
  181. 'url': 'http://www.pornhub.com/view_video.php?viewkey=788152859',
  182. 'only_matching': True,
  183. }, {
  184. # removed by uploader
  185. 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph572716d15a111',
  186. 'only_matching': True,
  187. }, {
  188. # private video
  189. 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7',
  190. 'only_matching': True,
  191. }, {
  192. 'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex',
  193. 'only_matching': True,
  194. }, {
  195. 'url': 'http://www.pornhub.com/video/show?viewkey=648719015',
  196. 'only_matching': True,
  197. }, {
  198. 'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933',
  199. 'only_matching': True,
  200. }, {
  201. 'url': 'https://www.pornhub.org/view_video.php?viewkey=203640933',
  202. 'only_matching': True,
  203. }, {
  204. 'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
  205. 'only_matching': True,
  206. }, {
  207. # Some videos are available with the same id on both premium
  208. # and non-premium sites (e.g. this and the following test)
  209. 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5f75b0f4b18e3',
  210. 'only_matching': True,
  211. }, {
  212. 'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3',
  213. 'only_matching': True,
  214. }]
  215. @staticmethod
  216. def _extract_urls(webpage):
  217. return re.findall(
  218. r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)',
  219. webpage)
  220. def _extract_count(self, pattern, webpage, name):
  221. return str_to_int(self._search_regex(
  222. pattern, webpage, '%s count' % name, fatal=False))
  223. def _real_extract(self, url):
  224. mobj = re.match(self._VALID_URL, url)
  225. host = mobj.group('host') or 'pornhub.com'
  226. video_id = mobj.group('id')
  227. self._login(host)
  228. self._set_cookie(host, 'age_verified', '1')
  229. def dl_webpage(platform):
  230. self._set_cookie(host, 'platform', platform)
  231. return self._download_webpage(
  232. 'https://www.%s/view_video.php?viewkey=%s' % (host, video_id),
  233. video_id, 'Downloading %s webpage' % platform)
  234. webpage = dl_webpage('pc')
  235. error_msg = self._html_search_regex(
  236. (r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
  237. r'(?s)<section[^>]+class=["\']noVideo["\'][^>]*>(?P<error>.+?)</section>'),
  238. webpage, 'error message', default=None, group='error')
  239. if error_msg:
  240. error_msg = re.sub(r'\s+', ' ', error_msg)
  241. raise ExtractorError(
  242. 'PornHub said: %s' % error_msg,
  243. expected=True, video_id=video_id)
  244. # video_title from flashvars contains whitespace instead of non-ASCII (see
  245. # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
  246. # on that anymore.
  247. title = self._html_search_meta(
  248. 'twitter:title', webpage, default=None) or self._html_search_regex(
  249. (r'(?s)<h1[^>]+class=["\']title["\'][^>]*>(?P<title>.+?)</h1>',
  250. r'<div[^>]+data-video-title=(["\'])(?P<title>(?:(?!\1).)+)\1',
  251. r'shareTitle["\']\s*[=:]\s*(["\'])(?P<title>(?:(?!\1).)+)\1'),
  252. webpage, 'title', group='title')
  253. video_urls = []
  254. video_urls_set = set()
  255. subtitles = {}
  256. flashvars = self._parse_json(
  257. self._search_regex(
  258. r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
  259. video_id)
  260. if flashvars:
  261. subtitle_url = url_or_none(flashvars.get('closedCaptionsFile'))
  262. if subtitle_url:
  263. subtitles.setdefault('en', []).append({
  264. 'url': subtitle_url,
  265. 'ext': 'srt',
  266. })
  267. thumbnail = flashvars.get('image_url')
  268. duration = int_or_none(flashvars.get('video_duration'))
  269. media_definitions = flashvars.get('mediaDefinitions')
  270. if isinstance(media_definitions, list):
  271. for definition in media_definitions:
  272. if not isinstance(definition, dict):
  273. continue
  274. video_url = definition.get('videoUrl')
  275. if not video_url or not isinstance(video_url, compat_str):
  276. continue
  277. if video_url in video_urls_set:
  278. continue
  279. video_urls_set.add(video_url)
  280. video_urls.append(
  281. (video_url, int_or_none(definition.get('quality'))))
  282. else:
  283. thumbnail, duration = [None] * 2
  284. def extract_js_vars(webpage, pattern, default=NO_DEFAULT):
  285. assignments = self._search_regex(
  286. pattern, webpage, 'encoded url', default=default)
  287. if not assignments:
  288. return {}
  289. assignments = assignments.split(';')
  290. js_vars = {}
  291. def parse_js_value(inp):
  292. inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp)
  293. if '+' in inp:
  294. inps = inp.split('+')
  295. return functools.reduce(
  296. operator.concat, map(parse_js_value, inps))
  297. inp = inp.strip()
  298. if inp in js_vars:
  299. return js_vars[inp]
  300. return remove_quotes(inp)
  301. for assn in assignments:
  302. assn = assn.strip()
  303. if not assn:
  304. continue
  305. assn = re.sub(r'var\s+', '', assn)
  306. vname, value = assn.split('=', 1)
  307. js_vars[vname] = parse_js_value(value)
  308. return js_vars
  309. def add_video_url(video_url):
  310. v_url = url_or_none(video_url)
  311. if not v_url:
  312. return
  313. if v_url in video_urls_set:
  314. return
  315. video_urls.append((v_url, None))
  316. video_urls_set.add(v_url)
  317. def parse_quality_items(quality_items):
  318. q_items = self._parse_json(quality_items, video_id, fatal=False)
  319. if not isinstance(q_items, list):
  320. return
  321. for item in q_items:
  322. if isinstance(item, dict):
  323. add_video_url(item.get('url'))
  324. if not video_urls:
  325. FORMAT_PREFIXES = ('media', 'quality', 'qualityItems')
  326. js_vars = extract_js_vars(
  327. webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES),
  328. default=None)
  329. if js_vars:
  330. for key, format_url in js_vars.items():
  331. if key.startswith(FORMAT_PREFIXES[-1]):
  332. parse_quality_items(format_url)
  333. elif any(key.startswith(p) for p in FORMAT_PREFIXES[:2]):
  334. add_video_url(format_url)
  335. if not video_urls and re.search(
  336. r'<[^>]+\bid=["\']lockedPlayer', webpage):
  337. raise ExtractorError(
  338. 'Video %s is locked' % video_id, expected=True)
  339. if not video_urls:
  340. js_vars = extract_js_vars(
  341. dl_webpage('tv'), r'(var.+?mediastring.+?)</script>')
  342. add_video_url(js_vars['mediastring'])
  343. for mobj in re.finditer(
  344. r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1',
  345. webpage):
  346. video_url = mobj.group('url')
  347. if video_url not in video_urls_set:
  348. video_urls.append((video_url, None))
  349. video_urls_set.add(video_url)
  350. upload_date = None
  351. formats = []
  352. def add_format(format_url, height=None):
  353. ext = determine_ext(format_url)
  354. if ext == 'mpd':
  355. formats.extend(self._extract_mpd_formats(
  356. format_url, video_id, mpd_id='dash', fatal=False))
  357. return
  358. if ext == 'm3u8':
  359. formats.extend(self._extract_m3u8_formats(
  360. format_url, video_id, 'mp4', entry_protocol='m3u8_native',
  361. m3u8_id='hls', fatal=False))
  362. return
  363. if not height:
  364. height = int_or_none(self._search_regex(
  365. r'(?P<height>\d+)[pP]?_\d+[kK]', format_url, 'height',
  366. default=None))
  367. formats.append({
  368. 'url': format_url,
  369. 'format_id': '%dp' % height if height else None,
  370. 'height': height,
  371. })
  372. for video_url, height in video_urls:
  373. if not upload_date:
  374. upload_date = self._search_regex(
  375. r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None)
  376. if upload_date:
  377. upload_date = upload_date.replace('/', '')
  378. if '/video/get_media' in video_url:
  379. medias = self._download_json(video_url, video_id, fatal=False)
  380. if isinstance(medias, list):
  381. for media in medias:
  382. if not isinstance(media, dict):
  383. continue
  384. video_url = url_or_none(media.get('videoUrl'))
  385. if not video_url:
  386. continue
  387. height = int_or_none(media.get('quality'))
  388. add_format(video_url, height)
  389. continue
  390. add_format(video_url)
  391. self._sort_formats(
  392. formats, field_preference=('height', 'width', 'fps', 'format_id'))
  393. video_uploader = self._html_search_regex(
  394. r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
  395. webpage, 'uploader', default=None)
  396. def extract_vote_count(kind, name):
  397. return self._extract_count(
  398. (r'<span[^>]+\bclass="votes%s"[^>]*>([\d,\.]+)</span>' % kind,
  399. r'<span[^>]+\bclass=["\']votes%s["\'][^>]*\bdata-rating=["\'](\d+)' % kind),
  400. webpage, name)
  401. view_count = self._extract_count(
  402. r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view')
  403. like_count = extract_vote_count('Up', 'like')
  404. dislike_count = extract_vote_count('Down', 'dislike')
  405. comment_count = self._extract_count(
  406. r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
  407. def extract_list(meta_key):
  408. div = self._search_regex(
  409. r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>'
  410. % meta_key, webpage, meta_key, default=None)
  411. if div:
  412. return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div)
  413. info = self._search_json_ld(webpage, video_id, default={})
  414. # description provided in JSON-LD is irrelevant
  415. info['description'] = None
  416. return merge_dicts({
  417. 'id': video_id,
  418. 'uploader': video_uploader,
  419. 'upload_date': upload_date,
  420. 'title': title,
  421. 'thumbnail': thumbnail,
  422. 'duration': duration,
  423. 'view_count': view_count,
  424. 'like_count': like_count,
  425. 'dislike_count': dislike_count,
  426. 'comment_count': comment_count,
  427. 'formats': formats,
  428. 'age_limit': 18,
  429. 'tags': extract_list('tags'),
  430. 'categories': extract_list('categories'),
  431. 'subtitles': subtitles,
  432. }, info)
  433. class PornHubPlaylistBaseIE(PornHubBaseIE):
  434. def _extract_page(self, url):
  435. return int_or_none(self._search_regex(
  436. r'\bpage=(\d+)', url, 'page', default=None))
  437. def _extract_entries(self, webpage, host):
  438. # Only process container div with main playlist content skipping
  439. # drop-down menu that uses similar pattern for videos (see
  440. # https://github.com/ytdl-org/youtube-dl/issues/11594).
  441. container = self._search_regex(
  442. r'(?s)(<div[^>]+class=["\']container.+)', webpage,
  443. 'container', default=webpage)
  444. return [
  445. self.url_result(
  446. 'http://www.%s/%s' % (host, video_url),
  447. PornHubIE.ie_key(), video_title=title)
  448. for video_url, title in orderedSet(re.findall(
  449. r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
  450. container))
  451. ]
  452. class PornHubUserIE(PornHubPlaylistBaseIE):
  453. _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
  454. _TESTS = [{
  455. 'url': 'https://www.pornhub.com/model/zoe_ph',
  456. 'playlist_mincount': 118,
  457. }, {
  458. 'url': 'https://www.pornhub.com/pornstar/liz-vicious',
  459. 'info_dict': {
  460. 'id': 'liz-vicious',
  461. },
  462. 'playlist_mincount': 118,
  463. }, {
  464. 'url': 'https://www.pornhub.com/users/russianveet69',
  465. 'only_matching': True,
  466. }, {
  467. 'url': 'https://www.pornhub.com/channels/povd',
  468. 'only_matching': True,
  469. }, {
  470. 'url': 'https://www.pornhub.com/model/zoe_ph?abc=1',
  471. 'only_matching': True,
  472. }, {
  473. # Unavailable via /videos page, but available with direct pagination
  474. # on pornstar page (see [1]), requires premium
  475. # 1. https://github.com/ytdl-org/youtube-dl/issues/27853
  476. 'url': 'https://www.pornhubpremium.com/pornstar/sienna-west',
  477. 'only_matching': True,
  478. }, {
  479. # Same as before, multi page
  480. 'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
  481. 'only_matching': True,
  482. }]
  483. def _real_extract(self, url):
  484. mobj = re.match(self._VALID_URL, url)
  485. user_id = mobj.group('id')
  486. videos_url = '%s/videos' % mobj.group('url')
  487. page = self._extract_page(url)
  488. if page:
  489. videos_url = update_url_query(videos_url, {'page': page})
  490. return self.url_result(
  491. videos_url, ie=PornHubPagedVideoListIE.ie_key(), video_id=user_id)
  492. class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
  493. @staticmethod
  494. def _has_more(webpage):
  495. return re.search(
  496. r'''(?x)
  497. <li[^>]+\bclass=["\']page_next|
  498. <link[^>]+\brel=["\']next|
  499. <button[^>]+\bid=["\']moreDataBtn
  500. ''', webpage) is not None
  501. def _entries(self, url, host, item_id):
  502. page = self._extract_page(url)
  503. VIDEOS = '/videos'
  504. def download_page(base_url, num, fallback=False):
  505. note = 'Downloading page %d%s' % (num, ' (switch to fallback)' if fallback else '')
  506. return self._download_webpage(
  507. base_url, item_id, note, query={'page': num})
  508. def is_404(e):
  509. return isinstance(e.cause, compat_HTTPError) and e.cause.code == 404
  510. base_url = url
  511. has_page = page is not None
  512. first_page = page if has_page else 1
  513. for page_num in (first_page, ) if has_page else itertools.count(first_page):
  514. try:
  515. try:
  516. webpage = download_page(base_url, page_num)
  517. except ExtractorError as e:
  518. # Some sources may not be available via /videos page,
  519. # trying to fallback to main page pagination (see [1])
  520. # 1. https://github.com/ytdl-org/youtube-dl/issues/27853
  521. if is_404(e) and page_num == first_page and VIDEOS in base_url:
  522. base_url = base_url.replace(VIDEOS, '')
  523. webpage = download_page(base_url, page_num, fallback=True)
  524. else:
  525. raise
  526. except ExtractorError as e:
  527. if is_404(e) and page_num != first_page:
  528. break
  529. raise
  530. page_entries = self._extract_entries(webpage, host)
  531. if not page_entries:
  532. break
  533. for e in page_entries:
  534. yield e
  535. if not self._has_more(webpage):
  536. break
  537. def _real_extract(self, url):
  538. mobj = re.match(self._VALID_URL, url)
  539. host = mobj.group('host')
  540. item_id = mobj.group('id')
  541. self._login(host)
  542. return self.playlist_result(self._entries(url, host, item_id), item_id)
  543. class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
  544. _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
  545. _TESTS = [{
  546. 'url': 'https://www.pornhub.com/model/zoe_ph/videos',
  547. 'only_matching': True,
  548. }, {
  549. 'url': 'http://www.pornhub.com/users/rushandlia/videos',
  550. 'only_matching': True,
  551. }, {
  552. 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos',
  553. 'info_dict': {
  554. 'id': 'pornstar/jenny-blighe/videos',
  555. },
  556. 'playlist_mincount': 149,
  557. }, {
  558. 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos?page=3',
  559. 'info_dict': {
  560. 'id': 'pornstar/jenny-blighe/videos',
  561. },
  562. 'playlist_mincount': 40,
  563. }, {
  564. # default sorting as Top Rated Videos
  565. 'url': 'https://www.pornhub.com/channels/povd/videos',
  566. 'info_dict': {
  567. 'id': 'channels/povd/videos',
  568. },
  569. 'playlist_mincount': 293,
  570. }, {
  571. # Top Rated Videos
  572. 'url': 'https://www.pornhub.com/channels/povd/videos?o=ra',
  573. 'only_matching': True,
  574. }, {
  575. # Most Recent Videos
  576. 'url': 'https://www.pornhub.com/channels/povd/videos?o=da',
  577. 'only_matching': True,
  578. }, {
  579. # Most Viewed Videos
  580. 'url': 'https://www.pornhub.com/channels/povd/videos?o=vi',
  581. 'only_matching': True,
  582. }, {
  583. 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
  584. 'only_matching': True,
  585. }, {
  586. # Most Viewed Videos
  587. 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=mv',
  588. 'only_matching': True,
  589. }, {
  590. # Top Rated Videos
  591. 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=tr',
  592. 'only_matching': True,
  593. }, {
  594. # Longest Videos
  595. 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=lg',
  596. 'only_matching': True,
  597. }, {
  598. # Newest Videos
  599. 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=cm',
  600. 'only_matching': True,
  601. }, {
  602. 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/paid',
  603. 'only_matching': True,
  604. }, {
  605. 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/fanonly',
  606. 'only_matching': True,
  607. }, {
  608. 'url': 'https://www.pornhub.com/video',
  609. 'only_matching': True,
  610. }, {
  611. 'url': 'https://www.pornhub.com/video?page=3',
  612. 'only_matching': True,
  613. }, {
  614. 'url': 'https://www.pornhub.com/video/search?search=123',
  615. 'only_matching': True,
  616. }, {
  617. 'url': 'https://www.pornhub.com/categories/teen',
  618. 'only_matching': True,
  619. }, {
  620. 'url': 'https://www.pornhub.com/categories/teen?page=3',
  621. 'only_matching': True,
  622. }, {
  623. 'url': 'https://www.pornhub.com/hd',
  624. 'only_matching': True,
  625. }, {
  626. 'url': 'https://www.pornhub.com/hd?page=3',
  627. 'only_matching': True,
  628. }, {
  629. 'url': 'https://www.pornhub.com/described-video',
  630. 'only_matching': True,
  631. }, {
  632. 'url': 'https://www.pornhub.com/described-video?page=2',
  633. 'only_matching': True,
  634. }, {
  635. 'url': 'https://www.pornhub.com/video/incategories/60fps-1/hd-porn',
  636. 'only_matching': True,
  637. }, {
  638. 'url': 'https://www.pornhub.com/playlist/44121572',
  639. 'info_dict': {
  640. 'id': 'playlist/44121572',
  641. },
  642. 'playlist_mincount': 132,
  643. }, {
  644. 'url': 'https://www.pornhub.com/playlist/4667351',
  645. 'only_matching': True,
  646. }, {
  647. 'url': 'https://de.pornhub.com/playlist/4667351',
  648. 'only_matching': True,
  649. }]
  650. @classmethod
  651. def suitable(cls, url):
  652. return (False
  653. if PornHubIE.suitable(url) or PornHubUserIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url)
  654. else super(PornHubPagedVideoListIE, cls).suitable(url))
  655. class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
  656. _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
  657. _TESTS = [{
  658. 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
  659. 'info_dict': {
  660. 'id': 'jenny-blighe',
  661. },
  662. 'playlist_mincount': 129,
  663. }, {
  664. 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
  665. 'only_matching': True,
  666. }]