peertube.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import compat_str
  6. from ..utils import (
  7. int_or_none,
  8. parse_resolution,
  9. try_get,
  10. unified_timestamp,
  11. urljoin,
  12. )
  13. class PeerTubeIE(InfoExtractor):
  14. _INSTANCES_RE = r'''(?:
  15. # Taken from https://instances.joinpeertube.org/instances
  16. tube\.openalgeria\.org|
  17. peertube\.pointsecu\.fr|
  18. peertube\.nogafa\.org|
  19. peertube\.pl|
  20. megatube\.lilomoino\.fr|
  21. peertube\.tamanoir\.foucry\.net|
  22. peertube\.inapurna\.org|
  23. peertube\.netzspielplatz\.de|
  24. video\.deadsuperhero\.com|
  25. peertube\.devosi\.org|
  26. peertube\.1312\.media|
  27. tube\.worldofhauru\.xyz|
  28. tube\.bootlicker\.party|
  29. skeptikon\.fr|
  30. peertube\.geekshell\.fr|
  31. tube\.opportunis\.me|
  32. peertube\.peshane\.net|
  33. video\.blueline\.mg|
  34. tube\.homecomputing\.fr|
  35. videos\.cloudfrancois\.fr|
  36. peertube\.viviers-fibre\.net|
  37. tube\.ouahpiti\.info|
  38. video\.tedomum\.net|
  39. video\.g3l\.org|
  40. fontube\.fr|
  41. peertube\.gaialabs\.ch|
  42. peertube\.extremely\.online|
  43. peertube\.public-infrastructure\.eu|
  44. tube\.kher\.nl|
  45. peertube\.qtg\.fr|
  46. tube\.22decembre\.eu|
  47. facegirl\.me|
  48. video\.migennes\.net|
  49. janny\.moe|
  50. tube\.p2p\.legal|
  51. video\.atlanti\.se|
  52. troll\.tv|
  53. peertube\.geekael\.fr|
  54. vid\.leotindall\.com|
  55. video\.anormallostpod\.ovh|
  56. p-tube\.h3z\.jp|
  57. tube\.darfweb\.eu|
  58. videos\.iut-orsay\.fr|
  59. peertube\.solidev\.net|
  60. videos\.symphonie-of-code\.fr|
  61. testtube\.ortg\.de|
  62. videos\.cemea\.org|
  63. peertube\.gwendalavir\.eu|
  64. video\.passageenseine\.fr|
  65. videos\.festivalparminous\.org|
  66. peertube\.touhoppai\.moe|
  67. peertube\.duckdns\.org|
  68. sikke\.fi|
  69. peertube\.mastodon\.host|
  70. firedragonvideos\.com|
  71. vidz\.dou\.bet|
  72. peertube\.koehn\.com|
  73. peer\.hostux\.social|
  74. share\.tube|
  75. peertube\.walkingmountains\.fr|
  76. medias\.libox\.fr|
  77. peertube\.moe|
  78. peertube\.xyz|
  79. jp\.peertube\.network|
  80. videos\.benpro\.fr|
  81. tube\.otter\.sh|
  82. peertube\.angristan\.xyz|
  83. peertube\.parleur\.net|
  84. peer\.ecutsa\.fr|
  85. peertube\.heraut\.eu|
  86. peertube\.tifox\.fr|
  87. peertube\.maly\.io|
  88. vod\.mochi\.academy|
  89. exode\.me|
  90. coste\.video|
  91. tube\.aquilenet\.fr|
  92. peertube\.gegeweb\.eu|
  93. framatube\.org|
  94. thinkerview\.video|
  95. tube\.conferences-gesticulees\.net|
  96. peertube\.datagueule\.tv|
  97. video\.lqdn\.fr|
  98. meilleurtube\.delire\.party|
  99. tube\.mochi\.academy|
  100. peertube\.dav\.li|
  101. media\.zat\.im|
  102. pytu\.be|
  103. peertube\.valvin\.fr|
  104. peertube\.nsa\.ovh|
  105. video\.colibris-outilslibres\.org|
  106. video\.hispagatos\.org|
  107. tube\.svnet\.fr|
  108. peertube\.video|
  109. videos\.lecygnenoir\.info|
  110. peertube3\.cpy\.re|
  111. peertube2\.cpy\.re|
  112. videos\.tcit\.fr|
  113. peertube\.cpy\.re
  114. )'''
  115. _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
  116. _VALID_URL = r'''(?x)
  117. (?:
  118. peertube:(?P<host>[^:]+):|
  119. https?://(?P<host_2>%s)/(?:videos/(?:watch|embed)|api/v\d/videos)/
  120. )
  121. (?P<id>%s)
  122. ''' % (_INSTANCES_RE, _UUID_RE)
  123. _TESTS = [{
  124. 'url': 'https://peertube.moe/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c',
  125. 'md5': '80f24ff364cc9d333529506a263e7feb',
  126. 'info_dict': {
  127. 'id': '2790feb0-8120-4e63-9af3-c943c69f5e6c',
  128. 'ext': 'mp4',
  129. 'title': 'wow',
  130. 'description': 'wow such video, so gif',
  131. 'thumbnail': r're:https?://.*\.(?:jpg|png)',
  132. 'timestamp': 1519297480,
  133. 'upload_date': '20180222',
  134. 'uploader': 'Luclu7',
  135. 'uploader_id': '7fc42640-efdb-4505-a45d-a15b1a5496f1',
  136. 'uploder_url': 'https://peertube.nsa.ovh/accounts/luclu7',
  137. 'license': 'Unknown',
  138. 'duration': 3,
  139. 'view_count': int,
  140. 'like_count': int,
  141. 'dislike_count': int,
  142. 'tags': list,
  143. 'categories': list,
  144. }
  145. }, {
  146. 'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
  147. 'only_matching': True,
  148. }, {
  149. # nsfw
  150. 'url': 'https://tube.22decembre.eu/videos/watch/9bb88cd3-9959-46d9-9ab9-33d2bb704c39',
  151. 'only_matching': True,
  152. }, {
  153. 'url': 'https://tube.22decembre.eu/videos/embed/fed67262-6edb-4d1c-833b-daa9085c71d7',
  154. 'only_matching': True,
  155. }, {
  156. 'url': 'https://tube.openalgeria.org/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8',
  157. 'only_matching': True,
  158. }, {
  159. 'url': 'peertube:video.blender.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205',
  160. 'only_matching': True,
  161. }]
  162. @staticmethod
  163. def _extract_peertube_url(webpage, source_url):
  164. mobj = re.match(
  165. r'https?://(?P<host>[^/]+)/videos/watch/(?P<id>%s)'
  166. % PeerTubeIE._UUID_RE, source_url)
  167. if mobj and any(p in webpage for p in (
  168. '<title>PeerTube<',
  169. 'There will be other non JS-based clients to access PeerTube',
  170. '>We are sorry but it seems that PeerTube is not compatible with your web browser.<')):
  171. return 'peertube:%s:%s' % mobj.group('host', 'id')
  172. @staticmethod
  173. def _extract_urls(webpage, source_url):
  174. entries = re.findall(
  175. r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//%s/videos/embed/%s)'''
  176. % (PeerTubeIE._INSTANCES_RE, PeerTubeIE._UUID_RE), webpage)
  177. if not entries:
  178. peertube_url = PeerTubeIE._extract_peertube_url(webpage, source_url)
  179. if peertube_url:
  180. entries = [peertube_url]
  181. return entries
  182. def _real_extract(self, url):
  183. mobj = re.match(self._VALID_URL, url)
  184. host = mobj.group('host') or mobj.group('host_2')
  185. video_id = mobj.group('id')
  186. video = self._download_json(
  187. 'https://%s/api/v1/videos/%s' % (host, video_id), video_id)
  188. title = video['name']
  189. formats = []
  190. for file_ in video['files']:
  191. if not isinstance(file_, dict):
  192. continue
  193. file_url = file_.get('fileUrl')
  194. if not file_url or not isinstance(file_url, compat_str):
  195. continue
  196. file_size = int_or_none(file_.get('size'))
  197. format_id = try_get(
  198. file_, lambda x: x['resolution']['label'], compat_str)
  199. f = parse_resolution(format_id)
  200. f.update({
  201. 'url': file_url,
  202. 'format_id': format_id,
  203. 'filesize': file_size,
  204. })
  205. formats.append(f)
  206. self._sort_formats(formats)
  207. def account_data(field):
  208. return try_get(video, lambda x: x['account'][field], compat_str)
  209. category = try_get(video, lambda x: x['category']['label'], compat_str)
  210. categories = [category] if category else None
  211. nsfw = video.get('nsfw')
  212. if nsfw is bool:
  213. age_limit = 18 if nsfw else 0
  214. else:
  215. age_limit = None
  216. return {
  217. 'id': video_id,
  218. 'title': title,
  219. 'description': video.get('description'),
  220. 'thumbnail': urljoin(url, video.get('thumbnailPath')),
  221. 'timestamp': unified_timestamp(video.get('publishedAt')),
  222. 'uploader': account_data('displayName'),
  223. 'uploader_id': account_data('uuid'),
  224. 'uploder_url': account_data('url'),
  225. 'license': try_get(
  226. video, lambda x: x['licence']['label'], compat_str),
  227. 'duration': int_or_none(video.get('duration')),
  228. 'view_count': int_or_none(video.get('views')),
  229. 'like_count': int_or_none(video.get('likes')),
  230. 'dislike_count': int_or_none(video.get('dislikes')),
  231. 'age_limit': age_limit,
  232. 'tags': try_get(video, lambda x: x['tags'], list),
  233. 'categories': categories,
  234. 'formats': formats,
  235. }