# coding: utf-8
from __future__ import unicode_literals

import itertools
import json

from .naver import NaverBaseIE
from ..compat import (
    compat_HTTPError,
    compat_str,
)
from ..utils import (
    ExtractorError,
    int_or_none,
    merge_dicts,
    str_or_none,
    strip_or_none,
    try_get,
    urlencode_postdata,
)
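
# V LIVE (vlive.tv) extractors: a single video or live stream (VLiveIE),
# a post with attached videos (VLivePostIE) and a channel's video list
# (VLiveChannelIE).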


class VLiveBaseIE(NaverBaseIE):
    _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'


class VLiveIE(VLiveBaseIE):
    IE_NAME = 'vlive'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|embed)/(?P<id>[0-9]+)'
    _NETRC_MACHINE = 'vlive'
    _TESTS = [{
        'url': 'http://www.vlive.tv/video/1326',
        'md5': 'cc7314812855ce56de70a06a27314983',
        'info_dict': {
            'id': '1326',
            'ext': 'mp4',
            'title': "Girl's Day's Broadcast",
            'creator': "Girl's Day",
            'view_count': int,
            'uploader_id': 'muploader_a',
        },
    }, {
        'url': 'http://www.vlive.tv/video/16937',
        'info_dict': {
            'id': '16937',
            'ext': 'mp4',
            'title': '첸백시 걍방',
            'creator': 'EXO',
            'view_count': int,
            'subtitles': 'mincount:12',
            'uploader_id': 'muploader_j',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.vlive.tv/video/129100',
        'md5': 'ca2569453b79d66e5b919e5d308bff6b',
        'info_dict': {
            'id': '129100',
            'ext': 'mp4',
            'title': '[V LIVE] [BTS+] Run BTS! 2019 - EP.71 :: Behind the scene',
            'creator': 'BTS+',
            'view_count': int,
            'subtitles': 'mincount:10',
        },
        'skip': 'This video is only available for CH+ subscribers',
    }, {
        'url': 'https://www.vlive.tv/embed/1326',
        'only_matching': True,
    }]

    def _real_initialize(self):
        self._login()

    def _login(self):
        email, password = self._get_login_info()
        if None in (email, password):
            return

        def is_logged_in():
            login_info = self._download_json(
                'https://www.vlive.tv/auth/loginInfo', None,
                note='Downloading login info',
                headers={'Referer': 'https://www.vlive.tv/home'})
            return try_get(
                login_info, lambda x: x['message']['login'], bool) or False

        LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
        # Request the login page first to obtain session cookies, then POST
        # the credentials and confirm the session via the loginInfo endpoint.
        self._request_webpage(
            LOGIN_URL, None, note='Downloading login cookies')

        self._download_webpage(
            LOGIN_URL, None, note='Logging in',
            data=urlencode_postdata({'email': email, 'pwd': password}),
            headers={
                'Referer': LOGIN_URL,
                'Content-Type': 'application/x-www-form-urlencoded'
            })

        if not is_logged_in():
            raise ExtractorError('Unable to log in', expected=True)

    def _call_api(self, path_template, video_id, fields=None):
        query = {'appId': self._APP_ID}
        if fields:
            query['fields'] = fields
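        # For example, path_template 'post/v1.0/officialVideoPost-%s' with a
        # video_id of '1326' yields .../vam-web/post/v1.0/officialVideoPost-1326,
        # and the note below becomes 'Downloading officialVideoPost JSON metadata'.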
        try:
            return self._download_json(
                'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id,
                'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0],
                headers={'Referer': 'https://www.vlive.tv/'}, query=query)
        except ExtractorError as e:
            # A 403 from the API means the video is gated behind a login
            # (e.g. CH+ subscriber content); surface the server's message.
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                self.raise_login_required(json.loads(e.cause.read().decode())['message'])
            raise

    def _real_extract(self, url):
        video_id = self._match_id(url)

        post = self._call_api(
            'post/v1.0/officialVideoPost-%s', video_id,
            'author{nickname},channel{channelCode,channelName},officialVideo{commentCount,exposeStatus,likeCount,playCount,playTime,status,title,type,vodId}')

        video = post['officialVideo']
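        # The 'fields' projection above limits the response to exactly the
        # author, channel and officialVideo keys that are read below.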

        def get_common_fields():
            channel = post.get('channel') or {}
            return {
                'title': video.get('title'),
                'creator': post.get('author', {}).get('nickname'),
                'channel': channel.get('channelName'),
                'channel_id': channel.get('channelCode'),
                'duration': int_or_none(video.get('playTime')),
                'view_count': int_or_none(video.get('playCount')),
                'like_count': int_or_none(video.get('likeCount')),
                'comment_count': int_or_none(video.get('commentCount')),
            }

        video_type = video.get('type')
        if video_type == 'VOD':
            inkey = self._call_api('video/v1.0/vod/%s/inkey', video_id)['inkey']
            vod_id = video['vodId']
            # _extract_video_info() is inherited from NaverBaseIE and resolves
            # formats/subtitles for the Naver-hosted VOD from vod_id and inkey.
            return merge_dicts(
                get_common_fields(),
                self._extract_video_info(video_id, vod_id, inkey))
        elif video_type == 'LIVE':
            status = video.get('status')
            if status == 'ON_AIR':
                stream_url = self._call_api(
                    'old/v3/live/%s/playInfo',
                    video_id)['result']['adaptiveStreamUrl']
                formats = self._extract_m3u8_formats(stream_url, video_id, 'mp4')
                info = get_common_fields()
                info.update({
                    'title': self._live_title(video['title']),
                    'id': video_id,
                    'formats': formats,
                    'is_live': True,
                })
                return info
            elif status == 'ENDED':
                raise ExtractorError(
                    'Uploading for replay. Please wait...', expected=True)
            elif status == 'RESERVED':
                raise ExtractorError('Coming soon!', expected=True)
            elif video.get('exposeStatus') == 'CANCEL':
                raise ExtractorError(
                    'We are sorry, but the live broadcast has been canceled.',
                    expected=True)
            else:
                raise ExtractorError('Unknown status ' + status)


class VLivePostIE(VLiveIE):
    IE_NAME = 'vlive:post'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/post/(?P<id>\d-\d+)'
    _TESTS = [{
        # uploadType = SOS
        'url': 'https://www.vlive.tv/post/1-20088044',
        'info_dict': {
            'id': '1-20088044',
            'title': 'Hola estrellitas la tierra les dice hola (si era así no?) Ha...',
            'description': 'md5:fab8a1e50e6e51608907f46c7fa4b407',
        },
        'playlist_count': 3,
    }, {
        # uploadType = V
        'url': 'https://www.vlive.tv/post/1-20087926',
        'info_dict': {
            'id': '1-20087926',
            'title': 'James Corden: And so, the baby becamos the Papa💜😭💪😭',
        },
        'playlist_count': 1,
    }]
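    # '%%s' survives the first substitution as a literal '%s', so e.g.
    # _SOS_TMPL == 'fvideo/v1.0/fvideo-%s/sosPlayInfo', which _call_api()
    # later fills in with the attachment's video id.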
    _FVIDEO_TMPL = 'fvideo/v1.0/fvideo-%%s/%s'
    _SOS_TMPL = _FVIDEO_TMPL % 'sosPlayInfo'
    _INKEY_TMPL = _FVIDEO_TMPL % 'inKey'

    def _real_extract(self, url):
        post_id = self._match_id(url)

        post = self._call_api(
            'post/v1.0/post-%s', post_id,
            'attachments{video},officialVideo{videoSeq},plainBody,title')

        video_seq = str_or_none(try_get(
            post, lambda x: x['officialVideo']['videoSeq']))
        if video_seq:
            return self.url_result(
                'http://www.vlive.tv/video/' + video_seq,
                VLiveIE.ie_key(), video_seq)
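
        # Posts that wrap an official video are delegated to VLiveIE above;
        # otherwise each attached video becomes one playlist entry below.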
        title = post['title']
        entries = []
        for idx, video in enumerate(post['attachments']['video'].values()):
            video_id = video.get('videoId')
            if not video_id:
                continue
            upload_type = video.get('uploadType')
            upload_info = video.get('uploadInfo') or {}
            entry = None
            if upload_type == 'SOS':
                download = self._call_api(
                    self._SOS_TMPL, video_id)['videoUrl']['download']
                formats = []
                for f_id, f_url in download.items():
                    # Format ids appear to end in a quality letter (e.g. '720P'),
                    # so the height is taken from everything but the last char.
                    formats.append({
                        'format_id': f_id,
                        'url': f_url,
                        'height': int_or_none(f_id[:-1]),
                    })
                self._sort_formats(formats)
                entry = {
                    'formats': formats,
                    'id': video_id,
                    'thumbnail': upload_info.get('imageUrl'),
                }
            elif upload_type == 'V':
                vod_id = upload_info.get('videoId')
                if not vod_id:
                    continue
                inkey = self._call_api(self._INKEY_TMPL, video_id)['inKey']
                entry = self._extract_video_info(video_id, vod_id, inkey)
            if entry:
                entry['title'] = '%s_part%s' % (title, idx)
                entries.append(entry)
        return self.playlist_result(
            entries, post_id, title, strip_or_none(post.get('plainBody')))


class VLiveChannelIE(VLiveBaseIE):
    IE_NAME = 'vlive:channel'
    _VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P<id>[0-9A-Z]+)'
    _TESTS = [{
        'url': 'http://channels.vlive.tv/FCD4B',
        'info_dict': {
            'id': 'FCD4B',
            'title': 'MAMAMOO',
        },
        'playlist_mincount': 110
    }, {
        'url': 'https://www.vlive.tv/channel/FCD4B',
        'only_matching': True,
    }]

    def _call_api(self, path, channel_key_suffix, channel_value, note, query):
        # Unlike VLiveIE._call_api, channel listings go through the older
        # channelplus proxy API, which takes app_id and channelCode/channelSeq
        # as query parameters.
        q = {
            'app_id': self._APP_ID,
            'channel' + channel_key_suffix: channel_value,
        }
        q.update(query)
        return self._download_json(
            'http://api.vfan.vlive.tv/vproxy/channelplus/' + path,
            channel_value, note='Downloading ' + note, query=q)['result']

    def _real_extract(self, url):
        channel_code = self._match_id(url)

        channel_seq = self._call_api(
            'decodeChannelCode', 'Code', channel_code,
            'decode channel code', {})['channelSeq']
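        # The short channel code from the URL (e.g. FCD4B) is decoded into the
        # numeric channelSeq that the video list endpoint below expects.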

        channel_name = None
        entries = []

        for page_num in itertools.count(1):
            video_list = self._call_api(
                'getChannelVideoList', 'Seq', channel_seq,
                'channel list page #%d' % page_num, {
                    # Large values of maxNumOfRows (~300 or above) may cause
                    # empty responses (see [1]), e.g. this happens for [2] that
                    # has more than 300 videos.
                    # 1. https://github.com/ytdl-org/youtube-dl/issues/13830
                    # 2. http://channels.vlive.tv/EDBF.
                    'maxNumOfRows': 100,
                    'pageNo': page_num
                }
            )

            if not channel_name:
                channel_name = try_get(
                    video_list,
                    lambda x: x['channelInfo']['channelName'],
                    compat_str)

            videos = try_get(
                video_list, lambda x: x['videoList'], list)
            if not videos:
                break

            for video in videos:
                video_id = video.get('videoSeq')
                if not video_id:
                    continue
                video_id = compat_str(video_id)
                entries.append(
                    self.url_result(
                        'http://www.vlive.tv/video/%s' % video_id,
                        ie=VLiveIE.ie_key(), video_id=video_id))

        return self.playlist_result(
            entries, channel_code, channel_name)