yandexmusic.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import hashlib
  4. import itertools
  5. import re
  6. from .common import InfoExtractor
  7. from ..compat import compat_str
  8. from ..utils import (
  9. ExtractorError,
  10. int_or_none,
  11. float_or_none,
  12. try_get,
  13. )
  class YandexMusicBaseIE(InfoExtractor):
      """Common plumbing for the Yandex.Music extractors.

      Overrides the download helpers inherited from InfoExtractor so that
      error payloads and CAPTCHA challenges are turned into expected
      ExtractorErrors, and provides ``_call_api`` for the ``handlers/*.jsx``
      endpoints.
      """

      # URL prefix shared by all extractors; the TLD is captured as ``tld``.
      _VALID_URL_BASE = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by|com)'

      @staticmethod
      def _handle_error(response):
          """Raise ExtractorError when a decoded response reports a failure.

          A dict with a truthy ``error`` value raises with that value as the
          message; a dict whose ``type`` is ``captcha`` or which has a
          ``captcha`` key raises the CAPTCHA error. Non-dict responses are
          ignored.
          """
          if not isinstance(response, dict):
              return
          error = response.get('error')
          if error:
              raise ExtractorError(error, expected=True)
          if response.get('type') == 'captcha' or 'captcha' in response:
              YandexMusicBaseIE._raise_captcha()

      @staticmethod
      def _raise_captcha():
          """Raise the expected ExtractorError explaining the CAPTCHA block."""
          raise ExtractorError(
              'YandexMusic has considered youtube-dl requests automated and '
              'asks you to solve a CAPTCHA. You can either wait for some '
              'time until unblocked and optionally use --sleep-interval '
              'in future or alternatively you can go to https://music.yandex.ru/ '
              'solve CAPTCHA, then export cookies and pass cookie file to '
              'youtube-dl with --cookies',
              expected=True)

      def _download_webpage_handle(self, *args, **kwargs):
          """Download a page and raise if it is the anti-automation page.

          Arguments are forwarded untouched to the parent implementation and
          its return value is passed back unchanged.
          """
          res = super(YandexMusicBaseIE, self)._download_webpage_handle(*args, **kwargs)
          # The parent hands back a (content, url handle) tuple, or False when
          # a non-fatal download fails, so the marker has to be searched for in
          # the page content rather than in the returned value itself.
          webpage = res[0] if isinstance(res, tuple) else res
          if (isinstance(webpage, compat_str)
                  and 'Нам очень жаль, но&nbsp;запросы, поступившие с&nbsp;вашего IP-адреса, похожи на&nbsp;автоматические.' in webpage):
              self._raise_captcha()
          return res

      def _download_json(self, *args, **kwargs):
          """Download JSON via the parent helper and check it for errors."""
          response = super(YandexMusicBaseIE, self)._download_json(*args, **kwargs)
          self._handle_error(response)
          return response

      def _call_api(self, ep, tld, url, item_id, note, query):
          """Query the ``handlers/<ep>.jsx`` endpoint on ``music.yandex.<tld>``.

          ``url`` is sent as the Referer and X-Retpath-Y headers. The request
          is made with ``fatal=False``, so callers must be prepared for a
          non-dict result when the download fails.
          """
          return self._download_json(
              'https://music.yandex.%s/handlers/%s.jsx' % (tld, ep),
              item_id, note,
              fatal=False,
              headers={
                  'Referer': url,
                  'X-Requested-With': 'XMLHttpRequest',
                  'X-Retpath-Y': url,
              },
              query=query)
  class YandexMusicTrackIE(YandexMusicBaseIE):
      """Extractor for a single track inside an album."""

      IE_NAME = 'yandexmusic:track'
      IE_DESC = 'Яндекс.Музыка - Трек'
      _VALID_URL = r'%s/album/(?P<album_id>\d+)/track/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE

      _TESTS = [{
          'url': 'http://music.yandex.ru/album/540508/track/4878838',
          'md5': 'dec8b661f12027ceaba33318787fff76',
          'info_dict': {
              'id': '4878838',
              'ext': 'mp3',
              'title': 'md5:c63e19341fdbe84e43425a30bc777856',
              'filesize': int,
              'duration': 193.04,
              'track': 'md5:210508c6ffdfd67a493a6c378f22c3ff',
              'album': 'md5:cd04fb13c4efeafdfa0a6a6aca36d01a',
              'album_artist': 'md5:5f54c35462c07952df33d97cfb5fc200',
              'artist': 'md5:e6fd86621825f14dc0b25db3acd68160',
              'release_year': 2009,
          },
          # 'skip': 'Travis CI servers blocked by YandexMusic',
      }, {
          # multiple disks
          'url': 'http://music.yandex.ru/album/3840501/track/705105',
          'md5': '82a54e9e787301dd45aba093cf6e58c0',
          'info_dict': {
              'id': '705105',
              'ext': 'mp3',
              'title': 'md5:f86d4a9188279860a83000277024c1a6',
              'filesize': int,
              'duration': 239.27,
              'track': 'md5:40f887f0666ba1aa10b835aca44807d1',
              'album': 'md5:624f5224b14f5c88a8e812fd7fbf1873',
              'album_artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
              'artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
              'release_year': 2016,
              'genre': 'pop',
              'disc_number': 2,
              'track_number': 9,
          },
          # 'skip': 'Travis CI servers blocked by YandexMusic',
      }, {
          'url': 'http://music.yandex.com/album/540508/track/4878838',
          'only_matching': True,
      }]

      def _real_extract(self, url):
          """Build the info dict for the track addressed by ``url``.

          Fetches the track metadata, resolves the download location, signs
          the media URL and fills in album/artist fields when present.

          Raises ExtractorError when the track metadata cannot be obtained.
          """
          mobj = re.match(self._VALID_URL, url)
          tld, album_id, track_id = mobj.group('tld'), mobj.group('album_id'), mobj.group('id')

          # _call_api is non-fatal, so its result may not be a dict; go
          # through try_get instead of subscripting it directly.
          track = try_get(
              self._call_api(
                  'track', tld, url, track_id, 'Downloading track JSON',
                  {'track': '%s:%s' % (track_id, album_id)}),
              lambda x: x['track'], dict)
          if not track:
              raise ExtractorError(
                  'Unable to download track JSON', video_id=track_id)
          track_title = track['title']

          download_data = self._download_json(
              'https://music.yandex.ru/api/v2.1/handlers/track/%s:%s/web-album_track-track-track-main/download/m' % (track_id, album_id),
              track_id, 'Downloading track location url JSON',
              query={'hq': 1},
              headers={'X-Retpath-Y': url})

          fd_data = self._download_json(
              download_data['src'], track_id,
              'Downloading track location JSON',
              query={'format': 'json'})
          # The media URL is signed with an MD5 over a fixed salt, the path
          # without its first character, and the ``s`` value of the response.
          key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + fd_data['path'][1:] + fd_data['s']).encode('utf-8')).hexdigest()
          f_url = 'http://%s/get-mp3/%s/%s?track-id=%s' % (fd_data['host'], key, fd_data['ts'] + fd_data['path'], track['id'])

          thumbnail = None
          # The album list may be missing, empty or malformed.
          cover_uri = try_get(
              track, lambda x: x['albums'][0]['coverUri'], compat_str)
          if cover_uri:
              # '%%' is the size placeholder inside the cover URI.
              thumbnail = cover_uri.replace('%%', 'orig')
              if not thumbnail.startswith('http'):
                  thumbnail = 'http://' + thumbnail

          track_info = {
              'id': track_id,
              'ext': 'mp3',
              'url': f_url,
              'filesize': int_or_none(track.get('fileSize')),
              'duration': float_or_none(track.get('durationMs'), 1000),
              'thumbnail': thumbnail,
              'track': track_title,
              'acodec': download_data.get('codec'),
              'abr': int_or_none(download_data.get('bitrate')),
          }

          def extract_artist_name(artist):
              """Return the artist name followed by its ``decomposed`` parts."""
              decomposed = artist.get('decomposed')
              if not isinstance(decomposed, list):
                  return artist['name']
              parts = [artist['name']]
              for element in decomposed:
                  if isinstance(element, dict) and element.get('name'):
                      parts.append(element['name'])
                  elif isinstance(element, compat_str):
                      parts.append(element)
              return ''.join(parts)

          def extract_artist(artist_list):
              """Join the names of all named artists, or return None."""
              if artist_list and isinstance(artist_list, list):
                  artists_names = [extract_artist_name(a) for a in artist_list if a.get('name')]
                  if artists_names:
                      return ', '.join(artists_names)

          albums = track.get('albums')
          if albums and isinstance(albums, list):
              album = albums[0]
              if isinstance(album, dict):
                  year = album.get('year')
                  disc_number = int_or_none(try_get(
                      album, lambda x: x['trackPosition']['volume']))
                  track_number = int_or_none(try_get(
                      album, lambda x: x['trackPosition']['index']))
                  track_info.update({
                      'album': album.get('title'),
                      'album_artist': extract_artist(album.get('artists')),
                      'release_year': int_or_none(year),
                      'genre': album.get('genre'),
                      'disc_number': disc_number,
                      'track_number': track_number,
                  })

          track_artist = extract_artist(track.get('artists'))
          if track_artist:
              track_info.update({
                  'artist': track_artist,
                  'title': '%s - %s' % (track_artist, track_title),
              })
          else:
              track_info['title'] = track_title

          return track_info
  class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
      """Helpers shared by the extractors that produce lists of tracks."""

      def _extract_tracks(self, source, item_id, url, tld):
          """Return the track list of ``source``, fetching tracks it omits.

          ``source`` must provide ``tracks`` (track dicts) and ``trackIds``.
          When fewer tracks than ids are present, the ids that have no
          matching track are requested from the ``track-entries`` endpoint
          in chunks and appended to ``source['tracks']`` in place.
          """
          tracks = source['tracks']
          track_ids = [compat_str(track_id) for track_id in source['trackIds']]

          # tracks dictionary shipped with playlist.jsx API is limited to 150 tracks,
          # missing tracks should be retrieved manually.
          if len(tracks) >= len(track_ids):
              return tracks

          present_track_ids = set(
              compat_str(track['id'])
              for track in tracks if track.get('id'))
          missing_track_ids = [
              track_id for track_id in track_ids
              if track_id not in present_track_ids]

          # Request missing tracks in chunks to avoid exceeding max HTTP header size,
          # see https://github.com/ytdl-org/youtube-dl/issues/27355
          _TRACKS_PER_CHUNK = 250
          # Stepping through the list by chunk size never yields an empty
          # chunk, and simply does nothing when no ids are actually missing
          # (e.g. when ``trackIds`` contains duplicates).
          chunk_starts = range(0, len(missing_track_ids), _TRACKS_PER_CHUNK)
          for chunk_num, start in enumerate(chunk_starts, 1):
              missing_track_ids_req = missing_track_ids[start:start + _TRACKS_PER_CHUNK]
              missing_tracks = self._call_api(
                  'track-entries', tld, url, item_id,
                  'Downloading missing tracks JSON chunk %d' % chunk_num, {
                      'entries': ','.join(missing_track_ids_req),
                      'lang': tld,
                      'external-domain': 'music.yandex.%s' % tld,
                      'overembed': 'false',
                      'strict': 'true',
                  })
              # The API call is non-fatal; skip chunks that failed.
              if missing_tracks:
                  tracks.extend(missing_tracks)

          return tracks

      def _build_playlist(self, tracks):
          """Turn track dicts into url_result entries for the track extractor.

          Tracks lacking an id (``id`` or ``realId``) or a first album with
          an id are skipped.
          """
          entries = []
          for track in tracks:
              track_id = track.get('id') or track.get('realId')
              if not track_id:
                  continue
              albums = track.get('albums')
              if not albums or not isinstance(albums, list):
                  continue
              album = albums[0]
              if not isinstance(album, dict):
                  continue
              album_id = album.get('id')
              if not album_id:
                  continue
              entries.append(self.url_result(
                  'http://music.yandex.ru/album/%s/track/%s' % (album_id, track_id),
                  ie=YandexMusicTrackIE.ie_key(), video_id=track_id))
          return entries
  class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
      """Extractor that turns an album page into a playlist of its tracks."""

      IE_NAME = 'yandexmusic:album'
      IE_DESC = 'Яндекс.Музыка - Альбом'
      _VALID_URL = r'%s/album/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE

      _TESTS = [{
          'url': 'http://music.yandex.ru/album/540508',
          'info_dict': {
              'id': '540508',
              'title': 'md5:7ed1c3567f28d14be9f61179116f5571',
          },
          'playlist_count': 50,
          # 'skip': 'Travis CI servers blocked by YandexMusic',
      }, {
          'url': 'https://music.yandex.ru/album/3840501',
          'info_dict': {
              'id': '3840501',
              'title': 'md5:36733472cdaa7dcb1fd9473f7da8e50f',
          },
          'playlist_count': 33,
          # 'skip': 'Travis CI servers blocked by YandexMusic',
      }, {
          # empty artists
          'url': 'https://music.yandex.ru/album/9091882',
          'info_dict': {
              'id': '9091882',
              'title': 'ТЕД на русском',
          },
          'playlist_count': 187,
      }]

      @classmethod
      def suitable(cls, url):
          """Decline URLs that the single-track extractor already claims."""
          if YandexMusicTrackIE.suitable(url):
              return False
          return super(YandexMusicAlbumIE, cls).suitable(url)

      def _real_extract(self, url):
          """Return a playlist holding every track of every album volume.

          The playlist title is the album title, prefixed with the first
          artist's name and suffixed with the year when those are available.
          """
          match = re.match(self._VALID_URL, url)
          tld = match.group('tld')
          album_id = match.group('id')

          album = self._call_api(
              'album', tld, url, album_id, 'Downloading album JSON',
              {'album': album_id})

          # Flatten the per-volume track lists into a single list.
          all_tracks = []
          for volume in album['volumes']:
              all_tracks.extend(volume)
          entries = self._build_playlist(all_tracks)

          title = album['title']
          lead_artist = try_get(
              album, lambda x: x['artists'][0]['name'], compat_str)
          if lead_artist:
              title = '%s - %s' % (lead_artist, title)
          release_year = album.get('year')
          if release_year:
              title = title + ' (%s)' % release_year

          return self.playlist_result(entries, compat_str(album['id']), title)
  class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
      """Extractor for a user's playlist."""

      IE_NAME = 'yandexmusic:playlist'
      IE_DESC = 'Яндекс.Музыка - Плейлист'
      _VALID_URL = r'%s/users/(?P<user>[^/]+)/playlists/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE

      _TESTS = [{
          'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
          'info_dict': {
              'id': '1245',
              'title': 'md5:841559b3fe2b998eca88d0d2e22a3097',
              'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
          },
          'playlist_count': 5,
          # 'skip': 'Travis CI servers blocked by YandexMusic',
      }, {
          'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036',
          'only_matching': True,
      }, {
          # playlist exceeding the limit of 150 tracks (see
          # https://github.com/ytdl-org/youtube-dl/issues/6666)
          'url': 'https://music.yandex.ru/users/mesiaz/playlists/1364',
          'info_dict': {
              'id': '1364',
              'title': 'md5:b3b400f997d3f878a13ae0699653f7db',
          },
          'playlist_mincount': 437,
          # 'skip': 'Travis CI servers blocked by YandexMusic',
      }]

      def _real_extract(self, url):
          """Return the playlist named by ``url`` with all of its tracks.

          The playlist's ``title`` and ``description`` are passed through to
          the result when present.
          """
          match = re.match(self._VALID_URL, url)
          tld, owner, playlist_id = match.group('tld', 'user', 'id')

          query = {
              'owner': owner,
              'kinds': playlist_id,
              'light': 'true',
              'lang': tld,
              'external-domain': 'music.yandex.%s' % tld,
              'overembed': 'false',
          }
          response = self._call_api(
              'playlist', tld, url, playlist_id,
              'Downloading playlist JSON', query)
          playlist = response['playlist']

          tracks = self._extract_tracks(playlist, playlist_id, url, tld)
          entries = self._build_playlist(tracks)

          return self.playlist_result(
              entries, compat_str(playlist_id),
              playlist.get('title'), playlist.get('description'))
  class YandexMusicArtistBaseIE(YandexMusicPlaylistBaseIE):
      """Base for the artist extractors.

      Reads ``_ARTIST_WHAT`` and ``_ARTIST_SORT`` from ``self``; neither is
      defined on this class.
      """

      def _call_artist(self, tld, url, artist_id):
          """Fetch the ``_ARTIST_WHAT`` section of an artist from the API."""
          what = self._ARTIST_WHAT
          note = 'Downloading artist %s JSON' % what
          query = {
              'artist': artist_id,
              'what': what,
              'sort': self._ARTIST_SORT or '',
              'dir': '',
              'period': '',
              'lang': tld,
              'external-domain': 'music.yandex.%s' % tld,
              'overembed': 'false',
          }
          return self._call_api('artist', tld, url, artist_id, note, query)

      def _real_extract(self, url):
          """Return the artist's tracks as a playlist titled with the artist name."""
          match = re.match(self._VALID_URL, url)
          tld, artist_id = match.group('tld', 'id')

          data = self._call_artist(tld, url, artist_id)
          tracks = self._extract_tracks(data, artist_id, url, tld)
          artist_name = try_get(data, lambda x: x['artist']['name'], compat_str)

          entries = self._build_playlist(tracks)
          return self.playlist_result(entries, artist_id, artist_name)
  class YandexMusicArtistTracksIE(YandexMusicArtistBaseIE):
      """Extractor for the tracks page of an artist."""

      IE_NAME = 'yandexmusic:artist:tracks'
      IE_DESC = 'Яндекс.Музыка - Артист - Треки'
      _VALID_URL = r'%s/artist/(?P<id>\d+)/tracks' % YandexMusicBaseIE._VALID_URL_BASE

      _TESTS = [{
          'url': 'https://music.yandex.ru/artist/617526/tracks',
          'info_dict': {
              'id': '617526',
              'title': 'md5:131aef29d45fd5a965ca613e708c040b',
          },
          'playlist_count': 507,
          # 'skip': 'Travis CI servers blocked by YandexMusic',
      }]

      # Values sent as the ``sort`` and ``what`` query parameters.
      _ARTIST_SORT = ''
      _ARTIST_WHAT = 'tracks'

      def _real_extract(self, url):
          """Return the artist's tracks as a playlist.

          The title is the artist name (or the artist id when the name is
          unavailable) followed by a fixed suffix.
          """
          match = re.match(self._VALID_URL, url)
          tld, artist_id = match.group('tld', 'id')

          data = self._call_artist(tld, url, artist_id)
          tracks = self._extract_tracks(data, artist_id, url, tld)

          artist_name = try_get(data, lambda x: x['artist']['name'], compat_str)
          if not artist_name:
              artist_name = artist_id
          title = '%s - %s' % (artist_name, 'Треки')

          entries = self._build_playlist(tracks)
          return self.playlist_result(entries, artist_id, title)
  class YandexMusicArtistAlbumsIE(YandexMusicArtistBaseIE):
      """Extractor for the albums page of an artist."""

      IE_NAME = 'yandexmusic:artist:albums'
      IE_DESC = 'Яндекс.Музыка - Артист - Альбомы'
      _VALID_URL = r'%s/artist/(?P<id>\d+)/albums' % YandexMusicBaseIE._VALID_URL_BASE

      _TESTS = [{
          'url': 'https://music.yandex.ru/artist/617526/albums',
          'info_dict': {
              'id': '617526',
              'title': 'md5:55dc58d5c85699b7fb41ee926700236c',
          },
          'playlist_count': 8,
          # 'skip': 'Travis CI servers blocked by YandexMusic',
      }]

      # Values sent as the ``sort`` and ``what`` query parameters.
      _ARTIST_SORT = 'year'
      _ARTIST_WHAT = 'albums'

      def _real_extract(self, url):
          """Return a playlist with one album-extractor entry per album.

          Entries of ``data['albums']`` that are not dicts or have no id are
          skipped. The title is the artist name (or the artist id when the
          name is unavailable) followed by a fixed suffix.
          """
          match = re.match(self._VALID_URL, url)
          tld, artist_id = match.group('tld', 'id')

          data = self._call_artist(tld, url, artist_id)

          entries = []
          for album in data['albums']:
              album_id = album.get('id') if isinstance(album, dict) else None
              if album_id:
                  entries.append(self.url_result(
                      'http://music.yandex.ru/album/%s' % album_id,
                      ie=YandexMusicAlbumIE.ie_key(), video_id=album_id))

          artist_name = try_get(data, lambda x: x['artist']['name'], compat_str)
          if not artist_name:
              artist_name = artist_id
          title = '%s - %s' % (artist_name, 'Альбомы')

          return self.playlist_result(entries, artist_id, title)