yandexmusic.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. import hashlib
  5. from .common import InfoExtractor
  6. from ..compat import compat_str
  7. from ..utils import (
  8. ExtractorError,
  9. int_or_none,
  10. float_or_none,
  11. try_get,
  12. )
  13. class YandexMusicBaseIE(InfoExtractor):
  14. @staticmethod
  15. def _handle_error(response):
  16. if isinstance(response, dict):
  17. error = response.get('error')
  18. if error:
  19. raise ExtractorError(error, expected=True)
  20. if response.get('type') == 'captcha' or 'captcha' in response:
  21. YandexMusicBaseIE._raise_captcha()
  22. @staticmethod
  23. def _raise_captcha():
  24. raise ExtractorError(
  25. 'YandexMusic has considered youtube-dl requests automated and '
  26. 'asks you to solve a CAPTCHA. You can either wait for some '
  27. 'time until unblocked and optionally use --sleep-interval '
  28. 'in future or alternatively you can go to https://music.yandex.ru/ '
  29. 'solve CAPTCHA, then export cookies and pass cookie file to '
  30. 'youtube-dl with --cookies',
  31. expected=True)
  32. def _download_webpage_handle(self, *args, **kwargs):
  33. webpage = super(YandexMusicBaseIE, self)._download_webpage_handle(*args, **kwargs)
  34. if 'Нам очень жаль, но запросы, поступившие с вашего IP-адреса, похожи на автоматические.' in webpage:
  35. self._raise_captcha()
  36. return webpage
  37. def _download_json(self, *args, **kwargs):
  38. response = super(YandexMusicBaseIE, self)._download_json(*args, **kwargs)
  39. self._handle_error(response)
  40. return response
  41. def _call_api(self, ep, tld, url, item_id, note, query):
  42. return self._download_json(
  43. 'https://music.yandex.%s/handlers/%s.jsx' % (tld, ep),
  44. item_id, note,
  45. fatal=False,
  46. headers={
  47. 'Referer': url,
  48. 'X-Requested-With': 'XMLHttpRequest',
  49. 'X-Retpath-Y': url,
  50. },
  51. query=query)
  52. class YandexMusicTrackIE(YandexMusicBaseIE):
  53. IE_NAME = 'yandexmusic:track'
  54. IE_DESC = 'Яндекс.Музыка - Трек'
  55. _VALID_URL = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
  56. _TESTS = [{
  57. 'url': 'http://music.yandex.ru/album/540508/track/4878838',
  58. 'md5': 'dec8b661f12027ceaba33318787fff76',
  59. 'info_dict': {
  60. 'id': '4878838',
  61. 'ext': 'mp3',
  62. 'title': 'md5:c63e19341fdbe84e43425a30bc777856',
  63. 'filesize': int,
  64. 'duration': 193.04,
  65. 'track': 'md5:210508c6ffdfd67a493a6c378f22c3ff',
  66. 'album': 'md5:cd04fb13c4efeafdfa0a6a6aca36d01a',
  67. 'album_artist': 'md5:5f54c35462c07952df33d97cfb5fc200',
  68. 'artist': 'md5:e6fd86621825f14dc0b25db3acd68160',
  69. 'release_year': 2009,
  70. },
  71. # 'skip': 'Travis CI servers blocked by YandexMusic',
  72. }, {
  73. # multiple disks
  74. 'url': 'http://music.yandex.ru/album/3840501/track/705105',
  75. 'md5': '82a54e9e787301dd45aba093cf6e58c0',
  76. 'info_dict': {
  77. 'id': '705105',
  78. 'ext': 'mp3',
  79. 'title': 'md5:f86d4a9188279860a83000277024c1a6',
  80. 'filesize': int,
  81. 'duration': 239.27,
  82. 'track': 'md5:40f887f0666ba1aa10b835aca44807d1',
  83. 'album': 'md5:624f5224b14f5c88a8e812fd7fbf1873',
  84. 'album_artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
  85. 'artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
  86. 'release_year': 2016,
  87. 'genre': 'pop',
  88. 'disc_number': 2,
  89. 'track_number': 9,
  90. },
  91. # 'skip': 'Travis CI servers blocked by YandexMusic',
  92. }]
  93. def _real_extract(self, url):
  94. mobj = re.match(self._VALID_URL, url)
  95. tld, album_id, track_id = mobj.group('tld'), mobj.group('album_id'), mobj.group('id')
  96. track = self._call_api(
  97. 'track', tld, url, track_id, 'Downloading track JSON',
  98. {'track': '%s:%s' % (track_id, album_id)})['track']
  99. track_title = track['title']
  100. download_data = self._download_json(
  101. 'https://music.yandex.ru/api/v2.1/handlers/track/%s:%s/web-album_track-track-track-main/download/m' % (track_id, album_id),
  102. track_id, 'Downloading track location url JSON',
  103. headers={'X-Retpath-Y': url})
  104. fd_data = self._download_json(
  105. download_data['src'], track_id,
  106. 'Downloading track location JSON',
  107. query={'format': 'json'})
  108. key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + fd_data['path'][1:] + fd_data['s']).encode('utf-8')).hexdigest()
  109. f_url = 'http://%s/get-mp3/%s/%s?track-id=%s ' % (fd_data['host'], key, fd_data['ts'] + fd_data['path'], track['id'])
  110. thumbnail = None
  111. cover_uri = track.get('albums', [{}])[0].get('coverUri')
  112. if cover_uri:
  113. thumbnail = cover_uri.replace('%%', 'orig')
  114. if not thumbnail.startswith('http'):
  115. thumbnail = 'http://' + thumbnail
  116. track_info = {
  117. 'id': track_id,
  118. 'ext': 'mp3',
  119. 'url': f_url,
  120. 'filesize': int_or_none(track.get('fileSize')),
  121. 'duration': float_or_none(track.get('durationMs'), 1000),
  122. 'thumbnail': thumbnail,
  123. 'track': track_title,
  124. 'acodec': download_data.get('codec'),
  125. 'abr': int_or_none(download_data.get('bitrate')),
  126. }
  127. def extract_artist_name(artist):
  128. decomposed = artist.get('decomposed')
  129. if not isinstance(decomposed, list):
  130. return artist['name']
  131. parts = [artist['name']]
  132. for element in decomposed:
  133. if isinstance(element, dict) and element.get('name'):
  134. parts.append(element['name'])
  135. elif isinstance(element, compat_str):
  136. parts.append(element)
  137. return ''.join(parts)
  138. def extract_artist(artist_list):
  139. if artist_list and isinstance(artist_list, list):
  140. artists_names = [extract_artist_name(a) for a in artist_list if a.get('name')]
  141. if artists_names:
  142. return ', '.join(artists_names)
  143. albums = track.get('albums')
  144. if albums and isinstance(albums, list):
  145. album = albums[0]
  146. if isinstance(album, dict):
  147. year = album.get('year')
  148. disc_number = int_or_none(try_get(
  149. album, lambda x: x['trackPosition']['volume']))
  150. track_number = int_or_none(try_get(
  151. album, lambda x: x['trackPosition']['index']))
  152. track_info.update({
  153. 'album': album.get('title'),
  154. 'album_artist': extract_artist(album.get('artists')),
  155. 'release_year': int_or_none(year),
  156. 'genre': album.get('genre'),
  157. 'disc_number': disc_number,
  158. 'track_number': track_number,
  159. })
  160. track_artist = extract_artist(track.get('artists'))
  161. if track_artist:
  162. track_info.update({
  163. 'artist': track_artist,
  164. 'title': '%s - %s' % (track_artist, track_title),
  165. })
  166. else:
  167. track_info['title'] = track_title
  168. return track_info
  169. class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
  170. def _extract_tracks(self, source, item_id, url, tld):
  171. tracks = source['tracks']
  172. track_ids = [compat_str(track_id) for track_id in source['trackIds']]
  173. # tracks dictionary shipped with playlist.jsx API is limited to 150 tracks,
  174. # missing tracks should be retrieved manually.
  175. if len(tracks) < len(track_ids):
  176. present_track_ids = set([
  177. compat_str(track['id'])
  178. for track in tracks if track.get('id')])
  179. missing_track_ids = [
  180. track_id for track_id in track_ids
  181. if track_id not in present_track_ids]
  182. missing_tracks = self._call_api(
  183. 'track-entries', tld, url, item_id,
  184. 'Downloading missing tracks JSON', {
  185. 'entries': ','.join(missing_track_ids),
  186. 'lang': tld,
  187. 'external-domain': 'music.yandex.%s' % tld,
  188. 'overembed': 'false',
  189. 'strict': 'true',
  190. })
  191. if missing_tracks:
  192. tracks.extend(missing_tracks)
  193. return tracks
  194. def _build_playlist(self, tracks):
  195. entries = []
  196. for track in tracks:
  197. track_id = track.get('id') or track.get('realId')
  198. if not track_id:
  199. continue
  200. albums = track.get('albums')
  201. if not albums or not isinstance(albums, list):
  202. continue
  203. album = albums[0]
  204. if not isinstance(album, dict):
  205. continue
  206. album_id = album.get('id')
  207. if not album_id:
  208. continue
  209. entries.append(self.url_result(
  210. 'http://music.yandex.ru/album/%s/track/%s' % (album_id, track_id),
  211. ie=YandexMusicTrackIE.ie_key(), video_id=track_id))
  212. return entries
  213. class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
  214. IE_NAME = 'yandexmusic:album'
  215. IE_DESC = 'Яндекс.Музыка - Альбом'
  216. _VALID_URL = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)'
  217. _TESTS = [{
  218. 'url': 'http://music.yandex.ru/album/540508',
  219. 'info_dict': {
  220. 'id': '540508',
  221. 'title': 'md5:7ed1c3567f28d14be9f61179116f5571',
  222. },
  223. 'playlist_count': 50,
  224. # 'skip': 'Travis CI servers blocked by YandexMusic',
  225. }, {
  226. 'url': 'https://music.yandex.ru/album/3840501',
  227. 'info_dict': {
  228. 'id': '3840501',
  229. 'title': 'md5:36733472cdaa7dcb1fd9473f7da8e50f',
  230. },
  231. 'playlist_count': 33,
  232. # 'skip': 'Travis CI servers blocked by YandexMusic',
  233. }, {
  234. # empty artists
  235. 'url': 'https://music.yandex.ru/album/9091882',
  236. 'info_dict': {
  237. 'id': '9091882',
  238. 'title': 'ТЕД на русском',
  239. },
  240. 'playlist_count': 187,
  241. }]
  242. def _real_extract(self, url):
  243. mobj = re.match(self._VALID_URL, url)
  244. tld = mobj.group('tld')
  245. album_id = mobj.group('id')
  246. album = self._call_api(
  247. 'album', tld, url, album_id, 'Downloading album JSON',
  248. {'album': album_id})
  249. entries = self._build_playlist([track for volume in album['volumes'] for track in volume])
  250. title = album['title']
  251. artist = try_get(album, lambda x: x['artists'][0]['name'], compat_str)
  252. if artist:
  253. title = '%s - %s' % (artist, title)
  254. year = album.get('year')
  255. if year:
  256. title += ' (%s)' % year
  257. return self.playlist_result(entries, compat_str(album['id']), title)
  258. class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
  259. IE_NAME = 'yandexmusic:playlist'
  260. IE_DESC = 'Яндекс.Музыка - Плейлист'
  261. _VALID_URL = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by)/users/(?P<user>[^/]+)/playlists/(?P<id>\d+)'
  262. _TESTS = [{
  263. 'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
  264. 'info_dict': {
  265. 'id': '1245',
  266. 'title': 'md5:841559b3fe2b998eca88d0d2e22a3097',
  267. 'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
  268. },
  269. 'playlist_count': 5,
  270. # 'skip': 'Travis CI servers blocked by YandexMusic',
  271. }, {
  272. 'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036',
  273. 'only_matching': True,
  274. }, {
  275. # playlist exceeding the limit of 150 tracks (see
  276. # https://github.com/ytdl-org/youtube-dl/issues/6666)
  277. 'url': 'https://music.yandex.ru/users/mesiaz/playlists/1364',
  278. 'info_dict': {
  279. 'id': '1364',
  280. 'title': 'md5:b3b400f997d3f878a13ae0699653f7db',
  281. },
  282. 'playlist_mincount': 437,
  283. # 'skip': 'Travis CI servers blocked by YandexMusic',
  284. }]
  285. def _real_extract(self, url):
  286. mobj = re.match(self._VALID_URL, url)
  287. tld = mobj.group('tld')
  288. user = mobj.group('user')
  289. playlist_id = mobj.group('id')
  290. playlist = self._call_api(
  291. 'playlist', tld, url, playlist_id, 'Downloading playlist JSON', {
  292. 'owner': user,
  293. 'kinds': playlist_id,
  294. 'light': 'true',
  295. 'lang': tld,
  296. 'external-domain': 'music.yandex.%s' % tld,
  297. 'overembed': 'false',
  298. })['playlist']
  299. tracks = self._extract_tracks(playlist, playlist_id, url, tld)
  300. return self.playlist_result(
  301. self._build_playlist(tracks),
  302. compat_str(playlist_id),
  303. playlist.get('title'), playlist.get('description'))
  304. class YandexMusicArtistBaseIE(YandexMusicPlaylistBaseIE):
  305. def _call_artist(self, tld, url, artist_id):
  306. return self._call_api(
  307. 'artist', tld, url, artist_id,
  308. 'Downloading artist %s JSON' % self._ARTIST_WHAT, {
  309. 'artist': artist_id,
  310. 'what': self._ARTIST_WHAT,
  311. 'sort': self._ARTIST_SORT or '',
  312. 'dir': '',
  313. 'period': '',
  314. 'lang': tld,
  315. 'external-domain': 'music.yandex.%s' % tld,
  316. 'overembed': 'false',
  317. })
  318. def _real_extract(self, url):
  319. mobj = re.match(self._VALID_URL, url)
  320. tld = mobj.group('tld')
  321. artist_id = mobj.group('id')
  322. data = self._call_artist(tld, url, artist_id)
  323. tracks = self._extract_tracks(data, artist_id, url, tld)
  324. title = try_get(data, lambda x: x['artist']['name'], compat_str)
  325. return self.playlist_result(
  326. self._build_playlist(tracks), artist_id, title)
  327. class YandexMusicArtistTracksIE(YandexMusicArtistBaseIE):
  328. IE_NAME = 'yandexmusic:artist:tracks'
  329. IE_DESC = 'Яндекс.Музыка - Артист - Треки'
  330. _VALID_URL = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by)/artist/(?P<id>\d+)/tracks'
  331. _TESTS = [{
  332. 'url': 'https://music.yandex.ru/artist/617526/tracks',
  333. 'info_dict': {
  334. 'id': '617526',
  335. 'title': 'md5:131aef29d45fd5a965ca613e708c040b',
  336. },
  337. 'playlist_count': 507,
  338. # 'skip': 'Travis CI servers blocked by YandexMusic',
  339. }]
  340. _ARTIST_SORT = ''
  341. _ARTIST_WHAT = 'tracks'
  342. def _real_extract(self, url):
  343. mobj = re.match(self._VALID_URL, url)
  344. tld = mobj.group('tld')
  345. artist_id = mobj.group('id')
  346. data = self._call_artist(tld, url, artist_id)
  347. tracks = self._extract_tracks(data, artist_id, url, tld)
  348. artist = try_get(data, lambda x: x['artist']['name'], compat_str)
  349. title = '%s - %s' % (artist or artist_id, 'Треки')
  350. return self.playlist_result(
  351. self._build_playlist(tracks), artist_id, title)
  352. class YandexMusicArtistAlbumsIE(YandexMusicArtistBaseIE):
  353. IE_NAME = 'yandexmusic:artist:albums'
  354. IE_DESC = 'Яндекс.Музыка - Артист - Альбомы'
  355. _VALID_URL = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by)/artist/(?P<id>\d+)/albums'
  356. _TESTS = [{
  357. 'url': 'https://music.yandex.ru/artist/617526/albums',
  358. 'info_dict': {
  359. 'id': '617526',
  360. 'title': 'md5:55dc58d5c85699b7fb41ee926700236c',
  361. },
  362. 'playlist_count': 8,
  363. # 'skip': 'Travis CI servers blocked by YandexMusic',
  364. }]
  365. _ARTIST_SORT = 'year'
  366. _ARTIST_WHAT = 'albums'
  367. def _real_extract(self, url):
  368. mobj = re.match(self._VALID_URL, url)
  369. tld = mobj.group('tld')
  370. artist_id = mobj.group('id')
  371. data = self._call_artist(tld, url, artist_id)
  372. entries = []
  373. for album in data['albums']:
  374. if not isinstance(album, dict):
  375. continue
  376. album_id = album.get('id')
  377. if not album_id:
  378. continue
  379. entries.append(self.url_result(
  380. 'http://music.yandex.ru/album/%s' % album_id,
  381. ie=YandexMusicAlbumIE.ie_key(), video_id=album_id))
  382. artist = try_get(data, lambda x: x['artist']['name'], compat_str)
  383. title = '%s - %s' % (artist or artist_id, 'Альбомы')
  384. return self.playlist_result(entries, artist_id, title)