youtube.py 185 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
7377837793780378137823783378437853786378737883789379037913792379337943795379637973798379938003801380238033804380538063807380838093810381138123813381438153816381738183819382038213822382338243825382638273828382938303831383238333834383538363837383838393840384138423843384438453846384738483849385038513852385338543855385638573858385938603861386238633864386538663867386838693870387138723873387438753876387738783879388038813882388338843885388638873888388938903891389238933894389538963897389838993900390139023903390439053906390739083909391039113912391339143915391639173918391939203921392239233924392539263927392839293930393139323933393439353936393739383939394039413942394339443945394639473948394939503951395239533954395539563957395839593960396139623963396439653966396739683969397039713972397339743975397639773978397939803981398239833984398539863987398839893990399139923993399439953996399739983999400040014002400340044005400640074008400940104011401240134014401540164017401840194020402140224023402440254026402740284029403040314032403340344035403640374038403940404041404240434044404540464047404840494050405140524053405440554056405740584059406040614062406340644065406640674068406940704071407240734074407540764077407840794080408140824083408440854086408740884089409040914092409340944095409640974098409941004101410241034104410541064107410841094110411141124113411441154116411741184119412041214122412341244125412641274128412941304131413241334134413541364137413841394140414141424143414441454146414741484149415041514152415341544155415641574158415941604161416241634164416541664167416841694170417141724173417441754176417741784179418041814182418341844185418641874188418941904191419241934194419541964197419841994200420142024203420442054206420742084209421042114212421342144215421642174218421942204221422242234224422542264227422842294230423142324233423442354236423742384239424042414242424342444245424642474248424942504251425242534254425542564257425842594260
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import collections
  4. import hashlib
  5. import itertools
  6. import json
  7. import os.path
  8. import random
  9. import re
  10. import string
  11. import time
  12. import traceback
  13. from .common import InfoExtractor, SearchInfoExtractor
  14. from ..compat import (
  15. compat_chr,
  16. compat_HTTPError,
  17. compat_map as map,
  18. compat_str,
  19. compat_urllib_parse,
  20. compat_urllib_parse_parse_qs as compat_parse_qs,
  21. compat_urllib_parse_unquote_plus,
  22. compat_urllib_parse_urlparse,
  23. compat_zip as zip,
  24. )
  25. from ..jsinterp import JSInterpreter
  26. from ..utils import (
  27. bug_reports_message,
  28. clean_html,
  29. dict_get,
  30. error_to_compat_str,
  31. ExtractorError,
  32. filter_dict,
  33. float_or_none,
  34. get_first,
  35. extract_attributes,
  36. get_element_by_attribute,
  37. int_or_none,
  38. join_nonempty,
  39. js_to_json,
  40. LazyList,
  41. merge_dicts,
  42. mimetype2ext,
  43. NO_DEFAULT,
  44. parse_codecs,
  45. parse_count,
  46. parse_duration,
  47. parse_qs,
  48. qualities,
  49. remove_end,
  50. remove_start,
  51. smuggle_url,
  52. str_or_none,
  53. str_to_int,
  54. T,
  55. traverse_obj,
  56. try_call,
  57. try_get,
  58. txt_or_none,
  59. unescapeHTML,
  60. unified_strdate,
  61. unsmuggle_url,
  62. update_url,
  63. update_url_query,
  64. url_or_none,
  65. urlencode_postdata,
  66. urljoin,
  67. variadic,
  68. )
  69. class YoutubeBaseInfoExtractor(InfoExtractor):
  70. """Provide base functions for Youtube extractors"""
  71. _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  72. _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
  73. _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
  74. _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
  75. _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
  76. _NETRC_MACHINE = 'youtube'
  77. # If True it will raise an error if no login info is provided
  78. _LOGIN_REQUIRED = False
  79. _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM)'
  80. _INNERTUBE_CLIENTS = {
  81. 'ios': {
  82. 'INNERTUBE_CONTEXT': {
  83. 'client': {
  84. 'clientName': 'IOS',
  85. 'clientVersion': '20.10.4',
  86. 'deviceMake': 'Apple',
  87. 'deviceModel': 'iPhone16,2',
  88. 'userAgent': 'com.google.ios.youtube/20.10.4 (iPhone16,2; U; CPU iOS 18_3_2 like Mac OS X;)',
  89. 'osName': 'iPhone',
  90. 'osVersion': '18.3.2.22D82',
  91. },
  92. },
  93. 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
  94. 'REQUIRE_JS_PLAYER': False,
  95. 'REQUIRE_PO_TOKEN': True,
  96. },
  97. # mweb has 'ultralow' formats
  98. # See: https://github.com/yt-dlp/yt-dlp/pull/557
  99. 'mweb': {
  100. 'INNERTUBE_CONTEXT': {
  101. 'client': {
  102. 'clientName': 'MWEB',
  103. 'clientVersion': '2.20250311.03.00',
  104. # mweb previously did not require PO Token with this UA
  105. 'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
  106. },
  107. },
  108. 'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
  109. 'REQUIRE_PO_TOKEN': True,
  110. 'SUPPORTS_COOKIES': True,
  111. },
  112. 'tv': {
  113. 'INNERTUBE_CONTEXT': {
  114. 'client': {
  115. 'clientName': 'TVHTML5',
  116. 'clientVersion': '7.20250312.16.00',
  117. 'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version',
  118. },
  119. },
  120. 'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
  121. 'SUPPORTS_COOKIES': True,
  122. },
  123. 'tv_simply': {
  124. 'INNERTUBE_CONTEXT': {
  125. 'client': {
  126. 'clientName': 'TVHTML5_SIMPLY',
  127. 'clientVersion': '1.0',
  128. },
  129. },
  130. 'INNERTUBE_CONTEXT_CLIENT_NAME': 75,
  131. },
  132. 'web': {
  133. 'INNERTUBE_CONTEXT': {
  134. 'client': {
  135. 'clientName': 'WEB',
  136. 'clientVersion': '2.20250312.04.00',
  137. },
  138. },
  139. 'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
  140. 'REQUIRE_PO_TOKEN': True,
  141. 'SUPPORTS_COOKIES': True,
  142. 'PLAYER_PARAMS': '8AEB',
  143. },
  144. }
  def _login(self):
      """
      Attempt to log in to YouTube.

      True is returned if successful or skipped (no username available).
      A falsy value is returned if login failed: False in most cases, None
      when the login page or the challenge response could not be downloaded.
      If _LOGIN_REQUIRED is set and no authentication was provided
      (neither credentials nor a 'cookiefile' param), an ExtractorError
      is raised.
      """
      username, password = self._get_login_info()
      # No authentication to be performed
      if username is None:
          if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
              raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
          return True

      login_page = self._download_webpage(
          self._LOGIN_URL, None,
          note='Downloading login page',
          errnote='unable to fetch login page', fatal=False)
      if login_page is False:
          return

      login_form = self._hidden_inputs(login_page)

      # The same ServiceLogin URL is embedded in both the lookup and the
      # challenge request payloads
      service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

      def req(url, f_req, note, errnote):
          """POST the hidden login form fields plus f_req (as JSON) to url.

          Returns the parsed JSON response, or False on failure (fatal=False).
          """
          data = login_form.copy()
          data.update({
              'pstMsg': 1,
              'checkConnection': 'youtube',
              'checkedDomains': 'youtube',
              'hl': 'en',
              'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
              'f.req': json.dumps(f_req),
              'flowName': 'GlifWebSignIn',
              'flowEntry': 'ServiceLogin',
              # TODO: reverse actual botguard identifier generation algo
              'bgRequest': '["identifier",""]',
          })
          return self._download_json(
              url, None, note=note, errnote=errnote,
              # Drop everything before the first '[' so the body parses as JSON
              transform_source=lambda s: re.sub(r'^[^[]*', '', s),
              fatal=False,
              data=urlencode_postdata(data), headers={
                  'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                  'Google-Accounts-XSRF': 1,
              })

      def warn(message):
          self._downloader.report_warning(message)

      lookup_req = [
          username,
          None, [], None, 'US', None, None, 2, False, True,
          [
              None, None,
              [2, 1, None, 1,
               service_login_url,
               None, [], 4],
              1, [None, None, []], None, None, None, True,
          ],
          username,
      ]

      lookup_results = req(
          self._LOOKUP_URL, lookup_req,
          'Looking up account info', 'Unable to look up account info')
      if lookup_results is False:
          return False

      user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
      if not user_hash:
          warn('Unable to extract user hash')
          return False

      challenge_req = [
          user_hash,
          None, 1, None, [1, None, None, None, [password, None, True]],
          [
              None, None, [2, 1, None, 1, service_login_url, None, [], 4],
              1, [None, None, []], None, None, None, True,
          ]]

      challenge_results = req(
          self._CHALLENGE_URL, challenge_req,
          'Logging in', 'Unable to log in')
      if challenge_results is False:
          return

      login_res = try_get(challenge_results, lambda x: x[0][5], list)
      if login_res:
          login_msg = try_get(login_res, lambda x: x[5], compat_str)
          # Parenthesised: '%' binds tighter than the conditional expression,
          # so without the parentheses the prefix is lost for any other message
          warn(
              'Unable to login: %s' % (
                  'Invalid password'
                  if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
          return False

      res = try_get(challenge_results, lambda x: x[0][-1], list)
      if not res:
          warn('Unable to extract result entry')
          return False

      login_challenge = try_get(res, lambda x: x[0][0], list)
      if login_challenge:
          challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
          if challenge_str == 'TWO_STEP_VERIFICATION':
              # SEND_SUCCESS - TFA code has been successfully sent to phone
              # QUOTA_EXCEEDED - reached the limit of TFA codes
              status = try_get(login_challenge, lambda x: x[5], compat_str)
              if status == 'QUOTA_EXCEEDED':
                  warn('Exceeded the limit of TFA codes, try later')
                  return False

              tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
              if not tl:
                  warn('Unable to extract TL')
                  return False

              tfa_code = self._get_tfa_info('2-step verification code')

              if not tfa_code:
                  warn(
                      'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                      '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                  return False

              tfa_code = remove_start(tfa_code, 'G-')

              tfa_req = [
                  user_hash, None, 2, None,
                  [
                      9, None, None, None, None, None, None, None,
                      [None, tfa_code, True, 2],
                  ]]

              tfa_results = req(
                  self._TFA_URL.format(tl), tfa_req,
                  'Submitting TFA code', 'Unable to submit TFA code')

              if tfa_results is False:
                  return False

              tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
              if tfa_res:
                  tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                  # Parenthesised for the same precedence reason as above
                  warn(
                      'Unable to finish TFA: %s' % (
                          'Invalid TFA code'
                          if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                  return False

              check_cookie_url = try_get(
                  tfa_results, lambda x: x[0][-1][2], compat_str)
          else:
              CHALLENGES = {
                  'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                  'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                  'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
              }
              challenge = CHALLENGES.get(
                  challenge_str,
                  '%s returned error %s.' % (self.IE_NAME, challenge_str))
              warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
              return False
      else:
          check_cookie_url = try_get(res, lambda x: x[2], compat_str)

      if not check_cookie_url:
          warn('Unable to extract CheckCookie URL')
          return False

      check_cookie_results = self._download_webpage(
          check_cookie_url, None, 'Checking cookie', fatal=False)

      if check_cookie_results is False:
          return False

      # The login counts as successful only if the CheckCookie page
      # mentions the account page
      if 'https://myaccount.google.com/' not in check_cookie_results:
          warn('Unable to log in')
          return False

      return True
  def _initialize_consent(self):
      """Set the SOCS cookie on .youtube.com to 'CAI' unless cookies say otherwise.

      Nothing is changed when a '__Secure-3PSID' cookie is present, or when
      a 'SOCS' cookie exists whose value does not start with 'CAA'.
      """
      jar = self._get_cookies('https://www.youtube.com/')
      if not jar.get('__Secure-3PSID'):
          socs_cookie = jar.get('SOCS')
          # Overwrite only a missing SOCS cookie or one starting with 'CAA'
          if not socs_cookie or socs_cookie.value.startswith('CAA'):
              # accept all (required for mixes)
              self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True)
  def _real_initialize(self):
      """Set up the consent cookie, then attempt login if a downloader is attached.

      The login result is not propagated; this method always returns None.
      """
      self._initialize_consent()
      if self._downloader is not None:
          self._login()
  312. _DEFAULT_API_DATA = {'context': _INNERTUBE_CLIENTS['web']['INNERTUBE_CONTEXT']}
  313. _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
  314. _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
  315. _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
  316. _SAPISID = None
  def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
      """Build a 'SAPISIDHASH <timestamp>_<sha1>' header value from cookies.

      The SAPISID value is looked up once in the youtube.com cookies
      ('__Secure-3PAPISID' first, then 'SAPISID') and cached on
      self._SAPISID; False is cached when no usable cookie is found.

      @param origin: origin string mixed into the hash
      @returns the header value, or None if no SAPISID value is available
      """
      # int(): on Python 2 round() returns a float, which would be rendered
      # as e.g. '1700000000.0' in both the hashed string and the header
      time_now = int(round(time.time()))
      if self._SAPISID is None:
          yt_cookies = self._get_cookies('https://www.youtube.com')
          # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
          # See: https://github.com/yt-dlp/yt-dlp/issues/393
          sapisid_cookie = dict_get(
              yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
          if sapisid_cookie and sapisid_cookie.value:
              self._SAPISID = sapisid_cookie.value
              self.write_debug('Extracted SAPISID cookie')
              # SAPISID cookie is required if not already present
              if not yt_cookies.get('SAPISID'):
                  self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                  self._set_cookie(
                      '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
          else:
              # Cache the negative result so the cookies are not re-read
              self._SAPISID = False
      if not self._SAPISID:
          return None
      # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
      sapisidhash = hashlib.sha1(
          '{0} {1} {2}'.format(time_now, self._SAPISID, origin).encode('utf-8')).hexdigest()
      return 'SAPISIDHASH {0}_{1}'.format(time_now, sapisidhash)
  def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                note='Downloading API JSON'):
      """POST a JSON request to the youtubei/v1 endpoint `ep` and return the parsed JSON.

      @param ep: endpoint name appended to https://www.youtube.com/youtubei/v1/
      @param query: dict merged over a shallow copy of _DEFAULT_API_DATA
                    to form the request body
      @param video_id: id passed through to _download_json
      @param fatal: passed through to _download_json
      @param headers: optional extra headers, merged over the default
                      'content-type: application/json'
      @param note: progress note passed through to _download_json
      """
      payload = dict(self._DEFAULT_API_DATA)
      payload.update(query)

      request_headers = {'content-type': 'application/json'}
      request_headers.update(headers or {})

      # was: 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
      url_query = filter_dict({
          'key': self.get_param('youtube_innertube_key'),
          'prettyPrint': 'false',
      })
      endpoint_url = 'https://www.youtube.com/youtubei/v1/%s' % ep
      body = json.dumps(payload).encode('utf8')

      return self._download_json(
          endpoint_url, video_id=video_id,
          note=note, errnote='Unable to download API page',
          data=body, fatal=fatal,
          headers=request_headers, query=url_query)
  def _extract_yt_initial_data(self, video_id, webpage):
      """Find the ytInitialData JSON object in `webpage` and return it parsed.

      The pattern anchored by _YT_INITIAL_BOUNDARY_RE is tried first, then
      the bare _YT_INITIAL_DATA_RE.
      """
      patterns = (
          r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
          self._YT_INITIAL_DATA_RE,
      )
      raw_json = self._search_regex(patterns, webpage, 'yt initial data')
      return self._parse_json(raw_json, video_id)
  def _extract_visitor_data(self, *args):
      """
      Extract visitorData from an API response or ytcfg
      Appears to be used to track session state

      A truthy 'youtube_visitor_data' param takes precedence; otherwise the
      given objects are searched via get_first for a string at one of the
      known key paths.
      """
      candidate_paths = (
          'VISITOR_DATA',
          ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
          ('responseContext', 'visitorData'),
      )
      return (
          self.get_param('youtube_visitor_data')
          or get_first(args, (candidate_paths, T(compat_str))))
  377. def _extract_ytcfg(self, video_id, webpage):
  378. ytcfg = self._search_json(
  379. r'ytcfg\.set\s*\(', webpage, 'ytcfg', video_id,
  380. end_pattern=r'\)\s*;', default={})
  381. traverse_obj(ytcfg, (
  382. 'INNERTUBE_CONTEXT', 'client', 'configInfo',
  383. T(lambda x: x.pop('appInstallData', None))))
  384. return ytcfg
  def _extract_video(self, renderer):
      """Turn a video renderer dict into a 'url'-type result for YoutubeIE.

      `renderer['videoId']` is required (KeyError otherwise); every other
      field is optional and left as None when it cannot be extracted.
      """
      def text_at(getters):
          # First getter yielding a string wins; None if none does
          return try_get(renderer, getters, compat_str)

      vid = renderer['videoId']

      # Leading run of digits/commas of the whitespace-stripped view count text
      views_text = text_at(lambda x: x['viewCountText']['simpleText']) or ''
      views_match = self._search_regex(
          r'^([\d,]+)', re.sub(r'\s', '', views_text),
          'view count', default=None)

      return {
          '_type': 'url',
          'ie_key': YoutubeIE.ie_key(),
          'id': vid,
          'url': vid,
          'title': text_at((
              lambda x: x['title']['runs'][0]['text'],
              lambda x: x['title']['simpleText'],
              lambda x: x['headline']['simpleText'])),
          'description': text_at(
              lambda x: x['descriptionSnippet']['runs'][0]['text']),
          'duration': parse_duration(text_at(
              lambda x: x['lengthText']['simpleText'])),
          'view_count': str_to_int(views_match),
          'uploader': text_at((
              lambda x: x['ownerText']['runs'][0]['text'],
              lambda x: x['shortBylineText']['runs'][0]['text'])),
      }
  @staticmethod
  def _extract_thumbnails(data, *path_list, **kw_final_key):
      """
      Extract thumbnails from thumbnails dict
      @param path_list: path list to level that contains 'thumbnails' key
      @param final_key: (keyword) key to use instead of 'thumbnails'

      Each result keeps only 'url', 'height' and 'width'; entries without
      a usable 'url' are dropped.
      """
      def strip_maxres_query(url):
          # Sometimes youtube gives a wrong thumbnail URL. See:
          # https://github.com/yt-dlp/yt-dlp/issues/233
          # https://github.com/ytdl-org/youtube-dl/issues/28023
          if url and 'maxresdefault' in url:
              return update_url(url, query=None)
          return url

      def require_url(thumbnail):
          return thumbnail if thumbnail.get('url') else None

      final_key = kw_final_key.get('final_key', 'thumbnails')
      # one branch per given path (or just the top level when none is
      # given), each ending at every item under final_key
      tail = (final_key, Ellipsis)
      branches = tuple(
          variadic(path) + tail for path in path_list or [()])
      fields = {
          'url': ('url', T(url_or_none), T(strip_maxres_query)),
          'height': ('height', T(int_or_none)),
          'width': ('width', T(int_or_none)),
      }
      return traverse_obj(data, (branches, fields, T(require_url)))
  435. def _search_results(self, query, params):
  436. data = {
  437. 'context': {
  438. 'client': {
  439. 'clientName': 'WEB',
  440. 'clientVersion': '2.20201021.03.00',
  441. },
  442. },
  443. 'query': query,
  444. }
  445. if params:
  446. data['params'] = params
  447. for page_num in itertools.count(1):
  448. search = self._download_json(
  449. 'https://www.youtube.com/youtubei/v1/search',
  450. video_id='query "%s"' % query,
  451. note='Downloading page %s' % page_num,
  452. errnote='Unable to download API page', fatal=False,
  453. data=json.dumps(data).encode('utf8'),
  454. query={
  455. # 'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
  456. 'prettyPrint': 'false',
  457. },
  458. headers={'content-type': 'application/json'})
  459. if not search:
  460. break
  461. slr_contents = try_get(
  462. search,
  463. (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
  464. lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
  465. list)
  466. if not slr_contents:
  467. break
  468. for slr_content in slr_contents:
  469. isr_contents = try_get(
  470. slr_content,
  471. lambda x: x['itemSectionRenderer']['contents'],
  472. list)
  473. if not isr_contents:
  474. continue
  475. for content in isr_contents:
  476. if not isinstance(content, dict):
  477. continue
  478. video = content.get('videoRenderer')
  479. if not isinstance(video, dict):
  480. continue
  481. video_id = video.get('videoId')
  482. if not video_id:
  483. continue
  484. yield self._extract_video(video)
  485. token = try_get(
  486. slr_contents,
  487. lambda x: x[-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
  488. compat_str)
  489. if not token:
  490. break
  491. data['continuation'] = token
  492. @staticmethod
  493. def _owner_endpoints_path():
  494. return [
  495. Ellipsis,
  496. lambda k, _: k.endswith('SecondaryInfoRenderer'),
  497. ('owner', 'videoOwner'), 'videoOwnerRenderer', 'title',
  498. 'runs', Ellipsis]
  499. def _extract_channel_id(self, webpage, videodetails={}, metadata={}, renderers=[]):
  500. channel_id = None
  501. if any((videodetails, metadata, renderers)):
  502. channel_id = (
  503. traverse_obj(videodetails, 'channelId')
  504. or traverse_obj(metadata, 'externalChannelId', 'externalId')
  505. or traverse_obj(renderers,
  506. self._owner_endpoints_path() + [
  507. 'navigationEndpoint', 'browseEndpoint', 'browseId'],
  508. get_all=False)
  509. )
  510. return channel_id or self._html_search_meta(
  511. 'channelId', webpage, 'channel id', default=None)
  def _extract_author_var(self, webpage, var_name,
                          videodetails={}, metadata={}, renderers=[]):
      """
      Find a property of the video's author/owner

      @param var_name: 'name' or 'url' (anything else raises KeyError)
      When any of videodetails, metadata or renderers is non-empty they
      are consulted in that order; if that yields nothing, the value is
      read from the <link itemprop="{var_name}"> tag inside the element
      of webpage that has itemprop="author".
      """
      # lookup keys per source: (HTML attribute, videodetails, metadata, renderers)
      html_attr, details_path, metadata_path, renderer_tail = {
          'name': ('content', 'author', (('ownerChannelName', None), 'title'), ['text']),
          'url': ('href', 'ownerProfileUrl', 'vanityChannelUrl',
                  ['navigationEndpoint', 'browseEndpoint', 'canonicalBaseUrl']),
      }[var_name]

      if videodetails or metadata or renderers:
          from_data = (
              traverse_obj(videodetails, details_path, get_all=False)
              or traverse_obj(metadata, metadata_path, get_all=False)
              or traverse_obj(
                  renderers,
                  self._owner_endpoints_path() + renderer_tail,
                  get_all=False))
          if from_data:
              return from_data

      # HTML fallback: empty strings stand in for anything not found, so
      # each step always receives a string
      author_html = get_element_by_attribute(
          'itemprop', 'author', webpage or '') or ''
      link_tag = self._search_regex(
          r'''(?s)(<link\b[^>]+\bitemprop\s*=\s*("|')%s\2[^>]*>)'''
          % re.escape(var_name),
          author_html, 'author link', default='')
      return traverse_obj(extract_attributes(link_tag), html_attr)
  536. @staticmethod
  537. def _yt_urljoin(url_or_path):
  538. return urljoin('https://www.youtube.com', url_or_path)
  539. def _extract_uploader_id(self, uploader_url):
  540. return self._search_regex(
  541. r'/(?:(?:channel|user)/|(?=@))([^/?&#]+)', uploader_url or '',
  542. 'uploader id', default=None)
  543. class YoutubeIE(YoutubeBaseInfoExtractor):
  544. IE_DESC = 'YouTube.com'
  545. _INVIDIOUS_SITES = (
  546. # invidious-redirect websites
  547. r'(?:www\.)?redirect\.invidious\.io',
  548. r'(?:(?:www|dev)\.)?invidio\.us',
  549. # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
  550. r'(?:(?:www|no)\.)?invidiou\.sh',
  551. r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
  552. r'(?:www\.)?invidious\.kabi\.tk',
  553. r'(?:www\.)?invidious\.13ad\.de',
  554. r'(?:www\.)?invidious\.mastodon\.host',
  555. r'(?:www\.)?invidious\.zapashcanon\.fr',
  556. r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
  557. r'(?:www\.)?invidious\.tinfoil-hat\.net',
  558. r'(?:www\.)?invidious\.himiko\.cloud',
  559. r'(?:www\.)?invidious\.reallyancient\.tech',
  560. r'(?:www\.)?invidious\.tube',
  561. r'(?:www\.)?invidiou\.site',
  562. r'(?:www\.)?invidious\.site',
  563. r'(?:www\.)?invidious\.xyz',
  564. r'(?:www\.)?invidious\.nixnet\.xyz',
  565. r'(?:www\.)?invidious\.048596\.xyz',
  566. r'(?:www\.)?invidious\.drycat\.fr',
  567. r'(?:www\.)?inv\.skyn3t\.in',
  568. r'(?:www\.)?tube\.poal\.co',
  569. r'(?:www\.)?tube\.connect\.cafe',
  570. r'(?:www\.)?vid\.wxzm\.sx',
  571. r'(?:www\.)?vid\.mint\.lgbt',
  572. r'(?:www\.)?vid\.puffyan\.us',
  573. r'(?:www\.)?yewtu\.be',
  574. r'(?:www\.)?yt\.elukerio\.org',
  575. r'(?:www\.)?yt\.lelux\.fi',
  576. r'(?:www\.)?invidious\.ggc-project\.de',
  577. r'(?:www\.)?yt\.maisputain\.ovh',
  578. r'(?:www\.)?ytprivate\.com',
  579. r'(?:www\.)?invidious\.13ad\.de',
  580. r'(?:www\.)?invidious\.toot\.koeln',
  581. r'(?:www\.)?invidious\.fdn\.fr',
  582. r'(?:www\.)?watch\.nettohikari\.com',
  583. r'(?:www\.)?invidious\.namazso\.eu',
  584. r'(?:www\.)?invidious\.silkky\.cloud',
  585. r'(?:www\.)?invidious\.exonip\.de',
  586. r'(?:www\.)?invidious\.riverside\.rocks',
  587. r'(?:www\.)?invidious\.blamefran\.net',
  588. r'(?:www\.)?invidious\.moomoo\.de',
  589. r'(?:www\.)?ytb\.trom\.tf',
  590. r'(?:www\.)?yt\.cyberhost\.uk',
  591. r'(?:www\.)?kgg2m7yk5aybusll\.onion',
  592. r'(?:www\.)?qklhadlycap4cnod\.onion',
  593. r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
  594. r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
  595. r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
  596. r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
  597. r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
  598. r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
  599. r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
  600. r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
  601. r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
  602. r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
  603. )
  604. _VALID_URL = r"""(?x)^
  605. (
  606. (?:https?://|//) # http(s):// or protocol-independent URL
  607. (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
  608. (?:www\.)?deturl\.com/www\.youtube\.com|
  609. (?:www\.)?pwnyoutube\.com|
  610. (?:www\.)?hooktube\.com|
  611. (?:www\.)?yourepeat\.com|
  612. tube\.majestyc\.net|
  613. %(invidious)s|
  614. youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
  615. (?:.*?\#/)? # handle anchor (#/) redirect urls
  616. (?: # the various things that can precede the ID:
  617. (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
  618. |shorts/
  619. |(?: # or the v= param in all its forms
  620. (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
  621. (?:\?|\#!?) # the params delimiter ? or # or #!
  622. (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
  623. v=
  624. )
  625. ))
  626. |(?:
  627. youtu\.be| # just youtu.be/xxxx
  628. vid\.plus| # or vid.plus/xxxx
  629. zwearz\.com/watch| # or zwearz.com/watch/xxxx
  630. %(invidious)s
  631. )/
  632. |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
  633. )
  634. )? # all until now is optional -> you can pass the naked ID
  635. (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
  636. (?(1).+)? # if we found the ID, everything can follow
  637. $""" % {
  638. 'invidious': '|'.join(_INVIDIOUS_SITES),
  639. }
  640. _PLAYER_INFO_RE = (
  641. r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/(?:tv-)?player',
  642. r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias(?:_tce)?\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
  643. r'\b(?P<id>vfl[a-zA-Z0-9_-]{6,})\b.*?\.js$',
  644. )
  645. _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
  646. _GEO_BYPASS = False
  647. IE_NAME = 'youtube'
  648. _TESTS = [
  649. {
  650. 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
  651. 'info_dict': {
  652. 'id': 'BaW_jenozKc',
  653. 'ext': 'mp4',
  654. 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
  655. 'uploader': 'Philipp Hagemeister',
  656. 'uploader_id': '@PhilippHagemeister',
  657. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@PhilippHagemeister',
  658. 'channel': 'Philipp Hagemeister',
  659. 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
  660. 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
  661. 'upload_date': '20121002',
  662. 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
  663. 'categories': ['Science & Technology'],
  664. 'tags': ['youtube-dl'],
  665. 'duration': 10,
  666. 'view_count': int,
  667. 'like_count': int,
  668. 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
  669. 'start_time': 1,
  670. 'end_time': 9,
  671. },
  672. },
  673. {
  674. 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
  675. 'note': 'Embed-only video (#1746)',
  676. 'info_dict': {
  677. 'id': 'yZIXLfi8CZQ',
  678. 'ext': 'mp4',
  679. 'upload_date': '20120608',
  680. 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
  681. 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
  682. 'uploader': 'SET India',
  683. 'uploader_id': 'setindia',
  684. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
  685. 'age_limit': 18,
  686. },
  687. 'skip': 'Private video',
  688. },
  689. {
  690. 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
  691. 'note': 'Use the first video ID in the URL',
  692. 'info_dict': {
  693. 'id': 'BaW_jenozKc',
  694. 'ext': 'mp4',
  695. 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
  696. 'uploader': 'Philipp Hagemeister',
  697. 'uploader_id': '@PhilippHagemeister',
  698. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@PhilippHagemeister',
  699. 'upload_date': '20121002',
  700. 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
  701. 'categories': ['Science & Technology'],
  702. 'tags': ['youtube-dl'],
  703. 'duration': 10,
  704. 'view_count': int,
  705. 'like_count': int,
  706. },
  707. 'params': {
  708. 'skip_download': True,
  709. },
  710. },
  711. {
  712. 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
  713. 'note': '256k DASH audio (format 141) via DASH manifest',
  714. 'info_dict': {
  715. 'id': 'a9LDPn-MO4I',
  716. 'ext': 'm4a',
  717. 'upload_date': '20121002',
  718. 'uploader_id': '8KVIDEO',
  719. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
  720. 'description': '',
  721. 'uploader': '8KVIDEO',
  722. 'title': 'UHDTV TEST 8K VIDEO.mp4',
  723. },
  724. 'params': {
  725. 'youtube_include_dash_manifest': True,
  726. 'format': '141',
  727. },
  728. 'skip': 'format 141 not served any more',
  729. },
  730. # DASH manifest with encrypted signature
  731. {
  732. 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
  733. 'info_dict': {
  734. 'id': 'IB3lcPjvWLA',
  735. 'ext': 'm4a',
  736. 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
  737. 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
  738. 'duration': 244,
  739. 'uploader': 'AfrojackVEVO',
  740. 'uploader_id': '@AfrojackVEVO',
  741. 'upload_date': '20131011',
  742. 'abr': 129.495,
  743. },
  744. 'params': {
  745. 'youtube_include_dash_manifest': True,
  746. 'format': '141/bestaudio[ext=m4a]',
  747. },
  748. },
  749. # Controversy video
  750. {
  751. 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
  752. 'info_dict': {
  753. 'id': 'T4XJQO3qol8',
  754. 'ext': 'mp4',
  755. 'duration': 219,
  756. 'upload_date': '20100909',
  757. 'uploader': 'Amazing Atheist',
  758. 'uploader_id': '@theamazingatheist',
  759. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@theamazingatheist',
  760. 'title': 'Burning Everyone\'s Koran',
  761. 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
  762. },
  763. },
  764. # Age-gated videos
  765. {
  766. 'note': 'Age-gated video (No vevo, embed allowed)',
  767. 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
  768. 'info_dict': {
  769. 'id': 'HtVdAasjOgU',
  770. 'ext': 'mp4',
  771. 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
  772. 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
  773. 'duration': 142,
  774. 'uploader': 'The Witcher',
  775. 'uploader_id': '@thewitcher',
  776. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@thewitcher',
  777. 'upload_date': '20140605',
  778. 'thumbnail': 'https://i.ytimg.com/vi/HtVdAasjOgU/maxresdefault.jpg',
  779. 'age_limit': 18,
  780. 'categories': ['Gaming'],
  781. 'tags': 'count:17',
  782. 'channel': 'The Witcher',
  783. 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
  784. 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
  785. 'view_count': int,
  786. 'like_count': int,
  787. },
  788. },
  789. {
  790. 'note': 'Age-gated video with embed allowed in public site',
  791. 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
  792. 'info_dict': {
  793. 'id': 'HsUATh_Nc2U',
  794. 'ext': 'mp4',
  795. 'title': 'Godzilla 2 (Official Video)',
  796. 'description': 'md5:bf77e03fcae5529475e500129b05668a',
  797. 'duration': 177,
  798. 'uploader': 'FlyingKitty',
  799. 'uploader_id': '@FlyingKitty900',
  800. 'upload_date': '20200408',
  801. 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
  802. 'age_limit': 18,
  803. 'categories': ['Entertainment'],
  804. 'tags': ['Flyingkitty', 'godzilla 2'],
  805. 'channel': 'FlyingKitty',
  806. 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
  807. 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
  808. 'view_count': int,
  809. 'like_count': int,
  810. },
  811. },
  812. {
  813. 'note': 'Age-gated video embeddable only with clientScreen=EMBED',
  814. 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
  815. 'info_dict': {
  816. 'id': 'Tq92D6wQ1mg',
  817. 'ext': 'mp4',
  818. 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
  819. 'description': 'md5:17eccca93a786d51bc67646756894066',
  820. 'duration': 106,
  821. 'uploader': 'Projekt Melody',
  822. 'uploader_id': '@ProjektMelody',
  823. 'upload_date': '20191227',
  824. 'age_limit': 18,
  825. 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
  826. 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
  827. 'categories': ['Entertainment'],
  828. 'channel': 'Projekt Melody',
  829. 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
  830. 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
  831. 'view_count': int,
  832. 'like_count': int,
  833. },
  834. },
  835. {
  836. 'note': 'Non-Age-gated non-embeddable video',
  837. 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
  838. 'info_dict': {
  839. 'id': 'MeJVWBSsPAY',
  840. 'ext': 'mp4',
  841. 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
  842. 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
  843. 'duration': 210,
  844. 'upload_date': '20130730',
  845. 'uploader': 'Herr Lurik',
  846. 'uploader_id': '@HerrLurik',
  847. 'uploader_url': 'http://www.youtube.com/@HerrLurik',
  848. 'age_limit': 0,
  849. 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/hqdefault.jpg',
  850. 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
  851. 'categories': ['Music'],
  852. 'channel': 'Herr Lurik',
  853. 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
  854. 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
  855. 'artist': 'OOMPH!',
  856. 'view_count': int,
  857. 'like_count': int,
  858. },
  859. },
  860. {
  861. 'note': 'Non-bypassable age-gated video',
  862. 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
  863. 'only_matching': True,
  864. },
  865. {
  866. 'note': 'Age-gated video only available with authentication (not via embed workaround)',
  867. 'url': 'XgnwCQzjau8',
  868. 'only_matching': True,
  869. 'skip': '''This video has been removed for violating YouTube's Community Guidelines''',
  870. },
  871. # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
  872. # YouTube Red ad is not captured for creator
  873. {
  874. 'url': '__2ABJjxzNo',
  875. 'info_dict': {
  876. 'id': '__2ABJjxzNo',
  877. 'ext': 'mp4',
  878. 'duration': 266,
  879. 'upload_date': '20100430',
  880. 'uploader_id': '@deadmau5',
  881. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@deadmau5',
  882. 'creator': 'deadmau5',
  883. 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
  884. 'uploader': 'deadmau5',
  885. 'title': 'Deadmau5 - Some Chords (HD)',
  886. 'alt_title': 'Some Chords',
  887. },
  888. 'expected_warnings': [
  889. 'DASH manifest missing',
  890. ],
  891. },
  892. # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
  893. {
  894. 'url': 'lqQg6PlCWgI',
  895. 'info_dict': {
  896. 'id': 'lqQg6PlCWgI',
  897. 'ext': 'mp4',
  898. 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
  899. 'description': r're:(?s)(?:.+\s)?HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games\s*',
  900. 'duration': 6085,
  901. 'upload_date': '20150827',
  902. 'uploader_id': '@Olympics',
  903. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@Olympics',
  904. 'uploader': r're:Olympics?',
  905. 'age_limit': 0,
  906. 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
  907. 'categories': ['Sports'],
  908. 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
  909. 'channel': 'Olympics',
  910. 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
  911. 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
  912. 'view_count': int,
  913. 'like_count': int,
  914. },
  915. },
  916. # Non-square pixels
  917. {
  918. 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
  919. 'info_dict': {
  920. 'id': '_b-2C3KPAM0',
  921. 'ext': 'mp4',
  922. 'stretched_ratio': 16 / 9.,
  923. 'duration': 85,
  924. 'upload_date': '20110310',
  925. 'uploader_id': '@AllenMeow',
  926. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@AllenMeow',
  927. 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
  928. 'uploader': '孫ᄋᄅ',
  929. 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
  930. },
  931. },
  932. # url_encoded_fmt_stream_map is empty string
  933. {
  934. 'url': 'qEJwOuvDf7I',
  935. 'info_dict': {
  936. 'id': 'qEJwOuvDf7I',
  937. 'ext': 'webm',
  938. 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
  939. 'description': '',
  940. 'upload_date': '20150404',
  941. 'uploader_id': 'spbelect',
  942. 'uploader': 'Наблюдатели Петербурга',
  943. },
  944. 'params': {
  945. 'skip_download': 'requires avconv',
  946. },
  947. 'skip': 'This live event has ended.',
  948. },
  949. # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
  950. {
  951. 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
  952. 'info_dict': {
  953. 'id': 'FIl7x6_3R5Y',
  954. 'ext': 'webm',
  955. 'title': 'md5:7b81415841e02ecd4313668cde88737a',
  956. 'description': 'md5:116377fd2963b81ec4ce64b542173306',
  957. 'duration': 220,
  958. 'upload_date': '20150625',
  959. 'uploader_id': 'dorappi2000',
  960. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
  961. 'uploader': 'dorappi2000',
  962. 'formats': 'mincount:31',
  963. },
  964. 'skip': 'not actual any more',
  965. },
  966. # DASH manifest with segment_list
  967. {
  968. 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
  969. 'md5': '8ce563a1d667b599d21064e982ab9e31',
  970. 'info_dict': {
  971. 'id': 'CsmdDsKjzN8',
  972. 'ext': 'mp4',
  973. 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
  974. 'uploader': 'Airtek',
  975. 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
  976. 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
  977. 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
  978. },
  979. 'params': {
  980. 'youtube_include_dash_manifest': True,
  981. 'format': '135', # bestvideo
  982. },
  983. 'skip': 'This live event has ended.',
  984. },
  985. {
  986. # Multifeed videos (multiple cameras), URL is for Main Camera
  987. 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
  988. 'info_dict': {
  989. 'id': 'jvGDaLqkpTg',
  990. 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
  991. 'description': 'md5:e03b909557865076822aa169218d6a5d',
  992. },
  993. 'playlist': [{
  994. 'info_dict': {
  995. 'id': 'jvGDaLqkpTg',
  996. 'ext': 'mp4',
  997. 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
  998. 'description': 'md5:e03b909557865076822aa169218d6a5d',
  999. 'duration': 10643,
  1000. 'upload_date': '20161111',
  1001. 'uploader': 'Team PGP',
  1002. 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
  1003. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
  1004. },
  1005. }, {
  1006. 'info_dict': {
  1007. 'id': '3AKt1R1aDnw',
  1008. 'ext': 'mp4',
  1009. 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
  1010. 'description': 'md5:e03b909557865076822aa169218d6a5d',
  1011. 'duration': 10991,
  1012. 'upload_date': '20161111',
  1013. 'uploader': 'Team PGP',
  1014. 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
  1015. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
  1016. },
  1017. }, {
  1018. 'info_dict': {
  1019. 'id': 'RtAMM00gpVc',
  1020. 'ext': 'mp4',
  1021. 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
  1022. 'description': 'md5:e03b909557865076822aa169218d6a5d',
  1023. 'duration': 10995,
  1024. 'upload_date': '20161111',
  1025. 'uploader': 'Team PGP',
  1026. 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
  1027. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
  1028. },
  1029. }, {
  1030. 'info_dict': {
  1031. 'id': '6N2fdlP3C5U',
  1032. 'ext': 'mp4',
  1033. 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
  1034. 'description': 'md5:e03b909557865076822aa169218d6a5d',
  1035. 'duration': 10990,
  1036. 'upload_date': '20161111',
  1037. 'uploader': 'Team PGP',
  1038. 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
  1039. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
  1040. },
  1041. }],
  1042. 'params': {
  1043. 'skip_download': True,
  1044. },
  1045. 'skip': 'Not multifeed any more',
  1046. },
  1047. {
  1048. # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
  1049. 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
  1050. 'info_dict': {
  1051. 'id': 'gVfLd0zydlo',
  1052. 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
  1053. },
  1054. 'playlist_count': 2,
  1055. 'skip': 'Not multifeed any more',
  1056. },
  1057. {
  1058. 'url': 'https://vid.plus/FlRa-iH7PGw',
  1059. 'only_matching': True,
  1060. },
  1061. {
  1062. 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
  1063. 'only_matching': True,
  1064. },
  1065. {
  1066. # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
  1067. # Also tests cut-off URL expansion in video description (see
  1068. # https://github.com/ytdl-org/youtube-dl/issues/1892,
  1069. # https://github.com/ytdl-org/youtube-dl/issues/8164)
  1070. 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
  1071. 'info_dict': {
  1072. 'id': 'lsguqyKfVQg',
  1073. 'ext': 'mp4',
  1074. 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
  1075. 'alt_title': 'Dark Walk',
  1076. 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
  1077. 'duration': 133,
  1078. 'upload_date': '20151119',
  1079. 'uploader_id': '@IronSoulElf',
  1080. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@IronSoulElf',
  1081. 'uploader': 'IronSoulElf',
  1082. 'creator': r're:Todd Haberman[;,]\s+Daniel Law Heath and Aaron Kaplan',
  1083. 'track': 'Dark Walk',
  1084. 'artist': r're:Todd Haberman[;,]\s+Daniel Law Heath and Aaron Kaplan',
  1085. 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
  1086. },
  1087. 'params': {
  1088. 'skip_download': True,
  1089. },
  1090. },
  1091. {
  1092. # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
  1093. 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
  1094. 'only_matching': True,
  1095. },
  1096. {
  1097. # Video with yt:stretch=17:0
  1098. 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
  1099. 'info_dict': {
  1100. 'id': 'Q39EVAstoRM',
  1101. 'ext': 'mp4',
  1102. 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
  1103. 'description': 'md5:ee18a25c350637c8faff806845bddee9',
  1104. 'upload_date': '20151107',
  1105. 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
  1106. 'uploader': 'CH GAMER DROID',
  1107. },
  1108. 'params': {
  1109. 'skip_download': True,
  1110. },
  1111. 'skip': 'This video does not exist.',
  1112. },
  1113. {
  1114. # Video with incomplete 'yt:stretch=16:'
  1115. 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
  1116. 'only_matching': True,
  1117. },
  1118. {
  1119. # Video licensed under Creative Commons
  1120. 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
  1121. 'info_dict': {
  1122. 'id': 'M4gD1WSo5mA',
  1123. 'ext': 'mp4',
  1124. 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
  1125. 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
  1126. 'duration': 721,
  1127. 'upload_date': '20150127',
  1128. 'uploader_id': '@BKCHarvard',
  1129. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@BKCHarvard',
  1130. 'uploader': 'The Berkman Klein Center for Internet & Society',
  1131. 'license': 'Creative Commons Attribution license (reuse allowed)',
  1132. },
  1133. 'params': {
  1134. 'skip_download': True,
  1135. },
  1136. },
  1137. {
  1138. # Channel-like uploader_url
  1139. 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
  1140. 'info_dict': {
  1141. 'id': 'eQcmzGIKrzg',
  1142. 'ext': 'mp4',
  1143. 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
  1144. 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
  1145. 'duration': 4060,
  1146. 'upload_date': '20151119',
  1147. 'uploader': 'Bernie Sanders',
  1148. 'uploader_id': '@BernieSanders',
  1149. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@BernieSanders',
  1150. 'license': 'Creative Commons Attribution license (reuse allowed)',
  1151. },
  1152. 'params': {
  1153. 'skip_download': True,
  1154. },
  1155. },
  1156. {
  1157. 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
  1158. 'only_matching': True,
  1159. },
  1160. {
  1161. # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
  1162. 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
  1163. 'only_matching': True,
  1164. },
  1165. {
  1166. # Rental video preview
  1167. 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
  1168. 'info_dict': {
  1169. 'id': 'uGpuVWrhIzE',
  1170. 'ext': 'mp4',
  1171. 'title': 'Piku - Trailer',
  1172. 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
  1173. 'upload_date': '20150811',
  1174. 'uploader': 'FlixMatrix',
  1175. 'uploader_id': 'FlixMatrixKaravan',
  1176. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
  1177. 'license': 'Standard YouTube License',
  1178. },
  1179. 'params': {
  1180. 'skip_download': True,
  1181. },
  1182. 'skip': 'This video is not available.',
  1183. },
  1184. {
  1185. # YouTube Red video with episode data
  1186. 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
  1187. 'info_dict': {
  1188. 'id': 'iqKdEhx-dD4',
  1189. 'ext': 'mp4',
  1190. 'title': 'Isolation - Mind Field (Ep 1)',
  1191. 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
  1192. 'duration': 2085,
  1193. 'upload_date': '20170118',
  1194. 'uploader': 'Vsauce',
  1195. 'uploader_id': '@Vsauce',
  1196. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@Vsauce',
  1197. 'series': 'Mind Field',
  1198. 'season_number': 1,
  1199. 'episode_number': 1,
  1200. },
  1201. 'params': {
  1202. 'skip_download': True,
  1203. },
  1204. 'expected_warnings': [
  1205. 'Skipping DASH manifest',
  1206. ],
  1207. },
  1208. {
  1209. # The following content has been identified by the YouTube community
  1210. # as inappropriate or offensive to some audiences.
  1211. 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
  1212. 'info_dict': {
  1213. 'id': '6SJNVb0GnPI',
  1214. 'ext': 'mp4',
  1215. 'title': 'Race Differences in Intelligence',
  1216. 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
  1217. 'duration': 965,
  1218. 'upload_date': '20140124',
  1219. 'uploader': 'New Century Foundation',
  1220. 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
  1221. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
  1222. },
  1223. 'params': {
  1224. 'skip_download': True,
  1225. },
  1226. 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
  1227. },
  1228. {
  1229. # itag 212
  1230. 'url': '1t24XAntNCY',
  1231. 'only_matching': True,
  1232. },
  1233. {
  1234. # geo restricted to JP
  1235. 'url': 'sJL6WA-aGkQ',
  1236. 'only_matching': True,
  1237. },
  1238. {
  1239. 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
  1240. 'only_matching': True,
  1241. },
  1242. {
  1243. 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
  1244. 'only_matching': True,
  1245. },
  1246. {
  1247. # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
  1248. 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
  1249. 'only_matching': True,
  1250. },
  1251. {
  1252. # DRM protected
  1253. 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
  1254. 'only_matching': True,
  1255. },
  1256. {
  1257. # Video with unsupported adaptive stream type formats
  1258. 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
  1259. 'info_dict': {
  1260. 'id': 'Z4Vy8R84T1U',
  1261. 'ext': 'mp4',
  1262. 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
  1263. 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
  1264. 'duration': 433,
  1265. 'upload_date': '20130923',
  1266. 'uploader': 'Amelia Putri Harwita',
  1267. 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
  1268. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
  1269. 'formats': 'maxcount:10',
  1270. },
  1271. 'params': {
  1272. 'skip_download': True,
  1273. 'youtube_include_dash_manifest': False,
  1274. },
  1275. 'skip': 'not actual any more',
  1276. },
  1277. {
  1278. # Youtube Music Auto-generated description
  1279. 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
  1280. 'info_dict': {
  1281. 'id': 'MgNrAu2pzNs',
  1282. 'ext': 'mp4',
  1283. 'title': 'Voyeur Girl',
  1284. 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
  1285. 'upload_date': '20190312',
  1286. 'uploader': 'Stephen - Topic',
  1287. 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
  1288. 'artist': 'Stephen',
  1289. 'track': 'Voyeur Girl',
  1290. 'album': 'it\'s too much love to know my dear',
  1291. 'release_date': '20190313',
  1292. 'release_year': 2019,
  1293. },
  1294. 'params': {
  1295. 'skip_download': True,
  1296. },
  1297. },
  1298. {
  1299. 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
  1300. 'only_matching': True,
  1301. },
  1302. {
  1303. # invalid -> valid video id redirection
  1304. 'url': 'DJztXj2GPfl',
  1305. 'info_dict': {
  1306. 'id': 'DJztXj2GPfk',
  1307. 'ext': 'mp4',
  1308. 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
  1309. 'description': 'md5:bf577a41da97918e94fa9798d9228825',
  1310. 'upload_date': '20090125',
  1311. 'uploader': 'Prochorowka',
  1312. 'uploader_id': 'Prochorowka',
  1313. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
  1314. 'artist': 'Panjabi MC',
  1315. 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
  1316. 'album': 'Beware of the Boys (Mundian To Bach Ke)',
  1317. },
  1318. 'params': {
  1319. 'skip_download': True,
  1320. },
  1321. 'skip': 'Video unavailable',
  1322. },
  1323. {
  1324. # empty description results in an empty string
  1325. 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
  1326. 'info_dict': {
  1327. 'id': 'x41yOUIvK2k',
  1328. 'ext': 'mp4',
  1329. 'title': 'IMG 3456',
  1330. 'description': '',
  1331. 'upload_date': '20170613',
  1332. 'uploader': "l'Or Vert asbl",
  1333. 'uploader_id': '@ElevageOrVert',
  1334. },
  1335. 'params': {
  1336. 'skip_download': True,
  1337. },
  1338. },
  1339. {
  1340. # with '};' inside yt initial data (see [1])
  1341. # see [2] for an example with '};' inside ytInitialPlayerResponse
  1342. # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
  1343. # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
  1344. 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
  1345. 'info_dict': {
  1346. 'id': 'CHqg6qOn4no',
  1347. 'ext': 'mp4',
  1348. 'title': 'Part 77 Sort a list of simple types in c#',
  1349. 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
  1350. 'upload_date': '20130831',
  1351. 'uploader': 'kudvenkat',
  1352. 'uploader_id': '@Csharp-video-tutorialsBlogspot',
  1353. },
  1354. 'params': {
  1355. 'skip_download': True,
  1356. },
  1357. },
  1358. {
  1359. # another example of '};' in ytInitialData
  1360. 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
  1361. 'only_matching': True,
  1362. },
  1363. {
  1364. 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
  1365. 'only_matching': True,
  1366. },
  1367. {
  1368. # https://github.com/ytdl-org/youtube-dl/pull/28094
  1369. 'url': 'OtqTfy26tG0',
  1370. 'info_dict': {
  1371. 'id': 'OtqTfy26tG0',
  1372. 'ext': 'mp4',
  1373. 'title': 'Burn Out',
  1374. 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
  1375. 'upload_date': '20141120',
  1376. 'uploader': 'The Cinematic Orchestra - Topic',
  1377. 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
  1378. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
  1379. 'artist': 'The Cinematic Orchestra',
  1380. 'track': 'Burn Out',
  1381. 'album': 'Every Day',
  1382. 'release_data': None,
  1383. 'release_year': None,
  1384. },
  1385. 'params': {
  1386. 'skip_download': True,
  1387. },
  1388. },
  1389. {
  1390. # controversial video, only works with bpctr when authenticated with cookies
  1391. 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
  1392. 'only_matching': True,
  1393. },
  1394. {
  1395. # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
  1396. 'url': 'cBvYw8_A0vQ',
  1397. 'info_dict': {
  1398. 'id': 'cBvYw8_A0vQ',
  1399. 'ext': 'mp4',
  1400. 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
  1401. 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
  1402. 'upload_date': '20201120',
  1403. 'uploader': 'Walk around Japan',
  1404. 'uploader_id': '@walkaroundjapan7124',
  1405. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@walkaroundjapan7124',
  1406. },
  1407. 'params': {
  1408. 'skip_download': True,
  1409. },
  1410. },
  1411. {
  1412. # YT 'Shorts'
  1413. 'url': 'https://youtube.com/shorts/4L2J27mJ3Dc',
  1414. 'info_dict': {
  1415. 'id': '4L2J27mJ3Dc',
  1416. 'ext': 'mp4',
  1417. 'title': 'Midwest Squid Game #Shorts',
  1418. 'description': 'md5:976512b8a29269b93bbd8a61edc45a6d',
  1419. 'upload_date': '20211025',
  1420. 'uploader': 'Charlie Berens',
  1421. 'uploader_id': '@CharlieBerens',
  1422. },
  1423. 'params': {
  1424. 'skip_download': True,
  1425. },
  1426. },
  1427. ]
  # Known properties of formats, keyed by itag (as a string).
  # Entries only list the fields that are fixed for the itag; see the
  # notes on itags 36, 138 and 272 for fields that vary between videos.
  _formats = {
      '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
      '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
      '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
      '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
      '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
      '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
      '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
      '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
      # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
      '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
      '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
      '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
      '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
      '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
      '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
      '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
      '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
      '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
      # 3D videos
      '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
      '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
      '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
      '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
      '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
      '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
      '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
      # Apple HTTP Live Streaming
      '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
      '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
      '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
      '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
      '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
      '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
      '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
      '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
      # DASH mp4 video
      '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
      '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
      '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
      '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
      '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
      '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
      '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
      '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
      '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
      '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
      '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
      '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
      # DASH mp4 audio
      '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
      '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
      '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
      '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
      '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
      '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
      '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
      # DASH webm video
      '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
      '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
      '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
      '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
      '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
      '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
      '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
      '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
      '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
      '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
      '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
      '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
      '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
      '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
      '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
      # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
      '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
      '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
      '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
      '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
      '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
      '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
      # DASH webm audio
      '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
      '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
      # DASH webm audio with opus inside
      '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
      '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
      '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
      # RTMP (unnamed)
      '_rtmp': {'protocol': 'rtmp'},
      # av01 video only formats sometimes served with "unknown" codecs
      '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
      '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
      '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
      '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
  }
  # (variant name, player JS path) pairs, in lookup order.
  # _player_js_cache_key() compares the second item with the player URL
  # path after stripping the leading `/s/player/<player id>/`, and uses
  # the first item of the first matching pair as the variant name.
  _PLAYER_JS_VARIANT_MAP = (
      ('main', 'player_ias.vflset/en_US/base.js'),
      ('tcc', 'player_ias_tcc.vflset/en_US/base.js'),
      ('tce', 'player_ias_tce.vflset/en_US/base.js'),
      ('es5', 'player_es5.vflset/en_US/base.js'),
      ('es6', 'player_es6.vflset/en_US/base.js'),
      ('tv', 'tv-player-ias.vflset/tv-player-ias.js'),
      ('tv_es6', 'tv-player-es6.vflset/tv-player-es6.js'),
      ('phone', 'player-plasma-ias-phone-en_US.vflset/base.js'),
      ('tablet', 'player-plasma-ias-tablet-en_US.vflset/base.js'),
  )
  @classmethod
  def suitable(cls, url):
      """Decline URLs that carry a non-empty `list` query parameter

      Any other URL is passed to the inherited suitability check.
      """
      list_id = parse_qs(url).get('list', [None])[0]
      return False if list_id else super(YoutubeIE, cls).suitable(url)
  def __init__(self, *args, **kwargs):
      """Initialise the parent extractor, then the per-instance caches"""
      super(YoutubeIE, self).__init__(*args, **kwargs)
      # player JS text, keyed by the player cache key (see _load_player())
      self._code_cache = dict()
      # data loaded from/stored to the cache, plus results kept by _cached()
      self._player_cache = dict()
  # *ytcfgs, webpage=None
  def _extract_player_url(self, *ytcfgs, **kw_webpage):
      """Return the player JS URL joined onto https://www.youtube.com, if any

      The positional arguments are ytcfg dicts; for compatibility the first
      one may be the webpage text instead. The webpage may also be given
      as the `webpage` keyword (`**kw_webpage` stands in for a keyword-only
      parameter), in which case the keyword takes precedence.

      A URL found in the webpage is appended as an extra ytcfg, so URLs
      from the ytcfgs passed in are preferred.
      """
      # Bind `webpage` unconditionally so that the keyword is honoured, and
      # the name is always defined, when every positional is a ytcfg dict
      webpage = kw_webpage.get('webpage')
      if not webpage and ytcfgs and not isinstance(ytcfgs[0], dict):
          webpage = ytcfgs[0]
      if webpage:
          player_url = self._search_regex(
              r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
              webpage, 'player URL', fatal=False)
          if player_url:
              ytcfgs = ytcfgs + ({'PLAYER_JS_URL': player_url},)
      return traverse_obj(
          ytcfgs, (Ellipsis, 'PLAYER_JS_URL'), (Ellipsis, 'WEB_PLAYER_CONTEXT_CONFIGS', Ellipsis, 'jsUrl'),
          get_all=False, expected_type=lambda u: urljoin('https://www.youtube.com', u))
  def _download_player_url(self, video_id, fatal=False):
      """Build the `player_ias` JS URL from the version in the iframe API JS

      Returns None if no player version was found.
      """
      iframe_js = self._download_webpage(
          'https://www.youtube.com/iframe_api',
          note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
      # with nothing downloaded, search an empty text with a None default
      search_default = NO_DEFAULT if iframe_js else None
      version = self._search_regex(
          r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js or '', 'player version',
          fatal=fatal, default=search_default)
      if not version:
          return None
      return 'https://www.youtube.com/s/player/{0}/player_ias.vflset/en_US/base.js'.format(version)
  def _signature_cache_id(self, example_sig):
      """ Return the lengths of the dot-separated parts of a signature, joined by dots """
      part_lengths = map(len, example_sig.split('.'))
      return '.'.join(map(compat_str, part_lengths))
  1568. def _extract_player_info(self, player_url):
  1569. try:
  1570. return self._search_regex(
  1571. self._PLAYER_INFO_RE, player_url, 'player info', group='id')
  1572. except ExtractorError as e:
  1573. raise ExtractorError(
  1574. 'Cannot identify player %r' % (player_url,), cause=e)
  1575. def _player_js_cache_key(self, player_url, extra_id=None, _cache={}):
  1576. if player_url not in _cache:
  1577. player_id = self._extract_player_info(player_url)
  1578. player_path = remove_start(
  1579. compat_urllib_parse.urlparse(player_url).path,
  1580. '/s/player/{0}/'.format(player_id))
  1581. variant = next((k for k, v in self._PLAYER_JS_VARIANT_MAP
  1582. if v == player_path), None)
  1583. if not variant:
  1584. variant = next(
  1585. (k for k, v in self._PLAYER_JS_VARIANT_MAP
  1586. if re.match(re.escape(v).replace('en_US', r'\w+') + '$', player_path)),
  1587. None)
  1588. if not variant:
  1589. self.write_debug(
  1590. 'Unable to determine player JS variant\n'
  1591. ' player = {0}'.format(player_url), only_once=True)
  1592. variant = re.sub(r'[^a-zA-Z0-9]', '_', remove_end(player_path, '.js'))
  1593. _cache[player_url] = join_nonempty(player_id, variant)
  1594. if extra_id:
  1595. extra_id = '-'.join((_cache[player_url], extra_id))
  1596. assert os.path.basename(extra_id) == extra_id
  1597. return extra_id
  1598. return _cache[player_url]
  1599. def _load_player(self, video_id, player_url, fatal=True):
  1600. player_js_key = self._player_js_cache_key(player_url)
  1601. if player_js_key not in self._code_cache:
  1602. code = self._download_webpage(
  1603. player_url, video_id, fatal=fatal,
  1604. note='Downloading player {0}'.format(player_js_key),
  1605. errnote='Download of {0} failed'.format(player_url))
  1606. if code:
  1607. self._code_cache[player_js_key] = code
  1608. return self._code_cache.get(player_js_key)
  1609. def _load_player_data_from_cache(self, name, player_url, extra_id=None):
  1610. cache_id = ('youtube-{0}'.format(name), self._player_js_cache_key(player_url, extra_id))
  1611. data = self._player_cache.get(cache_id)
  1612. if data:
  1613. return data
  1614. data = self.cache.load(*cache_id, min_ver='2025.04.07')
  1615. if data:
  1616. self._player_cache[cache_id] = data
  1617. return data
  1618. def _store_player_data_to_cache(self, name, player_url, data, extra_id=None):
  1619. cache_id = ('youtube-{0}'.format(name), self._player_js_cache_key(player_url, extra_id))
  1620. if cache_id not in self._player_cache:
  1621. self.cache.store(cache_id[0], cache_id[1], data)
  1622. self._player_cache[cache_id] = data
  1623. def _remove_player_data_from_cache(self, name, player_url, extra_id=None):
  1624. cache_id = ('youtube-{0}'.format(name), self._player_js_cache_key(player_url, extra_id))
  1625. if cache_id in self._player_cache:
  1626. self.cache.clear(*cache_id)
  1627. self._player_cache.pop(cache_id, None)
  1628. def _extract_signature_function(self, video_id, player_url, example_sig):
  1629. # player_id = self._extract_player_info(player_url)
  1630. # Read from filesystem cache
  1631. extra_id = self._signature_cache_id(example_sig)
  1632. self.write_debug('Extracting signature function {0}-{1}'.format(player_url, extra_id))
  1633. cache_spec, code = self._load_player_data_from_cache(
  1634. 'sigfuncs', player_url, extra_id=extra_id), None
  1635. if not cache_spec:
  1636. code = self._load_player(video_id, player_url)
  1637. if code:
  1638. res = self._parse_sig_js(code)
  1639. test_string = ''.join(map(compat_chr, range(len(example_sig))))
  1640. cache_spec = [ord(c) for c in res(test_string)]
  1641. self._store_player_data_to_cache(
  1642. 'sigfuncs', player_url, cache_spec, extra_id=extra_id)
  1643. else:
  1644. self.report_warning(
  1645. 'Failed to compute signature function {0}-{1}'.format(
  1646. player_url, extra_id))
  1647. return lambda s: ''.join(s[i] for i in cache_spec)
  def _print_sig_code(self, func, example_sig):
      """Show Python code equivalent to the signature function `func`

      Does nothing unless the `youtube_print_sig_code` param is set.
      `func` is applied to a probe string built with compat_chr from
      range(len(example_sig)); the ord() values of the result are rendered
      as a sum of subscripts and slices of `s` and sent to the screen.
      """
      if not self.get_param('youtube_print_sig_code'):
          return

      def gen_sig_code(idxs):
          # Yield expressions over `s`: a slice for each run of indices
          # that changes by a constant +1 or -1, else a single subscript
          def _genslice(start, end, step):
              starts = '' if start == 0 else str(start)
              # a stop index below zero cannot be written literally in a
              # slice, so it is left open
              ends = (':%d' % (end + step)) if end + step >= 0 else ':'
              steps = '' if step == 1 else (':%d' % step)
              return 's[{0}{1}{2}]'.format(starts, ends, steps)

          step = None
          # Quell pyflakes warnings - start will be set when step is set
          start = '(Never used)'
          for i, prev in zip(idxs[1:], idxs[:-1]):
              if step is not None:
                  if i - prev == step:
                      # still inside the current run
                      continue
                  yield _genslice(start, prev, step)
                  step = None
                  continue
              if i - prev in [-1, 1]:
                  # a new run starts at prev
                  step = i - prev
                  start = prev
                  continue
              else:
                  yield 's[%d]' % prev
          # flush whatever is pending after the last pair
          # NOTE(review): `i` is only bound if idxs has two or more items
          if step is None:
              yield 's[%d]' % i
          else:
              yield _genslice(start, i, step)

      test_string = ''.join(map(compat_chr, range(len(example_sig))))
      cache_res = func(test_string)
      cache_spec = [ord(c) for c in cache_res]
      expr_code = ' + '.join(gen_sig_code(cache_spec))
      signature_id_tuple = '(%s)' % (
          ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
      code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
              ' return %s\n') % (signature_id_tuple, expr_code)
      self.to_screen('Extracted signature function:\n' + code)
  1686. def _extract_sig_fn(self, jsi, funcname):
  1687. var_ay = self._search_regex(
  1688. r'''(?x)
  1689. (?:\*/|\{|\n|^)\s*(?:'[^']+'\s*;\s*)
  1690. (var\s*[\w$]+\s*=\s*(?:
  1691. ('|")(?:\\\2|(?!\2).)+\2\s*\.\s*split\(\s*('|")\W+\3\s*\)|
  1692. \[\s*(?:('|")(?:\\\4|(?!\4).)*\4\s*(?:(?=\])|,\s*))+\]
  1693. ))(?=\s*[,;])
  1694. ''', jsi.code, 'useful values', default='')
  1695. sig_fn = jsi.extract_function_code(funcname)
  1696. if var_ay:
  1697. sig_fn = (sig_fn[0], ';\n'.join((var_ay, sig_fn[1])))
  1698. return sig_fn
  def _parse_sig_js(self, jscode):
      """Return a Python callable for the signature function in jscode

      The function name is found with the first matching pattern below;
      its code is then extracted and wrapped by a JSInterpreter.
      """
      # Examples where `sig` is funcname:
      # sig=function(a){a=a.split(""); ... ;return a.join("")};
      # ;c&&(c=sig(decodeURIComponent(c)),a.set(b,encodeURIComponent(c)));return a};
      # {var l=f,m=h.sp,n=sig(decodeURIComponent(h.s));l.set(m,encodeURIComponent(n))}
      # sig=function(J){J=J.split(""); ... ;return J.join("")};
      # ;N&&(N=sig(decodeURIComponent(N)),J.set(R,encodeURIComponent(N)));return J};
      # {var H=u,k=f.sp,v=sig(decodeURIComponent(f.s));H.set(k,encodeURIComponent(v))}
      name_patterns = (
          r'\b(?P<var>[\w$]+)&&\((?P=var)=(?P<sig>[\w$]{2,})\(decodeURIComponent\((?P=var)\)\)',
          r'(?P<sig>[\w$]+)\s*=\s*function\(\s*(?P<arg>[\w$]+)\s*\)\s*{\s*(?P=arg)\s*=\s*(?P=arg)\.split\(\s*""\s*\)\s*;\s*[^}]+;\s*return\s+(?P=arg)\.join\(\s*""\s*\)',
          r'(?:\b|[^\w$])(?P<sig>[\w$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[\w$]{2}\.[\w$]{2}\(a,\d+\))?',
          # Old patterns
          r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[\w$]+)\(',
          r'\b[\w]+\s*&&\s*[\w]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[\w$]+)\(',
          r'\bm=(?P<sig>[\w$]{2,})\(decodeURIComponent\(h\.s\)\)',
          # Obsolete patterns
          r'("|\')signature\1\s*,\s*(?P<sig>[\w$]+)\(',
          r'\.sig\|\|(?P<sig>[\w$]+)\(',
          r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[\w$]+)\(',
          r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[\w$]+)\(',
          r'\bc\s*&&\s*[\w]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[\w$]+)\(',
      )
      sig_name = self._search_regex(
          name_patterns, jscode,
          'Initial JS player signature function name', group='sig')

      interpreter = JSInterpreter(jscode)
      sig_code = self._extract_sig_fn(interpreter, sig_name)
      js_func = interpreter.extract_function_from_code(*sig_code)

      def call_sig(s):
          # the extracted function takes its arguments as a list
          return js_func([s])

      return call_sig
  1726. def _cached(self, func, *cache_id):
  1727. def inner(*args, **kwargs):
  1728. if cache_id not in self._player_cache:
  1729. try:
  1730. self._player_cache[cache_id] = func(*args, **kwargs)
  1731. except ExtractorError as e:
  1732. self._player_cache[cache_id] = e
  1733. except Exception as e:
  1734. self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
  1735. ret = self._player_cache[cache_id]
  1736. if isinstance(ret, Exception):
  1737. raise ret
  1738. return ret
  1739. return inner
  1740. def _decrypt_signature(self, s, video_id, player_url):
  1741. """Turn the encrypted s field into a working signature"""
  1742. extract_sig = self._cached(
  1743. self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
  1744. func = extract_sig(video_id, player_url, s)
  1745. self._print_sig_code(func, s)
  1746. return func(s)
  1747. # from yt-dlp
  1748. # See also:
  1749. # 1. https://github.com/ytdl-org/youtube-dl/issues/29326#issuecomment-894619419
  1750. # 2. https://code.videolan.org/videolan/vlc/-/blob/4fb284e5af69aa9ac2100ccbdd3b88debec9987f/share/lua/playlist/youtube.lua#L116
  1751. # 3. https://github.com/ytdl-org/youtube-dl/issues/30097#issuecomment-950157377
  1752. def _decrypt_nsig(self, n, video_id, player_url):
  1753. """Turn the encrypted n field into a working signature"""
  1754. if player_url is None:
  1755. raise ExtractorError('Cannot decrypt nsig without player_url')
  1756. try:
  1757. jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
  1758. except ExtractorError as e:
  1759. raise ExtractorError('Unable to extract nsig function code', cause=e)
  1760. if self.get_param('youtube_print_sig_code'):
  1761. self.to_screen('Extracted nsig function from {0}:\n{1}\n'.format(
  1762. player_id, func_code[1]))
  1763. try:
  1764. extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
  1765. ret = extract_nsig(jsi, func_code)(n)
  1766. except JSInterpreter.Exception as e:
  1767. self.report_warning(
  1768. '%s (%s %s)' % (
  1769. 'Unable to decode n-parameter: expect download to be blocked or throttled',
  1770. error_to_compat_str(e),
  1771. traceback.format_exc()),
  1772. video_id=video_id)
  1773. return
  1774. self.write_debug('Decrypted nsig {0} => {1}'.format(n, ret))
  1775. return ret
  def _extract_n_function_name(self, jscode):
      """Find the name of the n function in the player JS `jscode`

      Array assignments naming a candidate (`var y=[nfunc]` or
      `y[idx]=nfunc`) are tried first, each checked against the function
      definitions in the code. If none gives a name, the generic search
      is used instead, after a warning. When a name was found together
      with an index, the name is taken to be that of an array, and the
      item at the index of the array's value in the code is returned.
      """
      func_name, idx = None, None

      def generic_n_function_search(func_name=None):
          # Match a one-parameter function assignment whose body contains
          # `enhanced_except_` or returns a `..._w8_` string plus a name;
          # with func_name given, only that name is tried and a failed
          # search yields None rather than being an error
          return self._search_regex(
              r'''(?xs)
                  (?:(?<=[^\w$])|^) # instead of \b, which ignores $
                  (?P<name>%s)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
                  \s*\{(?:(?!};).)+?(?:
                      ["']enhanced_except_ |
                      return\s*(?P<q>"|')[a-zA-Z\d-]+_w8_(?P=q)\s*\+\s*[\w$]+
                  )
              ''' % (func_name or r'(?!\d)[a-zA-Z\d_$]+',), jscode,
              'Initial JS player n function name', group='name',
              default=None if func_name else NO_DEFAULT)

      # these special cases are redundant and probably obsolete (2025-04):
      # they make the tests run ~10% faster without fallback warnings
      # (the code is disabled by being wrapped in a string literal)
      r"""
      func_name, idx = self._search_regex(
          # (y=NuD(),Mw(k),q=k.Z[y]||null)&&(q=narray[idx](q),k.set(y,q),k.V||NuD(''))}};
          # (R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}};
          # or: (b=String.fromCharCode(110),c=a.get(b))&&c=narray[idx](c)
          # or: (b="nn"[+a.D],c=a.get(b))&&(c=narray[idx](c)
          # or: (PL(a),b=a.j.n||null)&&(b=narray[idx](b)
          # or: (b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
          # old: (b=a.get("n"))&&(b=narray[idx](b)(?P<c>[a-z])\s*=\s*[a-z]\s*
          # older: (b=a.get("n"))&&(b=nfunc(b)
          r'''(?x)
              # (expr, ...,
              \((?:(?:\s*[\w$]+\s*=)?(?:[\w$"+\.\s(\[]+(?:[)\]]\s*)?),)*
              # b=...
              (?P<b>[\w$]+)\s*=\s*(?!(?P=b)[^\w$])[\w$]+\s*(?:(?:
                  \.\s*[\w$]+ |
                  \[\s*[\w$]+\s*\] |
                  \.\s*get\s*\(\s*[\w$"]+\s*\)
              )\s*){,2}(?:\s*\|\|\s*null(?=\s*\)))?\s*
              \)\s*&&\s*\( # ...)&&(
              # b = nfunc, b = narray[idx]
              (?P=b)\s*=\s*(?P<nfunc>[\w$]+)\s*
              (?:\[\s*(?P<idx>[\w$]+)\s*\]\s*)?
              # (...)
              \(\s*[\w$]+\s*\)
          ''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'),
          default=(None, None))
      """

      if not func_name:
          # nfunc=function(x){...}|function nfunc(x); ...
          # ... var y=[nfunc]|y[idx]=nfunc);
          # obvious REs hang, so use a two-stage tactic
          for m in re.finditer(r'''(?x)
                  [\n;]var\s(?:(?:(?!,).)+,|\s)*?(?!\d)[\w$]+(?:\[(?P<idx>\d+)\])?\s*=\s*
                  (?(idx)|\[\s*)(?P<nfunc>(?!\d)[\w$]+)(?(idx)|\s*\])
                  \s*?[;\n]
                  ''', jscode):
              # second stage: the candidate must be defined as a function
              # of one parameter whose body does not begin with `return`
              fn = self._search_regex(
                  r'[;,]\s*(function\s+)?({0})(?(1)|\s*=\s*function)\s*\((?!\d)[\w$]+\)\s*\{1}(?!\s*return\s)'.format(
                      re.escape(m.group('nfunc')), '{'),
                  jscode, 'Initial JS player n function name (2)', group=2, default=None)
              if fn:
                  func_name = fn
                  idx = m.group('idx')
                  if generic_n_function_search(func_name):
                      # don't look any further
                      break

      # thx bashonly: yt-dlp/yt-dlp/pull/10611
      if not func_name:
          self.report_warning('Falling back to generic n function search', only_once=True)
          return generic_n_function_search()

      if not idx:
          return func_name

      # func_name names an array here: parse the array's value from the
      # code and pick the item at idx
      return self._search_json(
          r'(?<![\w-])var\s(?:(?:(?!,).)+,|\s)*?{0}\s*='.format(re.escape(func_name)), jscode,
          'Initial JS player n function list ({0}.{1})'.format(func_name, idx),
          func_name, contains_pattern=r'\[.+\]', end_pattern='[,;]',
          transform_source=js_to_json)[int(idx)]
  1850. def _extract_n_function_code(self, video_id, player_url):
  1851. player_id = self._extract_player_info(player_url)
  1852. func_code = self._load_player_data_from_cache('nsig', player_url)
  1853. jscode = func_code or self._load_player(video_id, player_url)
  1854. jsi = JSInterpreter(jscode)
  1855. if func_code:
  1856. return jsi, player_id, func_code
  1857. return self._extract_n_function_code_jsi(video_id, jsi, player_id, player_url)
  1858. def _extract_n_function_code_jsi(self, video_id, jsi, player_id=None, player_url=None):
  1859. func_name = self._extract_n_function_name(jsi.code)
  1860. func_code = self._extract_sig_fn(jsi, func_name)
  1861. if player_url:
  1862. self._store_player_data_to_cache('nsig', player_url, func_code)
  1863. return jsi, player_id, func_code
  1864. def _extract_n_function_from_code(self, jsi, func_code):
  1865. func = jsi.extract_function_from_code(*func_code)
  1866. def extract_nsig(s):
  1867. try:
  1868. ret = func([s], kwargs={'_ytdl_do_not_return': s})
  1869. except JSInterpreter.Exception:
  1870. raise
  1871. except Exception as e:
  1872. raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
  1873. if ret.startswith('enhanced_except_') or ret.endswith(s):
  1874. raise JSInterpreter.Exception('Signature function returned an exception')
  1875. return ret
  1876. return extract_nsig
  1877. def _unthrottle_format_urls(self, video_id, player_url, *formats):
  1878. def decrypt_nsig(n):
  1879. return self._cached(self._decrypt_nsig, 'nsig', n, player_url)
  1880. for fmt in formats:
  1881. parsed_fmt_url = compat_urllib_parse.urlparse(fmt['url'])
  1882. n_param = compat_parse_qs(parsed_fmt_url.query).get('n')
  1883. if not n_param:
  1884. continue
  1885. n_param = n_param[-1]
  1886. n_response = decrypt_nsig(n_param)(n_param, video_id, player_url)
  1887. if n_response is None:
  1888. # give up and forget cached data if descrambling failed
  1889. self._remove_player_data_from_cache('nsig', player_url)
  1890. break
  1891. fmt['url'] = update_url_query(fmt['url'], {'n': n_response})
  1892. # from yt-dlp, with tweaks
  1893. def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
  1894. """
  1895. Extract signatureTimestamp (sts)
  1896. Required to tell API what sig/player version is in use.
  1897. """
  1898. sts = traverse_obj(ytcfg, 'STS', expected_type=int)
  1899. if sts:
  1900. return sts
  1901. if not player_url:
  1902. error_msg = 'Cannot extract signature timestamp without player url'
  1903. if fatal:
  1904. raise ExtractorError(error_msg)
  1905. self.report_warning(error_msg)
  1906. return None
  1907. sts = self._load_player_data_from_cache('sts', player_url)
  1908. if sts:
  1909. return sts
  1910. # Attempt to extract from player
  1911. code = self._load_player(video_id, player_url, fatal=fatal)
  1912. sts = int_or_none(self._search_regex(
  1913. r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code or '',
  1914. 'JS player signature timestamp', group='sts', fatal=fatal))
  1915. if sts:
  1916. self._store_player_data_to_cache('sts', player_url, sts)
  1917. return sts
  1918. def _mark_watched(self, video_id, player_response):
  1919. playback_url = url_or_none(try_get(
  1920. player_response,
  1921. lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
  1922. if not playback_url:
  1923. return
  1924. # cpn generation algorithm is reverse engineered from base.js.
  1925. # In fact it works even with dummy cpn.
  1926. CPN_ALPHABET = string.ascii_letters + string.digits + '-_'
  1927. cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))
  1928. # more consistent results setting it to right before the end
  1929. qs = parse_qs(playback_url)
  1930. video_length = '{0}'.format(float((qs.get('len') or ['1.5'])[0]) - 1)
  1931. playback_url = update_url_query(
  1932. playback_url, {
  1933. 'ver': '2',
  1934. 'cpn': cpn,
  1935. 'cmt': video_length,
  1936. 'el': 'detailpage', # otherwise defaults to "shorts"
  1937. })
  1938. self._download_webpage(
  1939. playback_url, video_id, 'Marking watched',
  1940. 'Unable to mark watched', fatal=False)
  1941. @staticmethod
  1942. def _extract_urls(webpage):
  1943. # Embedded YouTube player
  1944. entries = [
  1945. unescapeHTML(mobj.group('url'))
  1946. for mobj in re.finditer(r'''(?x)
  1947. (?:
  1948. <iframe[^>]+?src=|
  1949. data-video-url=|
  1950. <embed[^>]+?src=|
  1951. embedSWF\(?:\s*|
  1952. <object[^>]+data=|
  1953. new\s+SWFObject\(
  1954. )
  1955. (["\'])
  1956. (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
  1957. (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
  1958. \1''', webpage)]
  1959. # lazyYT YouTube embed
  1960. entries.extend(list(map(
  1961. unescapeHTML,
  1962. re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
  1963. # Wordpress "YouTube Video Importer" plugin
  1964. matches = re.findall(r'''(?x)<div[^>]+
  1965. class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
  1966. data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
  1967. entries.extend(m[-1] for m in matches)
  1968. return entries
  1969. @staticmethod
  1970. def _extract_url(webpage):
  1971. urls = YoutubeIE._extract_urls(webpage)
  1972. return urls[0] if urls else None
  1973. @classmethod
  1974. def extract_id(cls, url):
  1975. mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
  1976. if mobj is None:
  1977. raise ExtractorError('Invalid URL: %s' % url)
  1978. return mobj.group(2)
  1979. def _extract_chapters_from_json(self, data, video_id, duration):
  1980. chapters_list = try_get(
  1981. data,
  1982. lambda x: x['playerOverlays']
  1983. ['playerOverlayRenderer']
  1984. ['decoratedPlayerBarRenderer']
  1985. ['decoratedPlayerBarRenderer']
  1986. ['playerBar']
  1987. ['chapteredPlayerBarRenderer']
  1988. ['chapters'],
  1989. list)
  1990. if not chapters_list:
  1991. return
  1992. def chapter_time(chapter):
  1993. return float_or_none(
  1994. try_get(
  1995. chapter,
  1996. lambda x: x['chapterRenderer']['timeRangeStartMillis'],
  1997. int),
  1998. scale=1000)
  1999. chapters = []
  2000. for next_num, chapter in enumerate(chapters_list, start=1):
  2001. start_time = chapter_time(chapter)
  2002. if start_time is None:
  2003. continue
  2004. end_time = (chapter_time(chapters_list[next_num])
  2005. if next_num < len(chapters_list) else duration)
  2006. if end_time is None:
  2007. continue
  2008. title = try_get(
  2009. chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
  2010. compat_str)
  2011. chapters.append({
  2012. 'start_time': start_time,
  2013. 'end_time': end_time,
  2014. 'title': title,
  2015. })
  2016. return chapters
  2017. def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
  2018. return self._parse_json(self._search_regex(
  2019. (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
  2020. regex), webpage, name, default='{}'), video_id, fatal=False)
  2021. def _real_extract(self, url):
  2022. url, smuggled_data = unsmuggle_url(url, {})
  2023. video_id = self._match_id(url)
  2024. base_url = self.http_scheme() + '//www.youtube.com/'
  2025. webpage_url = base_url + 'watch?v=' + video_id
  2026. webpage = self._download_webpage(
  2027. webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
  2028. player_response = None
  2029. player_url = None
  2030. if webpage:
  2031. player_response = self._extract_yt_initial_variable(
  2032. webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
  2033. video_id, 'initial player response')
  2034. is_live = traverse_obj(player_response, ('videoDetails', 'isLive'))
  2035. if False and not player_response:
  2036. player_response = self._call_api(
  2037. 'player', {'videoId': video_id}, video_id)
  2038. if True or not player_response:
  2039. origin = 'https://www.youtube.com'
  2040. pb_context = {'html5Preference': 'HTML5_PREF_WANTS'}
  2041. player_url = self._extract_player_url(webpage)
  2042. ytcfg = self._extract_ytcfg(video_id, webpage or '')
  2043. sts = self._extract_signature_timestamp(video_id, player_url, ytcfg)
  2044. if sts:
  2045. pb_context['signatureTimestamp'] = sts
  2046. client_names = traverse_obj(self._INNERTUBE_CLIENTS, (
  2047. T(dict.items), lambda _, k_v: not k_v[1].get('REQUIRE_PO_TOKEN'),
  2048. 0))[:1]
  2049. if 'web' not in client_names:
  2050. # webpage links won't download: ignore links and playability
  2051. player_response = filter_dict(
  2052. player_response or {},
  2053. lambda k, _: k not in ('streamingData', 'playabilityStatus'))
  2054. if is_live and 'ios' not in client_names:
  2055. client_names.append('ios')
  2056. headers = {
  2057. 'Sec-Fetch-Mode': 'navigate',
  2058. 'Origin': origin,
  2059. 'X-Goog-Visitor-Id': self._extract_visitor_data(ytcfg) or '',
  2060. }
  2061. auth = self._generate_sapisidhash_header(origin)
  2062. if auth is not None:
  2063. headers['Authorization'] = auth
  2064. headers['X-Origin'] = origin
  2065. for client in traverse_obj(self._INNERTUBE_CLIENTS, (client_names, T(dict))):
  2066. query = {
  2067. 'playbackContext': {
  2068. 'contentPlaybackContext': pb_context,
  2069. },
  2070. 'contentCheckOk': True,
  2071. 'racyCheckOk': True,
  2072. 'context': {
  2073. 'client': merge_dicts(
  2074. traverse_obj(client, ('INNERTUBE_CONTEXT', 'client')), {
  2075. 'hl': 'en',
  2076. 'timeZone': 'UTC',
  2077. 'utcOffsetMinutes': 0,
  2078. }),
  2079. },
  2080. 'videoId': video_id,
  2081. }
  2082. api_headers = merge_dicts(headers, traverse_obj(client, {
  2083. 'X-YouTube-Client-Name': 'INNERTUBE_CONTEXT_CLIENT_NAME',
  2084. 'X-YouTube-Client-Version': (
  2085. 'INNERTUBE_CONTEXT', 'client', 'clientVersion'),
  2086. 'User-Agent': (
  2087. 'INNERTUBE_CONTEXT', 'client', 'userAgent'),
  2088. }))
  2089. api_player_response = self._call_api(
  2090. 'player', query, video_id, fatal=False, headers=api_headers,
  2091. note=join_nonempty(
  2092. 'Downloading', traverse_obj(query, (
  2093. 'context', 'client', 'clientName')),
  2094. 'API JSON', delim=' '))
  2095. hls = traverse_obj(
  2096. (player_response, api_player_response),
  2097. (Ellipsis, 'streamingData', 'hlsManifestUrl', T(url_or_none)))
  2098. if len(hls) == 2 and not hls[0] and hls[1]:
  2099. player_response['streamingData']['hlsManifestUrl'] = hls[1]
  2100. else:
  2101. video_details = merge_dicts(*traverse_obj(
  2102. (player_response, api_player_response),
  2103. (Ellipsis, 'videoDetails', T(dict))))
  2104. player_response.update(filter_dict(
  2105. api_player_response or {}, cndn=lambda k, _: k != 'captions'))
  2106. player_response['videoDetails'] = video_details
  2107. def is_agegated(playability):
  2108. # playability: dict
  2109. if not playability:
  2110. return False
  2111. if playability.get('desktopLegacyAgeGateReason'):
  2112. return True
  2113. reasons = traverse_obj(playability, (('status', 'reason'),))
  2114. AGE_GATE_REASONS = (
  2115. 'confirm your age', 'age-restricted', 'inappropriate', # reason
  2116. 'age_verification_required', 'age_check_required', # status
  2117. )
  2118. return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
  2119. def get_playability_status(response):
  2120. return try_get(response, lambda x: x['playabilityStatus'], dict) or {}
  2121. playability_status = get_playability_status(player_response)
  2122. if (is_agegated(playability_status)
  2123. and int_or_none(self._downloader.params.get('age_limit'), default=18) >= 18):
  2124. self.report_age_confirmation()
  2125. # Thanks: https://github.com/yt-dlp/yt-dlp/pull/3233
  2126. pb_context = {'html5Preference': 'HTML5_PREF_WANTS'}
  2127. # Use signatureTimestamp if available
  2128. # Thanks https://github.com/ytdl-org/youtube-dl/issues/31034#issuecomment-1160718026
  2129. player_url = self._extract_player_url(webpage)
  2130. ytcfg = self._extract_ytcfg(video_id, webpage)
  2131. sts = self._extract_signature_timestamp(video_id, player_url, ytcfg)
  2132. if sts:
  2133. pb_context['signatureTimestamp'] = sts
  2134. query = {
  2135. 'playbackContext': {'contentPlaybackContext': pb_context},
  2136. 'contentCheckOk': True,
  2137. 'racyCheckOk': True,
  2138. 'context': {
  2139. 'client': {'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', 'clientVersion': '2.0', 'hl': 'en', 'clientScreen': 'EMBED'},
  2140. 'thirdParty': {'embedUrl': 'https://google.com'},
  2141. },
  2142. 'videoId': video_id,
  2143. }
  2144. headers = {
  2145. 'X-YouTube-Client-Name': '85',
  2146. 'X-YouTube-Client-Version': '2.0',
  2147. 'Origin': 'https://www.youtube.com',
  2148. }
  2149. video_info = self._call_api('player', query, video_id, fatal=False, headers=headers)
  2150. age_gate_status = get_playability_status(video_info)
  2151. if age_gate_status.get('status') == 'OK':
  2152. player_response = video_info
  2153. playability_status = age_gate_status
  2154. trailer_video_id = try_get(
  2155. playability_status,
  2156. lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
  2157. compat_str)
  2158. if trailer_video_id:
  2159. return self.url_result(
  2160. trailer_video_id, self.ie_key(), trailer_video_id)
  2161. def get_text(x):
  2162. return ''.join(traverse_obj(
  2163. x, (('simpleText',),), ('runs', Ellipsis, 'text'),
  2164. expected_type=compat_str))
  2165. search_meta = (
  2166. (lambda x: self._html_search_meta(x, webpage, default=None))
  2167. if webpage else lambda _: None)
  2168. video_details = player_response.get('videoDetails') or {}
  2169. microformat = try_get(
  2170. player_response,
  2171. lambda x: x['microformat']['playerMicroformatRenderer'],
  2172. dict) or {}
  2173. video_title = video_details.get('title') \
  2174. or get_text(microformat.get('title')) \
  2175. or search_meta(['og:title', 'twitter:title', 'title'])
  2176. video_description = video_details.get('shortDescription')
  2177. if not smuggled_data.get('force_singlefeed', False):
  2178. if not self._downloader.params.get('noplaylist'):
  2179. multifeed_metadata_list = try_get(
  2180. player_response,
  2181. lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
  2182. compat_str)
  2183. if multifeed_metadata_list:
  2184. entries = []
  2185. feed_ids = []
  2186. for feed in multifeed_metadata_list.split(','):
  2187. # Unquote should take place before split on comma (,) since textual
  2188. # fields may contain comma as well (see
  2189. # https://github.com/ytdl-org/youtube-dl/issues/8536)
  2190. feed_data = compat_parse_qs(
  2191. compat_urllib_parse_unquote_plus(feed))
  2192. def feed_entry(name):
  2193. return try_get(
  2194. feed_data, lambda x: x[name][0], compat_str)
  2195. feed_id = feed_entry('id')
  2196. if not feed_id:
  2197. continue
  2198. feed_title = feed_entry('title')
  2199. title = video_title
  2200. if feed_title:
  2201. title += ' (%s)' % feed_title
  2202. entries.append({
  2203. '_type': 'url_transparent',
  2204. 'ie_key': 'Youtube',
  2205. 'url': smuggle_url(
  2206. base_url + 'watch?v=' + feed_data['id'][0],
  2207. {'force_singlefeed': True}),
  2208. 'title': title,
  2209. })
  2210. feed_ids.append(feed_id)
  2211. self.to_screen(
  2212. 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
  2213. % (', '.join(feed_ids), video_id))
  2214. return self.playlist_result(
  2215. entries, video_id, video_title, video_description)
  2216. else:
  2217. self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
  2218. if not player_url:
  2219. player_url = self._extract_player_url(webpage)
  2220. formats = []
  2221. itags = collections.defaultdict(set)
  2222. itag_qualities = {}
  2223. q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
  2224. CHUNK_SIZE = 10 << 20
  2225. is_live = video_details.get('isLive')
  2226. streaming_data = player_response.get('streamingData') or {}
  2227. streaming_formats = streaming_data.get('formats') or []
  2228. streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
  2229. def build_fragments(f):
  2230. return LazyList({
  2231. 'url': update_url_query(f['url'], {
  2232. 'range': '{0}-{1}'.format(range_start, min(range_start + CHUNK_SIZE - 1, f['filesize'])),
  2233. })
  2234. } for range_start in range(0, f['filesize'], CHUNK_SIZE))
  2235. lower = lambda s: s.lower()
  2236. for fmt in streaming_formats:
  2237. if fmt.get('targetDurationSec'):
  2238. continue
  2239. itag = str_or_none(fmt.get('itag'))
  2240. audio_track = traverse_obj(fmt, ('audioTrack', T(dict))) or {}
  2241. quality = traverse_obj(fmt, ((
  2242. # The 3gp format (17) in android client has a quality of "small",
  2243. # but is actually worse than other formats
  2244. T(lambda _: 'tiny' if itag == 17 else None),
  2245. ('quality', T(lambda q: q if q and q != 'tiny' else None)),
  2246. ('audioQuality', T(lower)),
  2247. 'quality'), T(txt_or_none)), get_all=False)
  2248. if quality and itag:
  2249. itag_qualities[itag] = quality
  2250. # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
  2251. # (adding `&sq=0` to the URL) and parsing emsg box to determine the
  2252. # number of fragments that would subsequently be requested with (`&sq=N`)
  2253. if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
  2254. continue
  2255. fmt_url = fmt.get('url')
  2256. if not fmt_url:
  2257. sc = compat_parse_qs(fmt.get('signatureCipher'))
  2258. fmt_url = traverse_obj(sc, ('url', -1, T(url_or_none)))
  2259. encrypted_sig = traverse_obj(sc, ('s', -1))
  2260. if not (fmt_url and encrypted_sig):
  2261. continue
  2262. player_url = player_url or self._extract_player_url(webpage)
  2263. if not player_url:
  2264. continue
  2265. try:
  2266. fmt_url = update_url_query(fmt_url, {
  2267. traverse_obj(sc, ('sp', -1)) or 'signature':
  2268. [self._decrypt_signature(encrypted_sig, video_id, player_url)],
  2269. })
  2270. except ExtractorError as e:
  2271. self.report_warning('Signature extraction failed: Some formats may be missing',
  2272. video_id=video_id, only_once=True)
  2273. self.write_debug(error_to_compat_str(e), only_once=True)
  2274. continue
  2275. language_preference = (
  2276. 10 if audio_track.get('audioIsDefault')
  2277. else -10 if 'descriptive' in (traverse_obj(audio_track, ('displayName', T(lower))) or '')
  2278. else -1)
  2279. name = (
  2280. traverse_obj(fmt, ('qualityLabel', T(txt_or_none)))
  2281. or quality.replace('audio_quality_', ''))
  2282. dct = {
  2283. 'format_id': join_nonempty(itag, fmt.get('isDrc') and 'drc'),
  2284. 'url': fmt_url,
  2285. # Format 22 is likely to be damaged: see https://github.com/yt-dlp/yt-dlp/issues/3372
  2286. 'source_preference': ((-5 if itag == '22' else -1)
  2287. + (100 if 'Premium' in name else 0)),
  2288. 'quality': q(quality),
  2289. 'language': join_nonempty(audio_track.get('id', '').split('.')[0],
  2290. 'desc' if language_preference < -1 else '') or None,
  2291. 'language_preference': language_preference,
  2292. # Strictly de-prioritize 3gp formats
  2293. 'preference': -2 if itag == '17' else None,
  2294. }
  2295. if itag:
  2296. itags[itag].add(('https', dct.get('language')))
  2297. self._unthrottle_format_urls(video_id, player_url, dct)
  2298. dct.update(traverse_obj(fmt, {
  2299. 'asr': ('audioSampleRate', T(int_or_none)),
  2300. 'filesize': ('contentLength', T(int_or_none)),
  2301. 'format_note': ('qualityLabel', T(lambda x: x or quality)),
  2302. # for some formats, fps is wrongly returned as 1
  2303. 'fps': ('fps', T(int_or_none), T(lambda f: f if f > 1 else None)),
  2304. 'audio_channels': ('audioChannels', T(int_or_none)),
  2305. 'height': ('height', T(int_or_none)),
  2306. 'has_drm': ('drmFamilies', T(bool)),
  2307. 'tbr': (('averageBitrate', 'bitrate'), T(lambda t: float_or_none(t, 1000))),
  2308. 'width': ('width', T(int_or_none)),
  2309. '_duration_ms': ('approxDurationMs', T(int_or_none)),
  2310. }, get_all=False))
  2311. mime_mobj = re.match(
  2312. r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
  2313. if mime_mobj:
  2314. dct['ext'] = mimetype2ext(mime_mobj.group(1))
  2315. dct.update(parse_codecs(mime_mobj.group(2)))
  2316. single_stream = 'none' in (dct.get(c) for c in ('acodec', 'vcodec'))
  2317. if single_stream and dct.get('ext'):
  2318. dct['container'] = dct['ext'] + '_dash'
  2319. if single_stream or itag == '17':
  2320. # avoid Youtube throttling
  2321. dct.update({
  2322. 'protocol': 'http_dash_segments',
  2323. 'fragments': build_fragments(dct),
  2324. } if dct['filesize'] else {
  2325. 'downloader_options': {'http_chunk_size': CHUNK_SIZE}, # No longer useful?
  2326. })
  2327. formats.append(dct)
  2328. def process_manifest_format(f, proto, client_name, itag, all_formats=False):
  2329. key = (proto, f.get('language'))
  2330. if not all_formats and key in itags[itag]:
  2331. return False
  2332. itags[itag].add(key)
  2333. if itag:
  2334. f['format_id'] = (
  2335. '{0}-{1}'.format(itag, proto)
  2336. if all_formats or any(p != proto for p, _ in itags[itag])
  2337. else itag)
  2338. if f.get('source_preference') is None:
  2339. f['source_preference'] = -1
  2340. if itag in ('616', '235'):
  2341. f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
  2342. f['source_preference'] += 100
  2343. f['quality'] = q(traverse_obj(f, (
  2344. 'format_id', T(lambda s: itag_qualities[s.split('-')[0]])), default=-1))
  2345. if try_call(lambda: f['fps'] <= 1):
  2346. del f['fps']
  2347. if proto == 'hls' and f.get('has_drm'):
  2348. f['has_drm'] = 'maybe'
  2349. f['source_preference'] -= 5
  2350. return True
  2351. hls_manifest_url = streaming_data.get('hlsManifestUrl')
  2352. if hls_manifest_url:
  2353. formats.extend(
  2354. f for f in self._extract_m3u8_formats(
  2355. hls_manifest_url, video_id, 'mp4',
  2356. entry_protocol='m3u8_native', live=is_live, fatal=False)
  2357. if process_manifest_format(
  2358. f, 'hls', None, self._search_regex(
  2359. r'/itag/(\d+)', f['url'], 'itag', default=None)))
  2360. if self.get_param('youtube_include_dash_manifest', True):
  2361. dash_manifest_url = streaming_data.get('dashManifestUrl')
  2362. if dash_manifest_url:
  2363. for f in self._extract_mpd_formats(
  2364. dash_manifest_url, video_id, fatal=False):
  2365. if process_manifest_format(
  2366. f, 'dash', None, f['format_id']):
  2367. f['filesize'] = traverse_obj(f, (
  2368. ('fragment_base_url', 'url'), T(lambda u: self._search_regex(
  2369. r'/clen/(\d+)', u, 'file size', default=None)),
  2370. T(int_or_none)), get_all=False)
  2371. formats.append(f)
  2372. playable_formats = [f for f in formats if not f.get('has_drm')]
  2373. if formats:
  2374. if not playable_formats:
  2375. # If there are no formats that definitely don't have DRM, all have DRM
  2376. self.report_drm(video_id)
  2377. formats[:] = playable_formats
  2378. else:
  2379. if streaming_data.get('licenseInfos'):
  2380. raise ExtractorError(
  2381. 'This video is DRM protected.', expected=True)
  2382. pemr = try_get(
  2383. playability_status,
  2384. lambda x: x['errorScreen']['playerErrorMessageRenderer'],
  2385. dict) or {}
  2386. reason = get_text(pemr.get('reason')) or playability_status.get('reason') or ''
  2387. subreason = pemr.get('subreason')
  2388. if subreason:
  2389. subreason = clean_html(get_text(subreason))
  2390. if subreason == 'The uploader has not made this video available in your country.':
  2391. countries = microformat.get('availableCountries')
  2392. if not countries:
  2393. regions_allowed = search_meta('regionsAllowed')
  2394. countries = regions_allowed.split(',') if regions_allowed else None
  2395. self.raise_geo_restricted(
  2396. subreason, countries)
  2397. reason += '\n' + subreason
  2398. if reason:
  2399. raise ExtractorError(reason, expected=True)
  2400. self._sort_formats(formats)
  2401. keywords = video_details.get('keywords') or []
  2402. if not keywords and webpage:
  2403. keywords = [
  2404. unescapeHTML(m.group('content'))
  2405. for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
  2406. for keyword in keywords:
  2407. if keyword.startswith('yt:stretch='):
  2408. mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
  2409. if mobj:
  2410. # NB: float is intentional for forcing float division
  2411. w, h = (float(v) for v in mobj.groups())
  2412. if w > 0 and h > 0:
  2413. ratio = w / h
  2414. for f in formats:
  2415. if f.get('vcodec') != 'none':
  2416. f['stretched_ratio'] = ratio
  2417. break
  2418. thumbnails = []
  2419. for container in (video_details, microformat):
  2420. for thumbnail in try_get(
  2421. container,
  2422. lambda x: x['thumbnail']['thumbnails'], list) or []:
  2423. thumbnail_url = url_or_none(thumbnail.get('url'))
  2424. if not thumbnail_url:
  2425. continue
  2426. thumbnails.append({
  2427. 'height': int_or_none(thumbnail.get('height')),
  2428. 'url': update_url(thumbnail_url, query=None, fragment=None),
  2429. 'width': int_or_none(thumbnail.get('width')),
  2430. })
  2431. if thumbnails:
  2432. break
  2433. else:
  2434. thumbnail = search_meta(['og:image', 'twitter:image'])
  2435. if thumbnail:
  2436. thumbnails = [{'url': thumbnail}]
  2437. category = microformat.get('category') or search_meta('genre')
  2438. channel_id = self._extract_channel_id(
  2439. webpage, videodetails=video_details, metadata=microformat)
  2440. duration = int_or_none(
  2441. video_details.get('lengthSeconds')
  2442. or microformat.get('lengthSeconds')) \
  2443. or parse_duration(search_meta('duration'))
  2444. for f in formats:
  2445. # Some formats may have much smaller duration than others (possibly damaged during encoding)
  2446. # but avoid false positives with small duration differences.
  2447. # Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
  2448. if try_call(lambda x: float(x.pop('_duration_ms')) / duration < 500, args=(f,)):
  2449. self.report_warning(
  2450. '{0}: Some possibly damaged formats will be deprioritized'.format(video_id), only_once=True)
  2451. # Strictly de-prioritize damaged formats
  2452. f['preference'] = -10
  2453. owner_profile_url = self._yt_urljoin(self._extract_author_var(
  2454. webpage, 'url', videodetails=video_details, metadata=microformat))
  2455. uploader = self._extract_author_var(
  2456. webpage, 'name', videodetails=video_details, metadata=microformat)
  2457. info = {
  2458. 'id': video_id,
  2459. 'title': self._live_title(video_title) if is_live else video_title,
  2460. 'formats': formats,
  2461. 'thumbnails': thumbnails,
  2462. 'description': video_description,
  2463. 'upload_date': unified_strdate(
  2464. microformat.get('uploadDate')
  2465. or search_meta('uploadDate')),
  2466. 'uploader': uploader,
  2467. 'channel_id': channel_id,
  2468. 'duration': duration,
  2469. 'view_count': int_or_none(
  2470. video_details.get('viewCount')
  2471. or microformat.get('viewCount')
  2472. or search_meta('interactionCount')),
  2473. 'average_rating': float_or_none(video_details.get('averageRating')),
  2474. 'age_limit': 18 if (
  2475. microformat.get('isFamilySafe') is False
  2476. or search_meta('isFamilyFriendly') == 'false'
  2477. or search_meta('og:restrictions:age') == '18+') else 0,
  2478. 'webpage_url': webpage_url,
  2479. 'categories': [category] if category else None,
  2480. 'tags': keywords,
  2481. 'is_live': is_live,
  2482. }
  2483. pctr = traverse_obj(
  2484. (player_response, api_player_response),
  2485. (Ellipsis, 'captions', 'playerCaptionsTracklistRenderer', T(dict)))
  2486. if pctr:
  2487. def process_language(container, base_url, lang_code, query):
  2488. lang_subs = []
  2489. for fmt in self._SUBTITLE_FORMATS:
  2490. query.update({
  2491. 'fmt': fmt,
  2492. })
  2493. lang_subs.append({
  2494. 'ext': fmt,
  2495. 'url': update_url_query(base_url, query),
  2496. })
  2497. container[lang_code] = lang_subs
  2498. def process_subtitles():
  2499. subtitles = {}
  2500. for caption_track in traverse_obj(pctr, (
  2501. Ellipsis, 'captionTracks', lambda _, v: (
  2502. v.get('baseUrl') and v.get('languageCode')))):
  2503. base_url = self._yt_urljoin(caption_track['baseUrl'])
  2504. if not base_url:
  2505. continue
  2506. lang_code = caption_track['languageCode']
  2507. if caption_track.get('kind') != 'asr':
  2508. process_language(
  2509. subtitles, base_url, lang_code, {})
  2510. continue
  2511. automatic_captions = {}
  2512. process_language(
  2513. automatic_captions, base_url, lang_code, {})
  2514. for translation_language in traverse_obj(pctr, (
  2515. Ellipsis, 'translationLanguages', lambda _, v: v.get('languageCode'))):
  2516. translation_language_code = translation_language['languageCode']
  2517. process_language(
  2518. automatic_captions, base_url, translation_language_code,
  2519. {'tlang': translation_language_code})
  2520. info['automatic_captions'] = automatic_captions
  2521. info['subtitles'] = subtitles
  2522. process_subtitles()
  2523. parsed_url = compat_urllib_parse_urlparse(url)
  2524. for component in (parsed_url.fragment, parsed_url.query):
  2525. query = compat_parse_qs(component)
  2526. for k, v in query.items():
  2527. for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
  2528. d_k += '_time'
  2529. if d_k not in info and k in s_ks:
  2530. info[d_k] = parse_duration(v[0])
  2531. if video_description:
  2532. # Youtube Music Auto-generated description
  2533. mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
  2534. if mobj:
  2535. release_year = mobj.group('release_year')
  2536. release_date = mobj.group('release_date')
  2537. if release_date:
  2538. release_date = release_date.replace('-', '')
  2539. if not release_year:
  2540. release_year = release_date[:4]
  2541. info.update({
  2542. 'album': mobj.group('album'.strip()),
  2543. 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
  2544. 'track': mobj.group('track').strip(),
  2545. 'release_date': release_date,
  2546. 'release_year': int_or_none(release_year),
  2547. })
  2548. initial_data = None
  2549. if webpage:
  2550. initial_data = self._extract_yt_initial_variable(
  2551. webpage, self._YT_INITIAL_DATA_RE, video_id,
  2552. 'yt initial data')
  2553. if not initial_data:
  2554. initial_data = self._call_api(
  2555. 'next', {'videoId': video_id}, video_id, fatal=False)
  2556. if initial_data:
  2557. chapters = self._extract_chapters_from_json(
  2558. initial_data, video_id, duration)
  2559. if not chapters:
  2560. for engagment_pannel in (initial_data.get('engagementPanels') or []):
  2561. contents = try_get(
  2562. engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
  2563. list)
  2564. if not contents:
  2565. continue
  2566. def chapter_time(mmlir):
  2567. return parse_duration(
  2568. get_text(mmlir.get('timeDescription')))
  2569. chapters = []
  2570. for next_num, content in enumerate(contents, start=1):
  2571. mmlir = content.get('macroMarkersListItemRenderer') or {}
  2572. start_time = chapter_time(mmlir)
  2573. end_time = (traverse_obj(
  2574. contents, (next_num, 'macroMarkersListItemRenderer', T(chapter_time)))
  2575. if next_num < len(contents) else duration)
  2576. if start_time is None or end_time is None:
  2577. continue
  2578. chapters.append({
  2579. 'start_time': start_time,
  2580. 'end_time': end_time,
  2581. 'title': get_text(mmlir.get('title')),
  2582. })
  2583. if chapters:
  2584. break
  2585. if chapters:
  2586. info['chapters'] = chapters
  2587. contents = try_get(
  2588. initial_data,
  2589. lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
  2590. list) or []
  2591. if not info['channel_id']:
  2592. channel_id = self._extract_channel_id('', renderers=contents)
  2593. if not info['uploader']:
  2594. info['uploader'] = self._extract_author_var('', 'name', renderers=contents)
  2595. if not owner_profile_url:
  2596. owner_profile_url = self._yt_urljoin(self._extract_author_var('', 'url', renderers=contents))
  2597. for content in contents:
  2598. vpir = content.get('videoPrimaryInfoRenderer')
  2599. if vpir:
  2600. stl = vpir.get('superTitleLink')
  2601. if stl:
  2602. stl = get_text(stl)
  2603. if try_get(
  2604. vpir,
  2605. lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
  2606. info['location'] = stl
  2607. else:
  2608. # •? doesn't match, but [•]? does; \xa0 = non-breaking space
  2609. mobj = re.search(r'([^\xa0\s].*?)[\xa0\s]*S(\d+)[\xa0\s]*[•]?[\xa0\s]*E(\d+)', stl)
  2610. if mobj:
  2611. info.update({
  2612. 'series': mobj.group(1),
  2613. 'season_number': int(mobj.group(2)),
  2614. 'episode_number': int(mobj.group(3)),
  2615. })
  2616. for tlb in (try_get(
  2617. vpir,
  2618. lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
  2619. list) or []):
  2620. tbr = traverse_obj(tlb, ('segmentedLikeDislikeButtonRenderer', 'likeButton', 'toggleButtonRenderer'), 'toggleButtonRenderer') or {}
  2621. for getter, regex in [(
  2622. lambda x: x['defaultText']['accessibility']['accessibilityData'],
  2623. r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
  2624. lambda x: x['accessibility'],
  2625. lambda x: x['accessibilityData']['accessibilityData'],
  2626. ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
  2627. label = (try_get(tbr, getter, dict) or {}).get('label')
  2628. if label:
  2629. mobj = re.match(regex, label)
  2630. if mobj:
  2631. info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
  2632. break
  2633. sbr_tooltip = try_get(
  2634. vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
  2635. if sbr_tooltip:
  2636. # however dislike_count was hidden by YT, as if there could ever be dislikable content on YT
  2637. like_count, dislike_count = sbr_tooltip.split(' / ')
  2638. info.update({
  2639. 'like_count': str_to_int(like_count),
  2640. 'dislike_count': str_to_int(dislike_count),
  2641. })
  2642. else:
  2643. info['like_count'] = traverse_obj(vpir, (
  2644. 'videoActions', 'menuRenderer', 'topLevelButtons', Ellipsis,
  2645. 'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel',
  2646. 'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel',
  2647. 'buttonViewModel', (('title', ('accessibilityText', T(lambda s: s.split()), Ellipsis))), T(parse_count)),
  2648. get_all=False)
  2649. vsir = content.get('videoSecondaryInfoRenderer')
  2650. if vsir:
  2651. rows = try_get(
  2652. vsir,
  2653. lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
  2654. list) or []
  2655. multiple_songs = False
  2656. for row in rows:
  2657. if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
  2658. multiple_songs = True
  2659. break
  2660. for row in rows:
  2661. mrr = row.get('metadataRowRenderer') or {}
  2662. mrr_title = mrr.get('title')
  2663. if not mrr_title:
  2664. continue
  2665. mrr_title = get_text(mrr['title'])
  2666. mrr_contents_text = get_text(mrr['contents'][0])
  2667. if mrr_title == 'License':
  2668. info['license'] = mrr_contents_text
  2669. elif not multiple_songs:
  2670. if mrr_title == 'Album':
  2671. info['album'] = mrr_contents_text
  2672. elif mrr_title == 'Artist':
  2673. info['artist'] = mrr_contents_text
  2674. elif mrr_title == 'Song':
  2675. info['track'] = mrr_contents_text
  2676. # this is not extraction but spelunking!
  2677. carousel_lockups = traverse_obj(
  2678. initial_data,
  2679. ('engagementPanels', Ellipsis, 'engagementPanelSectionListRenderer',
  2680. 'content', 'structuredDescriptionContentRenderer', 'items', Ellipsis,
  2681. 'videoDescriptionMusicSectionRenderer', 'carouselLockups', Ellipsis),
  2682. expected_type=dict) or []
  2683. # try to reproduce logic from metadataRowContainerRenderer above (if it still is)
  2684. fields = (('ALBUM', 'album'), ('ARTIST', 'artist'), ('SONG', 'track'), ('LICENSES', 'license'))
  2685. # multiple_songs ?
  2686. if len(carousel_lockups) > 1:
  2687. fields = fields[-1:]
  2688. for info_row in traverse_obj(
  2689. carousel_lockups,
  2690. (0, 'carouselLockupRenderer', 'infoRows', Ellipsis, 'infoRowRenderer'),
  2691. expected_type=dict):
  2692. row_title = traverse_obj(info_row, ('title', 'simpleText'))
  2693. row_text = traverse_obj(info_row, 'defaultMetadata', 'expandedMetadata', expected_type=get_text)
  2694. if not row_text:
  2695. continue
  2696. for name, field in fields:
  2697. if name == row_title and not info.get(field):
  2698. info[field] = row_text
  2699. for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
  2700. v = info.get(s_k)
  2701. if v:
  2702. info[d_k] = v
  2703. self.mark_watched(video_id, player_response)
  2704. return merge_dicts(
  2705. info, {
  2706. 'uploader_id': self._extract_uploader_id(owner_profile_url),
  2707. 'uploader_url': owner_profile_url,
  2708. 'channel_id': channel_id,
  2709. 'channel_url': channel_id and self._yt_urljoin('/channel/' + channel_id),
  2710. 'channel': info['uploader'],
  2711. })
  2712. class YoutubeTabIE(YoutubeBaseInfoExtractor):
  2713. IE_DESC = 'YouTube.com tab'
  2714. _VALID_URL = r'''(?x)
  2715. https?://
  2716. (?:\w+\.)?
  2717. (?:
  2718. youtube(?:kids)?\.com|
  2719. invidio\.us
  2720. )/
  2721. (?:
  2722. (?:channel|c|user|feed|hashtag)/|
  2723. (?:playlist|watch)\?.*?\blist=|
  2724. (?!(?:watch|embed|v|e|results)\b)
  2725. )
  2726. (?P<id>[^/?\#&]+)
  2727. '''
  2728. IE_NAME = 'youtube:tab'
  2729. _TESTS = [{
  2730. # Shorts
  2731. 'url': 'https://www.youtube.com/@SuperCooperShorts/shorts',
  2732. 'playlist_mincount': 5,
  2733. 'info_dict': {
  2734. 'description': 'Short clips from Super Cooper Sundays!',
  2735. 'id': 'UCKMA8kHZ8bPYpnMNaUSxfEQ',
  2736. 'title': 'Super Cooper Shorts - Shorts',
  2737. 'uploader': 'Super Cooper Shorts',
  2738. 'uploader_id': '@SuperCooperShorts',
  2739. },
  2740. }, {
  2741. # Channel that does not have a Shorts tab. Test should just download videos on Home tab instead
  2742. 'url': 'https://www.youtube.com/@emergencyawesome/shorts',
  2743. 'info_dict': {
  2744. 'description': 'md5:592c080c06fef4de3c902c4a8eecd850',
  2745. 'id': 'UCDiFRMQWpcp8_KD4vwIVicw',
  2746. 'title': 'Emergency Awesome - Home',
  2747. },
  2748. 'playlist_mincount': 5,
  2749. 'skip': 'new test page needed to replace `Emergency Awesome - Shorts`',
  2750. }, {
  2751. # playlists, multipage
  2752. 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
  2753. 'playlist_mincount': 94,
  2754. 'info_dict': {
  2755. 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
  2756. 'title': r're:Igor Kleiner(?: Ph\.D\.)? - Playlists',
  2757. 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
  2758. 'uploader': 'Igor Kleiner',
  2759. 'uploader_id': '@IgorDataScience',
  2760. },
  2761. }, {
  2762. # playlists, multipage, different order
  2763. 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
  2764. 'playlist_mincount': 94,
  2765. 'info_dict': {
  2766. 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
  2767. 'title': r're:Igor Kleiner(?: Ph\.D\.)? - Playlists',
  2768. 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
  2769. 'uploader': 'Igor Kleiner',
  2770. 'uploader_id': '@IgorDataScience',
  2771. },
  2772. }, {
  2773. # playlists, series
  2774. 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
  2775. 'playlist_mincount': 5,
  2776. 'info_dict': {
  2777. 'id': 'UCYO_jab_esuFRV4b17AJtAw',
  2778. 'title': '3Blue1Brown - Playlists',
  2779. 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
  2780. 'uploader': '3Blue1Brown',
  2781. 'uploader_id': '@3blue1brown',
  2782. },
  2783. }, {
  2784. # playlists, singlepage
  2785. 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
  2786. 'playlist_mincount': 4,
  2787. 'info_dict': {
  2788. 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
  2789. 'title': 'ThirstForScience - Playlists',
  2790. 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
  2791. 'uploader': 'ThirstForScience',
  2792. 'uploader_id': '@ThirstForScience',
  2793. },
  2794. }, {
  2795. 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
  2796. 'only_matching': True,
  2797. }, {
  2798. # basic, single video playlist
  2799. 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
  2800. 'info_dict': {
  2801. 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
  2802. 'title': 'youtube-dl public playlist',
  2803. 'uploader': 'Sergey M.',
  2804. 'uploader_id': '@sergeym.6173',
  2805. 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
  2806. },
  2807. 'playlist_count': 1,
  2808. }, {
  2809. # empty playlist
  2810. 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
  2811. 'info_dict': {
  2812. 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
  2813. 'title': 'youtube-dl empty playlist',
  2814. 'uploader': 'Sergey M.',
  2815. 'uploader_id': '@sergeym.6173',
  2816. 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
  2817. },
  2818. 'playlist_count': 0,
  2819. }, {
  2820. # Home tab
  2821. 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
  2822. 'info_dict': {
  2823. 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
  2824. 'title': 'lex will - Home',
  2825. 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
  2826. 'uploader': 'lex will',
  2827. 'uploader_id': '@lexwill718',
  2828. },
  2829. 'playlist_mincount': 2,
  2830. }, {
  2831. # Videos tab
  2832. 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
  2833. 'info_dict': {
  2834. 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
  2835. 'title': 'lex will - Videos',
  2836. 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
  2837. 'uploader': 'lex will',
  2838. 'uploader_id': '@lexwill718',
  2839. },
  2840. 'playlist_mincount': 975,
  2841. }, {
  2842. # Videos tab, sorted by popular
  2843. 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
  2844. 'info_dict': {
  2845. 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
  2846. 'title': 'lex will - Videos',
  2847. 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
  2848. 'uploader': 'lex will',
  2849. 'uploader_id': '@lexwill718',
  2850. },
  2851. 'playlist_mincount': 199,
  2852. }, {
  2853. # Playlists tab
  2854. 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
  2855. 'info_dict': {
  2856. 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
  2857. 'title': 'lex will - Playlists',
  2858. 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
  2859. 'uploader': 'lex will',
  2860. 'uploader_id': '@lexwill718',
  2861. },
  2862. 'playlist_mincount': 17,
  2863. }, {
  2864. # Community tab
  2865. 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
  2866. 'info_dict': {
  2867. 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
  2868. 'title': 'lex will - Community',
  2869. 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
  2870. 'uploader': 'lex will',
  2871. 'uploader_id': '@lexwill718',
  2872. },
  2873. 'playlist_mincount': 18,
  2874. }, {
  2875. # Channels tab
  2876. 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
  2877. 'info_dict': {
  2878. 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
  2879. 'title': r're:lex will - (?:Home|Channels)',
  2880. 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
  2881. 'uploader': 'lex will',
  2882. 'uploader_id': '@lexwill718',
  2883. },
  2884. 'playlist_mincount': 75,
  2885. }, {
  2886. # Releases tab
  2887. 'url': 'https://www.youtube.com/@daftpunk/releases',
  2888. 'info_dict': {
  2889. 'id': 'UC_kRDKYrUlrbtrSiyu5Tflg',
  2890. 'title': 'Daft Punk - Releases',
  2891. 'description': 'Daft Punk (1993 - 2021) - Official YouTube Channel',
  2892. 'uploader_id': '@daftpunk',
  2893. 'uploader': 'Daft Punk',
  2894. },
  2895. 'playlist_mincount': 36,
  2896. }, {
  2897. 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
  2898. 'only_matching': True,
  2899. }, {
  2900. 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
  2901. 'only_matching': True,
  2902. }, {
  2903. 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
  2904. 'only_matching': True,
  2905. }, {
  2906. 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
  2907. 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
  2908. 'info_dict': {
  2909. 'title': '29C3: Not my department',
  2910. 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
  2911. 'uploader': 'Christiaan008',
  2912. 'uploader_id': '@ChRiStIaAn008',
  2913. 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
  2914. },
  2915. 'playlist_count': 96,
  2916. }, {
  2917. 'note': 'Large playlist',
  2918. 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
  2919. 'info_dict': {
  2920. 'title': 'Uploads from Cauchemar',
  2921. 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
  2922. 'uploader': 'Cauchemar',
  2923. 'uploader_id': '@Cauchemar89',
  2924. 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
  2925. },
  2926. 'playlist_mincount': 1123,
  2927. }, {
  2928. # even larger playlist, 8832 videos
  2929. 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
  2930. 'only_matching': True,
  2931. }, {
  2932. 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
  2933. 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
  2934. 'info_dict': {
  2935. 'title': 'Uploads from Interstellar Movie',
  2936. 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
  2937. 'uploader': 'Interstellar Movie',
  2938. 'uploader_id': '@InterstellarMovie',
  2939. 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
  2940. },
  2941. 'playlist_mincount': 21,
  2942. }, {
  2943. # https://github.com/ytdl-org/youtube-dl/issues/21844
  2944. 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
  2945. 'info_dict': {
  2946. 'title': 'Data Analysis with Dr Mike Pound',
  2947. 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
  2948. 'uploader': 'Computerphile',
  2949. 'uploader_id': '@Computerphile',
  2950. 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
  2951. },
  2952. 'playlist_mincount': 11,
  2953. }, {
  2954. 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
  2955. 'only_matching': True,
  2956. }, {
  2957. # Playlist URL that does not actually serve a playlist
  2958. 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
  2959. 'info_dict': {
  2960. 'id': 'FqZTN594JQw',
  2961. 'ext': 'webm',
  2962. 'title': "Smiley's People 01 detective, Adventure Series, Action",
  2963. 'uploader': 'STREEM',
  2964. 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
  2965. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
  2966. 'upload_date': '20150526',
  2967. 'license': 'Standard YouTube License',
  2968. 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
  2969. 'categories': ['People & Blogs'],
  2970. 'tags': list,
  2971. 'view_count': int,
  2972. 'like_count': int,
  2973. },
  2974. 'params': {
  2975. 'skip_download': True,
  2976. },
  2977. 'skip': 'This video is not available.',
  2978. 'add_ie': [YoutubeIE.ie_key()],
  2979. }, {
  2980. 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
  2981. 'only_matching': True,
  2982. }, {
  2983. 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
  2984. 'only_matching': True,
  2985. }, {
  2986. 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
  2987. 'info_dict': {
  2988. 'id': r're:[\da-zA-Z_-]{8,}',
  2989. 'ext': 'mp4',
  2990. 'title': r're:(?s)[A-Z].{20,}',
  2991. 'uploader': 'Sky News',
  2992. 'uploader_id': '@SkyNews',
  2993. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@SkyNews',
  2994. 'upload_date': r're:\d{8}',
  2995. 'description': r're:(?s)(?:.*\n)+SUBSCRIBE to our YouTube channel for more videos: http://www\.youtube\.com/skynews *\n.*',
  2996. 'categories': ['News & Politics'],
  2997. 'tags': list,
  2998. 'like_count': int,
  2999. },
  3000. 'params': {
  3001. 'skip_download': True,
  3002. },
  3003. }, {
  3004. 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
  3005. 'info_dict': {
  3006. 'id': 'a48o2S1cPoo',
  3007. 'ext': 'mp4',
  3008. 'title': 'The Young Turks - Live Main Show',
  3009. 'uploader': 'The Young Turks',
  3010. 'uploader_id': 'TheYoungTurks',
  3011. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
  3012. 'upload_date': '20150715',
  3013. 'license': 'Standard YouTube License',
  3014. 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
  3015. 'categories': ['News & Politics'],
  3016. 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
  3017. 'like_count': int,
  3018. },
  3019. 'params': {
  3020. 'skip_download': True,
  3021. },
  3022. 'only_matching': True,
  3023. }, {
  3024. 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
  3025. 'only_matching': True,
  3026. }, {
  3027. 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
  3028. 'only_matching': True,
  3029. }, {
  3030. 'url': 'https://www.youtube.com/feed/trending',
  3031. 'only_matching': True,
  3032. }, {
  3033. # needs auth
  3034. 'url': 'https://www.youtube.com/feed/library',
  3035. 'only_matching': True,
  3036. }, {
  3037. # needs auth
  3038. 'url': 'https://www.youtube.com/feed/history',
  3039. 'only_matching': True,
  3040. }, {
  3041. # needs auth
  3042. 'url': 'https://www.youtube.com/feed/subscriptions',
  3043. 'only_matching': True,
  3044. }, {
  3045. # needs auth
  3046. 'url': 'https://www.youtube.com/feed/watch_later',
  3047. 'only_matching': True,
  3048. }, {
  3049. # no longer available?
  3050. 'url': 'https://www.youtube.com/feed/recommended',
  3051. 'only_matching': True,
  3052. }, {
  3053. # inline playlist with not always working continuations
  3054. 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
  3055. 'only_matching': True,
  3056. }, {
  3057. 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
  3058. 'only_matching': True,
  3059. }, {
  3060. 'url': 'https://www.youtube.com/course',
  3061. 'only_matching': True,
  3062. }, {
  3063. 'url': 'https://www.youtube.com/zsecurity',
  3064. 'only_matching': True,
  3065. }, {
  3066. 'url': 'http://www.youtube.com/NASAgovVideo/videos',
  3067. 'only_matching': True,
  3068. }, {
  3069. 'url': 'https://www.youtube.com/TheYoungTurks/live',
  3070. 'only_matching': True,
  3071. }, {
  3072. 'url': 'https://www.youtube.com/hashtag/cctv9',
  3073. 'info_dict': {
  3074. 'id': 'cctv9',
  3075. 'title': '#cctv9',
  3076. },
  3077. 'playlist_mincount': 350,
  3078. }, {
  3079. 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
  3080. 'only_matching': True,
  3081. }, {
  3082. 'note': 'Search tab',
  3083. 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
  3084. 'playlist_mincount': 20,
  3085. 'info_dict': {
  3086. 'id': 'UCYO_jab_esuFRV4b17AJtAw',
  3087. 'title': '3Blue1Brown - Search - linear algebra',
  3088. 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
  3089. 'uploader': '3Blue1Brown',
  3090. 'uploader_id': '@3blue1brown',
  3091. 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
  3092. },
  3093. }]
  3094. @classmethod
  3095. def suitable(cls, url):
  3096. return not YoutubeIE.suitable(url) and super(
  3097. YoutubeTabIE, cls).suitable(url)
  3098. @staticmethod
  3099. def _extract_grid_item_renderer(item):
  3100. assert isinstance(item, dict)
  3101. for key, renderer in item.items():
  3102. if not key.startswith('grid') or not key.endswith('Renderer'):
  3103. continue
  3104. if not isinstance(renderer, dict):
  3105. continue
  3106. return renderer
  3107. @staticmethod
  3108. def _get_text(r, k):
  3109. return traverse_obj(
  3110. r, (k, 'runs', 0, 'text'), (k, 'simpleText'),
  3111. expected_type=txt_or_none)
  3112. def _grid_entries(self, grid_renderer):
  3113. for item in traverse_obj(grid_renderer, ('items', Ellipsis, T(dict))):
  3114. lockup_view_model = traverse_obj(item, ('lockupViewModel', T(dict)))
  3115. if lockup_view_model:
  3116. entry = self._extract_lockup_view_model(lockup_view_model)
  3117. if entry:
  3118. yield entry
  3119. continue
  3120. renderer = self._extract_grid_item_renderer(item)
  3121. if not isinstance(renderer, dict):
  3122. continue
  3123. title = self._get_text(renderer, 'title')
  3124. # playlist
  3125. playlist_id = renderer.get('playlistId')
  3126. if playlist_id:
  3127. yield self.url_result(
  3128. 'https://www.youtube.com/playlist?list=%s' % playlist_id,
  3129. ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
  3130. video_title=title)
  3131. continue
  3132. # video
  3133. video_id = renderer.get('videoId')
  3134. if video_id:
  3135. yield self._extract_video(renderer)
  3136. continue
  3137. # channel
  3138. channel_id = renderer.get('channelId')
  3139. if channel_id:
  3140. title = self._get_text(renderer, 'title')
  3141. yield self.url_result(
  3142. 'https://www.youtube.com/channel/%s' % channel_id,
  3143. ie=YoutubeTabIE.ie_key(), video_title=title)
  3144. continue
  3145. # generic endpoint URL support
  3146. ep_url = urljoin('https://www.youtube.com/', try_get(
  3147. renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
  3148. compat_str))
  3149. if ep_url:
  3150. for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
  3151. if ie.suitable(ep_url):
  3152. yield self.url_result(
  3153. ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
  3154. break
  3155. def _shelf_entries_from_content(self, shelf_renderer):
  3156. content = shelf_renderer.get('content')
  3157. if not isinstance(content, dict):
  3158. return
  3159. renderer = content.get('gridRenderer')
  3160. if renderer:
  3161. # TODO: add support for nested playlists so each shelf is processed
  3162. # as separate playlist
  3163. # TODO: this includes only first N items
  3164. for entry in self._grid_entries(renderer):
  3165. yield entry
  3166. renderer = content.get('horizontalListRenderer')
  3167. if renderer:
  3168. # TODO
  3169. pass
    def _shelf_entries(self, shelf_renderer, skip_channels=False):
        """Yield entries for a shelf renderer.

        When the shelf has an endpoint URL, a URL result for it is yielded,
        titled with the text of the shelf's first title run. Entries
        extracted from the shelf's content are yielded too.

        If skip_channels is true and the shelf URL contains '/channels?',
        nothing at all is yielded.
        """
        ep = try_get(
            shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str)
        shelf_url = urljoin('https://www.youtube.com', ep)
        if shelf_url:
            # Skipping links to other channels; note that checking for
            # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
            # will not work
            if skip_channels and '/channels?' in shelf_url:
                return
            title = try_get(
                shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
            yield self.url_result(shelf_url, video_title=title)
        # Shelf may not contain shelf URL, fallback to extraction from content
        # NOTE(review): as written this loop also runs after a shelf URL has
        # been yielded above - confirm that this is intended
        for entry in self._shelf_entries_from_content(shelf_renderer):
            yield entry
  3187. def _playlist_entries(self, video_list_renderer):
  3188. for content in video_list_renderer['contents']:
  3189. if not isinstance(content, dict):
  3190. continue
  3191. renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
  3192. if not isinstance(renderer, dict):
  3193. continue
  3194. video_id = renderer.get('videoId')
  3195. if not video_id:
  3196. continue
  3197. yield self._extract_video(renderer)
  3198. def _extract_lockup_view_model(self, view_model):
  3199. content_id = view_model.get('contentId')
  3200. if not content_id:
  3201. return
  3202. content_type = view_model.get('contentType')
  3203. if content_type not in ('LOCKUP_CONTENT_TYPE_PLAYLIST', 'LOCKUP_CONTENT_TYPE_PODCAST'):
  3204. self.report_warning(
  3205. 'Unsupported lockup view model content type "{0}"{1}'.format(content_type, bug_reports_message()), only_once=True)
  3206. return
  3207. return merge_dicts(self.url_result(
  3208. update_url_query('https://www.youtube.com/playlist', {'list': content_id}),
  3209. ie=YoutubeTabIE.ie_key(), video_id=content_id), {
  3210. 'title': traverse_obj(view_model, (
  3211. 'metadata', 'lockupMetadataViewModel', 'title', 'content', T(compat_str))),
  3212. 'thumbnails': self._extract_thumbnails(view_model, (
  3213. 'contentImage', 'collectionThumbnailViewModel', 'primaryThumbnail',
  3214. 'thumbnailViewModel', 'image'), final_key='sources'),
  3215. })
  3216. def _extract_shorts_lockup_view_model(self, view_model):
  3217. content_id = traverse_obj(view_model, (
  3218. 'onTap', 'innertubeCommand', 'reelWatchEndpoint', 'videoId',
  3219. T(lambda v: v if YoutubeIE.suitable(v) else None)))
  3220. return merge_dicts(self.url_result(
  3221. content_id, ie=YoutubeIE.ie_key(), video_id=content_id), {
  3222. 'title': traverse_obj(view_model, (
  3223. 'overlayMetadata', 'primaryText', 'content', T(compat_str))),
  3224. 'thumbnails': self._extract_thumbnails(
  3225. view_model, 'thumbnail', final_key='sources'),
  3226. }) if content_id else None
  3227. def _video_entry(self, video_renderer):
  3228. video_id = video_renderer.get('videoId')
  3229. return self._extract_video(video_renderer) if video_id else None
  3230. def _post_thread_entries(self, post_thread_renderer):
  3231. post_renderer = try_get(
  3232. post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
  3233. if not post_renderer:
  3234. return
  3235. # video attachment
  3236. video_renderer = try_get(
  3237. post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
  3238. video_id = None
  3239. if video_renderer:
  3240. entry = self._video_entry(video_renderer)
  3241. if entry:
  3242. yield entry
  3243. # inline video links
  3244. runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
  3245. for run in runs:
  3246. if not isinstance(run, dict):
  3247. continue
  3248. ep_url = try_get(
  3249. run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
  3250. if not ep_url:
  3251. continue
  3252. if not YoutubeIE.suitable(ep_url):
  3253. continue
  3254. ep_video_id = YoutubeIE._match_id(ep_url)
  3255. if video_id == ep_video_id:
  3256. continue
  3257. yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=video_id)
  3258. def _post_thread_continuation_entries(self, post_thread_continuation):
  3259. contents = post_thread_continuation.get('contents')
  3260. if not isinstance(contents, list):
  3261. return
  3262. for content in contents:
  3263. renderer = content.get('backstagePostThreadRenderer')
  3264. if not isinstance(renderer, dict):
  3265. continue
  3266. for entry in self._post_thread_entries(renderer):
  3267. yield entry
  3268. def _rich_grid_entries(self, contents):
  3269. for content in traverse_obj(
  3270. contents, (Ellipsis, 'richItemRenderer', 'content'),
  3271. expected_type=dict):
  3272. video_renderer = traverse_obj(
  3273. content, 'videoRenderer', 'reelItemRenderer',
  3274. expected_type=dict)
  3275. if video_renderer:
  3276. entry = self._video_entry(video_renderer)
  3277. if entry:
  3278. yield entry
  3279. # shorts item
  3280. shorts_lockup_view_model = content.get('shortsLockupViewModel')
  3281. if shorts_lockup_view_model:
  3282. entry = self._extract_shorts_lockup_view_model(shorts_lockup_view_model)
  3283. if entry:
  3284. yield entry
  3285. # playlist
  3286. renderer = traverse_obj(
  3287. content, 'playlistRenderer', expected_type=dict) or {}
  3288. title = self._get_text(renderer, 'title')
  3289. playlist_id = renderer.get('playlistId')
  3290. if playlist_id:
  3291. yield self.url_result(
  3292. 'https://www.youtube.com/playlist?list=%s' % playlist_id,
  3293. ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
  3294. video_title=title)
  3295. @staticmethod
  3296. def _build_continuation_query(continuation, ctp=None):
  3297. query = {
  3298. 'ctoken': continuation,
  3299. 'continuation': continuation,
  3300. }
  3301. if ctp:
  3302. query['itct'] = ctp
  3303. return query
  3304. @staticmethod
  3305. def _extract_next_continuation_data(renderer):
  3306. next_continuation = try_get(
  3307. renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
  3308. if not next_continuation:
  3309. return
  3310. continuation = next_continuation.get('continuation')
  3311. if not continuation:
  3312. return
  3313. ctp = next_continuation.get('clickTrackingParams')
  3314. return YoutubeTabIE._build_continuation_query(continuation, ctp)
  3315. @classmethod
  3316. def _extract_continuation(cls, renderer):
  3317. next_continuation = cls._extract_next_continuation_data(renderer)
  3318. if next_continuation:
  3319. return next_continuation
  3320. for command in traverse_obj(renderer, (
  3321. ('contents', 'items', 'rows'), Ellipsis, 'continuationItemRenderer',
  3322. ('continuationEndpoint', ('button', 'buttonRenderer', 'command')),
  3323. (('commandExecutorCommand', 'commands', Ellipsis), None), T(dict))):
  3324. continuation = traverse_obj(command, ('continuationCommand', 'token', T(compat_str)))
  3325. if not continuation:
  3326. continue
  3327. ctp = command.get('clickTrackingParams')
  3328. return cls._build_continuation_query(continuation, ctp)
  3329. def _entries(self, tab, item_id, webpage):
  3330. tab_content = try_get(tab, lambda x: x['content'], dict)
  3331. if not tab_content:
  3332. return
  3333. slr_renderer = try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
  3334. if slr_renderer:
  3335. is_channels_tab = tab.get('title') == 'Channels'
  3336. continuation = None
  3337. slr_contents = try_get(slr_renderer, lambda x: x['contents'], list) or []
  3338. for slr_content in slr_contents:
  3339. if not isinstance(slr_content, dict):
  3340. continue
  3341. is_renderer = try_get(slr_content, lambda x: x['itemSectionRenderer'], dict)
  3342. if not is_renderer:
  3343. continue
  3344. isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
  3345. for isr_content in isr_contents:
  3346. if not isinstance(isr_content, dict):
  3347. continue
  3348. renderer = isr_content.get('playlistVideoListRenderer')
  3349. if renderer:
  3350. for entry in self._playlist_entries(renderer):
  3351. yield entry
  3352. continuation = self._extract_continuation(renderer)
  3353. continue
  3354. renderer = isr_content.get('gridRenderer')
  3355. if renderer:
  3356. for entry in self._grid_entries(renderer):
  3357. yield entry
  3358. continuation = self._extract_continuation(renderer)
  3359. continue
  3360. renderer = isr_content.get('shelfRenderer')
  3361. if renderer:
  3362. for entry in self._shelf_entries(renderer, not is_channels_tab):
  3363. yield entry
  3364. continue
  3365. renderer = isr_content.get('backstagePostThreadRenderer')
  3366. if renderer:
  3367. for entry in self._post_thread_entries(renderer):
  3368. yield entry
  3369. continuation = self._extract_continuation(renderer)
  3370. continue
  3371. renderer = isr_content.get('videoRenderer')
  3372. if renderer:
  3373. entry = self._video_entry(renderer)
  3374. if entry:
  3375. yield entry
  3376. renderer = isr_content.get('richGridRenderer')
  3377. if renderer:
  3378. for from_ in self._rich_grid_entries(
  3379. traverse_obj(renderer, ('contents', Ellipsis, T(dict)))):
  3380. yield from_
  3381. continuation = self._extract_continuation(renderer)
  3382. continue
  3383. if not continuation:
  3384. continuation = self._extract_continuation(is_renderer)
  3385. if not continuation:
  3386. continuation = self._extract_continuation(slr_renderer)
  3387. else:
  3388. rich_grid_renderer = tab_content.get('richGridRenderer')
  3389. if not rich_grid_renderer:
  3390. return
  3391. for from_ in self._rich_grid_entries(
  3392. traverse_obj(rich_grid_renderer, ('contents', Ellipsis, T(dict)))):
  3393. yield from_
  3394. continuation = self._extract_continuation(rich_grid_renderer)
  3395. ytcfg = self._extract_ytcfg(item_id, webpage)
  3396. client_version = try_get(
  3397. ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) or '2.20210407.08.00'
  3398. headers = {
  3399. 'x-youtube-client-name': '1',
  3400. 'x-youtube-client-version': client_version,
  3401. 'content-type': 'application/json',
  3402. }
  3403. context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict) or {
  3404. 'client': {
  3405. 'clientName': 'WEB',
  3406. 'clientVersion': client_version,
  3407. },
  3408. }
  3409. visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
  3410. identity_token = self._extract_identity_token(ytcfg, webpage)
  3411. if identity_token:
  3412. headers['x-youtube-identity-token'] = identity_token
  3413. data = {
  3414. 'context': context,
  3415. }
  3416. for page_num in itertools.count(1):
  3417. if not continuation:
  3418. break
  3419. if visitor_data:
  3420. headers['X-Goog-Visitor-Id'] = visitor_data
  3421. data['continuation'] = continuation['continuation']
  3422. data['clickTracking'] = {
  3423. 'clickTrackingParams': continuation['itct'],
  3424. }
  3425. count = 0
  3426. retries = 3
  3427. while count <= retries:
  3428. try:
  3429. # Downloading page may result in intermittent 5xx HTTP error
  3430. # that is usually worked around with a retry
  3431. response = self._download_json(
  3432. 'https://www.youtube.com/youtubei/v1/browse',
  3433. None, 'Downloading page %d%s' % (page_num, ' (retry #%d)' % count if count else ''),
  3434. query={
  3435. # 'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
  3436. 'prettyPrint': 'false',
  3437. },
  3438. headers=headers, data=json.dumps(data).encode('utf8'))
  3439. break
  3440. except ExtractorError as e:
  3441. if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
  3442. count += 1
  3443. if count <= retries:
  3444. continue
  3445. raise
  3446. if not response:
  3447. break
  3448. visitor_data = try_get(
  3449. response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data
  3450. continuation_contents = try_get(
  3451. response, lambda x: x['continuationContents'], dict)
  3452. if continuation_contents:
  3453. continuation_renderer = continuation_contents.get('playlistVideoListContinuation')
  3454. if continuation_renderer:
  3455. for entry in self._playlist_entries(continuation_renderer):
  3456. yield entry
  3457. continuation = self._extract_continuation(continuation_renderer)
  3458. continue
  3459. continuation_renderer = continuation_contents.get('gridContinuation')
  3460. if continuation_renderer:
  3461. for entry in self._grid_entries(continuation_renderer):
  3462. yield entry
  3463. continuation = self._extract_continuation(continuation_renderer)
  3464. continue
  3465. continuation_renderer = continuation_contents.get('itemSectionContinuation')
  3466. if continuation_renderer:
  3467. for entry in self._post_thread_continuation_entries(continuation_renderer):
  3468. yield entry
  3469. continuation = self._extract_continuation(continuation_renderer)
  3470. continue
  3471. on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
  3472. continuation_items = try_get(
  3473. on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
  3474. if continuation_items:
  3475. continuation_item = continuation_items[0]
  3476. if not isinstance(continuation_item, dict):
  3477. continue
  3478. renderer = self._extract_grid_item_renderer(continuation_item)
  3479. if renderer:
  3480. grid_renderer = {'items': continuation_items}
  3481. for entry in self._grid_entries(grid_renderer):
  3482. yield entry
  3483. continuation = self._extract_continuation(grid_renderer)
  3484. continue
  3485. renderer = continuation_item.get('playlistVideoRenderer') or continuation_item.get('itemSectionRenderer')
  3486. if renderer:
  3487. video_list_renderer = {'contents': continuation_items}
  3488. for entry in self._playlist_entries(video_list_renderer):
  3489. yield entry
  3490. continuation = self._extract_continuation(video_list_renderer)
  3491. continue
  3492. renderer = continuation_item.get('backstagePostThreadRenderer')
  3493. if renderer:
  3494. continuation_renderer = {'contents': continuation_items}
  3495. for entry in self._post_thread_continuation_entries(continuation_renderer):
  3496. yield entry
  3497. continuation = self._extract_continuation(continuation_renderer)
  3498. continue
  3499. renderer = continuation_item.get('richItemRenderer')
  3500. if renderer:
  3501. for entry in self._rich_grid_entries(continuation_items):
  3502. yield entry
  3503. continuation = self._extract_continuation({'contents': continuation_items})
  3504. continue
  3505. break
  3506. @staticmethod
  3507. def _extract_selected_tab(tabs):
  3508. for tab in tabs:
  3509. renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
  3510. if renderer.get('selected') is True:
  3511. return renderer
  3512. else:
  3513. raise ExtractorError('Unable to find selected tab')
  3514. def _extract_uploader(self, metadata, data):
  3515. uploader = {}
  3516. renderers = traverse_obj(data,
  3517. ('sidebar', 'playlistSidebarRenderer', 'items'))
  3518. uploader['channel_id'] = self._extract_channel_id('', metadata=metadata, renderers=renderers)
  3519. uploader['uploader'] = (
  3520. self._extract_author_var('', 'name', renderers=renderers)
  3521. or self._extract_author_var('', 'name', metadata=metadata))
  3522. uploader['uploader_url'] = self._yt_urljoin(
  3523. self._extract_author_var('', 'url', metadata=metadata, renderers=renderers))
  3524. uploader['uploader_id'] = self._extract_uploader_id(uploader['uploader_url'])
  3525. uploader['channel'] = uploader['uploader']
  3526. return uploader
  3527. @classmethod
  3528. def _extract_alert(cls, data):
  3529. alerts = []
  3530. for alert in traverse_obj(data, ('alerts', Ellipsis), expected_type=dict):
  3531. alert_text = traverse_obj(
  3532. alert, (None, lambda x: x['alertRenderer']['text']), get_all=False)
  3533. if not alert_text:
  3534. continue
  3535. text = cls._get_text(alert_text, 'text')
  3536. if text:
  3537. alerts.append(text)
  3538. return '\n'.join(alerts)
  3539. def _extract_from_tabs(self, item_id, webpage, data, tabs):
  3540. selected_tab = self._extract_selected_tab(tabs)
  3541. renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'),
  3542. expected_type=dict) or {}
  3543. playlist_id = item_id
  3544. title = description = None
  3545. if renderer:
  3546. channel_title = txt_or_none(renderer.get('title')) or item_id
  3547. tab_title = txt_or_none(selected_tab.get('title'))
  3548. title = join_nonempty(
  3549. channel_title or item_id, tab_title,
  3550. txt_or_none(selected_tab.get('expandedText')),
  3551. delim=' - ')
  3552. description = txt_or_none(renderer.get('description'))
  3553. playlist_id = txt_or_none(renderer.get('externalId')) or playlist_id
  3554. else:
  3555. renderer = traverse_obj(data,
  3556. ('metadata', 'playlistMetadataRenderer'),
  3557. ('header', 'hashtagHeaderRenderer'),
  3558. expected_type=dict) or {}
  3559. title = traverse_obj(renderer, 'title', ('hashtag', 'simpleText'),
  3560. expected_type=txt_or_none)
  3561. playlist = self.playlist_result(
  3562. self._entries(selected_tab, item_id, webpage),
  3563. playlist_id=playlist_id, playlist_title=title,
  3564. playlist_description=description)
  3565. return merge_dicts(playlist, self._extract_uploader(renderer, data))
  3566. def _extract_from_playlist(self, item_id, url, data, playlist):
  3567. title = traverse_obj((playlist, data),
  3568. (0, 'title'), (1, 'titleText', 'simpleText'),
  3569. expected_type=txt_or_none)
  3570. playlist_id = txt_or_none(playlist.get('playlistId')) or item_id
  3571. # Inline playlist rendition continuation does not always work
  3572. # at Youtube side, so delegating regular tab-based playlist URL
  3573. # processing whenever possible.
  3574. playlist_url = urljoin(url, traverse_obj(
  3575. playlist, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
  3576. expected_type=url_or_none))
  3577. if playlist_url and playlist_url != url:
  3578. return self.url_result(
  3579. playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
  3580. video_title=title)
  3581. return self.playlist_result(
  3582. self._playlist_entries(playlist), playlist_id=playlist_id,
  3583. playlist_title=title)
  3584. def _extract_identity_token(self, ytcfg, webpage):
  3585. if ytcfg:
  3586. token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
  3587. if token:
  3588. return token
  3589. return self._search_regex(
  3590. r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
  3591. 'identity token', default=None)
  3592. def _real_extract(self, url):
  3593. item_id = self._match_id(url)
  3594. url = update_url(url, netloc='www.youtube.com')
  3595. qs = parse_qs(url)
  3596. def qs_get(key, default=None):
  3597. return qs.get(key, [default])[-1]
  3598. # Go around for /feeds/videos.xml?playlist_id={pl_id}
  3599. if item_id == 'feeds' and '/feeds/videos.xml?' in url:
  3600. playlist_id = qs_get('playlist_id')
  3601. if playlist_id:
  3602. return self.url_result(
  3603. update_url_query('https://www.youtube.com/playlist', {
  3604. 'list': playlist_id,
  3605. }), ie=self.ie_key(), video_id=playlist_id)
  3606. # Handle both video/playlist URLs
  3607. video_id = qs_get('v')
  3608. playlist_id = qs_get('list')
  3609. if video_id and playlist_id:
  3610. if self._downloader.params.get('noplaylist'):
  3611. self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
  3612. return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
  3613. self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
  3614. webpage = self._download_webpage(url, item_id)
  3615. data = self._extract_yt_initial_data(item_id, webpage)
  3616. tabs = try_get(
  3617. data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
  3618. if tabs:
  3619. return self._extract_from_tabs(item_id, webpage, data, tabs)
  3620. playlist = try_get(
  3621. data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
  3622. if playlist:
  3623. return self._extract_from_playlist(item_id, url, data, playlist)
  3624. # Fallback to video extraction if no playlist alike page is recognized.
  3625. # First check for the current video then try the v attribute of URL query.
  3626. video_id = try_get(
  3627. data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
  3628. compat_str) or video_id
  3629. if video_id:
  3630. return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
  3631. # Capture and output alerts
  3632. alert = self._extract_alert(data)
  3633. if alert:
  3634. raise ExtractorError(alert, expected=True)
  3635. # Failed to recognize
  3636. raise ExtractorError('Unable to recognize tab page')
  3637. class YoutubePlaylistIE(InfoExtractor):
  3638. IE_DESC = 'YouTube.com playlists'
  3639. _VALID_URL = r'''(?x)(?:
  3640. (?:https?://)?
  3641. (?:\w+\.)?
  3642. (?:
  3643. (?:
  3644. youtube(?:kids)?\.com|
  3645. invidio\.us
  3646. )
  3647. /.*?\?.*?\blist=
  3648. )?
  3649. (?P<id>%(playlist_id)s)
  3650. )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
  3651. IE_NAME = 'youtube:playlist'
  3652. _TESTS = [{
  3653. 'note': 'issue #673',
  3654. 'url': 'PLBB231211A4F62143',
  3655. 'info_dict': {
  3656. 'title': '[OLD]Team Fortress 2 (Class-based LP)',
  3657. 'id': 'PLBB231211A4F62143',
  3658. 'uploader': 'Wickman',
  3659. 'uploader_id': '@WickmanVT',
  3660. 'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
  3661. },
  3662. 'playlist_mincount': 29,
  3663. }, {
  3664. 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
  3665. 'info_dict': {
  3666. 'title': 'YDL_safe_search',
  3667. 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
  3668. },
  3669. 'playlist_count': 2,
  3670. 'skip': 'This playlist is private',
  3671. }, {
  3672. 'note': 'embedded',
  3673. 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
  3674. # TODO: full playlist requires _reload_with_unavailable_videos()
  3675. # 'playlist_count': 4,
  3676. 'playlist_mincount': 1,
  3677. 'info_dict': {
  3678. 'title': 'JODA15',
  3679. 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
  3680. 'uploader': 'milan',
  3681. 'uploader_id': '@milan5503',
  3682. 'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
  3683. },
  3684. }, {
  3685. 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
  3686. 'playlist_mincount': 455,
  3687. 'info_dict': {
  3688. 'title': '2018 Chinese New Singles (11/6 updated)',
  3689. 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
  3690. 'uploader': 'LBK',
  3691. 'uploader_id': '@music_king',
  3692. 'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
  3693. },
  3694. }, {
  3695. 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
  3696. 'only_matching': True,
  3697. }, {
  3698. # music album playlist
  3699. 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
  3700. 'only_matching': True,
  3701. }]
  3702. @classmethod
  3703. def suitable(cls, url):
  3704. if YoutubeTabIE.suitable(url):
  3705. return False
  3706. if parse_qs(url).get('v', [None])[0]:
  3707. return False
  3708. return super(YoutubePlaylistIE, cls).suitable(url)
  3709. def _real_extract(self, url):
  3710. playlist_id = self._match_id(url)
  3711. qs = parse_qs(url)
  3712. if not qs:
  3713. qs = {'list': playlist_id}
  3714. return self.url_result(
  3715. update_url_query('https://www.youtube.com/playlist', qs),
  3716. ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
  3717. class YoutubeYtBeIE(InfoExtractor):
  3718. _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
  3719. _TESTS = [{
  3720. 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
  3721. 'info_dict': {
  3722. 'id': 'yeWKywCrFtk',
  3723. 'ext': 'mp4',
  3724. 'title': 'Small Scale Baler and Braiding Rugs',
  3725. 'uploader': 'Backus-Page House Museum',
  3726. 'uploader_id': '@backuspagemuseum',
  3727. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@backuspagemuseum',
  3728. 'upload_date': '20161008',
  3729. 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
  3730. 'categories': ['Nonprofits & Activism'],
  3731. 'tags': list,
  3732. 'like_count': int,
  3733. },
  3734. 'params': {
  3735. 'noplaylist': True,
  3736. 'skip_download': True,
  3737. },
  3738. }, {
  3739. 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
  3740. 'only_matching': True,
  3741. }]
  3742. def _real_extract(self, url):
  3743. mobj = re.match(self._VALID_URL, url)
  3744. video_id = mobj.group('id')
  3745. playlist_id = mobj.group('playlist_id')
  3746. return self.url_result(
  3747. update_url_query('https://www.youtube.com/watch', {
  3748. 'v': video_id,
  3749. 'list': playlist_id,
  3750. 'feature': 'youtu.be',
  3751. }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
  3752. class YoutubeYtUserIE(InfoExtractor):
  3753. _VALID_URL = r'ytuser:(?P<id>.+)'
  3754. _TESTS = [{
  3755. 'url': 'ytuser:phihag',
  3756. 'only_matching': True,
  3757. }]
  3758. def _real_extract(self, url):
  3759. user_id = self._match_id(url)
  3760. return self.url_result(
  3761. 'https://www.youtube.com/user/%s' % user_id,
  3762. ie=YoutubeTabIE.ie_key(), video_id=user_id)
  3763. class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
  3764. IE_NAME = 'youtube:favorites'
  3765. IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
  3766. _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
  3767. _LOGIN_REQUIRED = True
  3768. _TESTS = [{
  3769. 'url': ':ytfav',
  3770. 'only_matching': True,
  3771. }, {
  3772. 'url': ':ytfavorites',
  3773. 'only_matching': True,
  3774. }]
  3775. def _real_extract(self, url):
  3776. return self.url_result(
  3777. 'https://www.youtube.com/playlist?list=LL',
  3778. ie=YoutubeTabIE.ie_key())
  3779. class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
  3780. IE_DESC = 'YouTube.com searches'
  3781. IE_NAME = 'youtube:search'
  3782. _SEARCH_KEY = 'ytsearch'
  3783. _SEARCH_PARAMS = 'EgIQAQ%3D%3D' # Videos only
  3784. _MAX_RESULTS = float('inf')
  3785. _TESTS = [{
  3786. 'url': 'ytsearch10:youtube-dl test video',
  3787. 'playlist_count': 10,
  3788. 'info_dict': {
  3789. 'id': 'youtube-dl test video',
  3790. 'title': 'youtube-dl test video',
  3791. },
  3792. }]
  3793. def _get_n_results(self, query, n):
  3794. """Get a specified number of results for a query"""
  3795. entries = itertools.islice(self._search_results(query, self._SEARCH_PARAMS), 0, None if n == float('inf') else n)
  3796. return self.playlist_result(entries, query, query)
  3797. class YoutubeSearchDateIE(YoutubeSearchIE):
  3798. IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
  3799. _SEARCH_KEY = 'ytsearchdate'
  3800. IE_DESC = 'YouTube.com searches, newest videos first'
  3801. _SEARCH_PARAMS = 'CAISAhAB' # Videos only, sorted by date
  3802. _TESTS = [{
  3803. 'url': 'ytsearchdate10:youtube-dl test video',
  3804. 'playlist_count': 10,
  3805. 'info_dict': {
  3806. 'id': 'youtube-dl test video',
  3807. 'title': 'youtube-dl test video',
  3808. },
  3809. }]
  3810. class YoutubeSearchURLIE(YoutubeBaseInfoExtractor):
  3811. IE_DESC = 'YouTube search URLs with sorting and filter support'
  3812. IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
  3813. _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
  3814. _TESTS = [{
  3815. 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
  3816. 'playlist_mincount': 5,
  3817. 'info_dict': {
  3818. 'id': 'youtube-dl test video',
  3819. 'title': 'youtube-dl test video',
  3820. },
  3821. 'params': {'playlistend': 5},
  3822. }, {
  3823. 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
  3824. 'only_matching': True,
  3825. }]
  3826. def _real_extract(self, url):
  3827. qs = parse_qs(url)
  3828. query = (qs.get('search_query') or qs.get('q'))[-1]
  3829. params = qs.get('sp', ('',))[-1]
  3830. return self.playlist_result(self._search_results(query, params), query, query)
  3831. class YoutubeFeedsInfoExtractor(YoutubeTabIE):
  3832. """
  3833. Base class for feed extractors
  3834. Subclasses must define the _FEED_NAME property.
  3835. """
  3836. _LOGIN_REQUIRED = True
  3837. @property
  3838. def IE_NAME(self):
  3839. return 'youtube:%s' % self._FEED_NAME
  3840. def _real_initialize(self):
  3841. self._login()
  3842. def _real_extract(self, url):
  3843. return self.url_result(
  3844. 'https://www.youtube.com/feed/%s' % self._FEED_NAME,
  3845. ie=YoutubeTabIE.ie_key())
  3846. class YoutubeWatchLaterIE(InfoExtractor):
  3847. IE_NAME = 'youtube:watchlater'
  3848. IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
  3849. _VALID_URL = r':ytwatchlater'
  3850. _TESTS = [{
  3851. 'url': ':ytwatchlater',
  3852. 'only_matching': True,
  3853. }]
  3854. def _real_extract(self, url):
  3855. return self.url_result(
  3856. 'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())
  3857. class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
  3858. IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
  3859. _VALID_URL = r':ytrec(?:ommended)?'
  3860. _FEED_NAME = 'recommended'
  3861. _TESTS = [{
  3862. 'url': ':ytrec',
  3863. 'only_matching': True,
  3864. }, {
  3865. 'url': ':ytrecommended',
  3866. 'only_matching': True,
  3867. }]
  3868. class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
  3869. IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
  3870. _VALID_URL = r':ytsubs(?:criptions)?'
  3871. _FEED_NAME = 'subscriptions'
  3872. _TESTS = [{
  3873. 'url': ':ytsubs',
  3874. 'only_matching': True,
  3875. }, {
  3876. 'url': ':ytsubscriptions',
  3877. 'only_matching': True,
  3878. }]
  3879. class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
  3880. IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
  3881. _VALID_URL = r':ythistory'
  3882. _FEED_NAME = 'history'
  3883. _TESTS = [{
  3884. 'url': ':ythistory',
  3885. 'only_matching': True,
  3886. }]
  3887. class YoutubeTruncatedURLIE(InfoExtractor):
  3888. IE_NAME = 'youtube:truncated_url'
  3889. IE_DESC = False # Do not list
  3890. _VALID_URL = r'''(?x)
  3891. (?:https?://)?
  3892. (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
  3893. (?:watch\?(?:
  3894. feature=[a-z_]+|
  3895. annotation_id=annotation_[^&]+|
  3896. x-yt-cl=[0-9]+|
  3897. hl=[^&]*|
  3898. t=[0-9]+
  3899. )?
  3900. |
  3901. attribution_link\?a=[^&]+
  3902. )
  3903. $
  3904. '''
  3905. _TESTS = [{
  3906. 'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
  3907. 'only_matching': True,
  3908. }, {
  3909. 'url': 'https://www.youtube.com/watch?',
  3910. 'only_matching': True,
  3911. }, {
  3912. 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
  3913. 'only_matching': True,
  3914. }, {
  3915. 'url': 'https://www.youtube.com/watch?feature=foo',
  3916. 'only_matching': True,
  3917. }, {
  3918. 'url': 'https://www.youtube.com/watch?hl=en-GB',
  3919. 'only_matching': True,
  3920. }, {
  3921. 'url': 'https://www.youtube.com/watch?t=2372',
  3922. 'only_matching': True,
  3923. }]
  3924. def _real_extract(self, url):
  3925. raise ExtractorError(
  3926. 'Did you forget to quote the URL? Remember that & is a meta '
  3927. 'character in most shells, so you want to put the URL in quotes, '
  3928. 'like youtube-dl '
  3929. '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
  3930. ' or simply youtube-dl BaW_jenozKc .',
  3931. expected=True)
  3932. class YoutubeTruncatedIDIE(InfoExtractor):
  3933. IE_NAME = 'youtube:truncated_id'
  3934. IE_DESC = False # Do not list
  3935. _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
  3936. _TESTS = [{
  3937. 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
  3938. 'only_matching': True,
  3939. }]
  3940. def _real_extract(self, url):
  3941. video_id = self._match_id(url)
  3942. raise ExtractorError(
  3943. 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
  3944. expected=True)