Исправление экстрактора yandex music для yt-dlp

Posted on Сб 27 июня 2026 in misc

Источник

https://github.com/DesSolo/yt-dlp/blob/master/yt_dlp/extractor/yandexmusic.py

Клод справился, PR, к сожалению, не будет.

Авторизацию переделали на OAuth. Токен можно получить тут: https://oauth.yandex.ru/authorize?response_type=token&client_id=23cabbbdc6cd418abb4b39c32c41195d

23cabbbdc6cd418abb4b39c32c41195d — это официальный client_id приложения Яндекс.Музыки (публично известный).

./yt-dlp.sh --extractor-args "yandexmusic:token=<TOKEN>"

/usr/lib/python3/dist-packages/yt_dlp/extractor/yandexmusic.py:

import hashlib
import itertools

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    float_or_none,
    int_or_none,
    join_nonempty,
    str_or_none,
    xpath_text,
)
from ..utils.traversal import traverse_obj


class YandexMusicBaseIE(InfoExtractor):
    """Shared helpers for the Yandex Music extractors.

    Covers API access (with optional OAuth token), construction of signed
    download URLs and conversion of API track objects into info dicts.
    """

    _VALID_URL_BASE = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by|com)'
    _API_BASE = 'https://api.music.yandex.net'
    # Salt used to sign the get-mp3 download URL, see _extract_formats
    _MP3_SALT = 'XGRlBW9FXlekgbPrRHuSiA'
    # Value sent in the X-Yandex-Music-Client header of every API request
    _CLIENT = 'YandexMusicAndroid/24023621'

    def _get_token(self):
        """Return the OAuth token from `--extractor-args "yandexmusic:token=..."`, or None."""
        return self._configuration_arg(
            'token', [None], ie_key='YandexMusic', casesense=True)[0]

    def _api_headers(self):
        """Build the request headers; Authorization is added only when a token is configured."""
        headers = {'X-Yandex-Music-Client': self._CLIENT}
        token = self._get_token()
        if token:
            headers['Authorization'] = f'OAuth {token}'
        return headers

    def _call_api(self, path, item_id, note='Downloading JSON metadata', query=None,
                  data=None, fatal=True):
        """Request `path` from the API and return the `result` member of the response.

        A falsy response (e.g. a failed non-fatal download) is returned as is.
        Raises an expected ExtractorError when the response carries an
        `error` object with a `message` or `name` string.
        """
        response = self._download_json(
            f'{self._API_BASE}/{path}', item_id, note, fatal=fatal,
            headers=self._api_headers(), query=query, data=data)
        if not response:
            return response
        error = traverse_obj(response, ('error', ('message', 'name'), {str}, any))
        if error:
            raise ExtractorError(f'YandexMusic said: {error}', expected=True)
        return response.get('result')

    def _extract_formats(self, track, track_id):
        """Return the list of downloadable formats for `track_id`.

        `track` is currently unused. Entries flagged as previews are skipped;
        if that leaves nothing and no token was supplied, a login-required
        error explaining how to obtain a token is raised.
        """
        formats = []
        download_info = self._call_api(
            f'tracks/{track_id}/download-info', track_id,
            'Downloading track download info', fatal=False)
        only_preview = False
        for info in traverse_obj(download_info, lambda _, v: v['downloadInfoUrl']):
            if info.get('preview'):
                only_preview = True
                continue
            doc = self._download_xml(
                info['downloadInfoUrl'], track_id, 'Downloading track location XML',
                headers=self._api_headers(), fatal=False)
            # A non-fatal download reports a failed request as False and a
            # failed parse as None; neither can be queried with xpath_text.
            # Identity checks are used because an Element's truthiness
            # depends on its children.
            if doc is None or doc is False:
                continue
            host = xpath_text(doc, 'host')
            path = xpath_text(doc, 'path')
            ts = xpath_text(doc, 'ts')
            sign_salt = xpath_text(doc, 's')
            if not (host and path and ts and sign_salt):
                continue
            # Signature: md5 over the static salt, the path without its
            # first character and the per-request value from <s>
            sign = hashlib.md5(
                (self._MP3_SALT + path[1:] + sign_salt).encode()).hexdigest()
            codec = info.get('codec') or 'mp3'
            formats.append({
                'url': f'https://{host}/get-mp3/{sign}/{ts}{path}',
                'format_id': join_nonempty(codec, int_or_none(info.get('bitrateInKbps'))),
                'ext': {'aac': 'm4a'}.get(codec, codec),
                'vcodec': 'none',
                'acodec': codec,
                'abr': int_or_none(info.get('bitrateInKbps')),
            })

        if not formats and only_preview and not self._get_token():
            self.raise_login_required(
                'Only a 30-second preview is available without authentication. '
                'Pass a Yandex OAuth token with '
                '--extractor-args "yandexmusic:token=YOUR_TOKEN" '
                '(obtain it via https://oauth.yandex.ru/authorize'
                '?response_type=token&client_id=23cabbbdc6cd418abb4b39c32c41195d)',
                method=None)
        return formats

    def _extract_artists(self, artists):
        """Join the `name` of every artist entry with ', '; None when there are no names."""
        names = traverse_obj(artists, (..., 'name', {str}))
        return ', '.join(names) or None

    def _track_info(self, track):
        """Convert an API track object into an info dict, including its formats.

        Album-level fields are taken from the first entry of `albums`.
        Raises KeyError when the track has no `title`.
        """
        track_id = str_or_none(track.get('id') or track.get('realId'))
        title = track['title']
        album = traverse_obj(track, ('albums', 0, {dict})) or {}

        # Prefer the track's own cover and fall back to the album cover
        cover_uri = track.get('coverUri') or album.get('coverUri')
        thumbnail = None
        if cover_uri:
            # The URI is scheme-less; '%%' is presumably a size placeholder
            thumbnail = 'https://' + cover_uri.replace('%%', 'orig')

        artist = self._extract_artists(track.get('artists'))
        return {
            'id': track_id,
            'title': join_nonempty(artist, title, delim=' - '),
            'track': title,
            'artist': artist,
            'formats': self._extract_formats(track, track_id),
            'thumbnail': thumbnail,
            'duration': float_or_none(track.get('durationMs'), 1000),
            'filesize': int_or_none(track.get('fileSize')) or None,
            'album': album.get('title'),
            'album_artist': self._extract_artists(album.get('artists')),
            'release_year': int_or_none(album.get('year')),
            'genre': album.get('genre'),
            'disc_number': traverse_obj(album, ('trackPosition', 'volume', {int_or_none})),
            'track_number': traverse_obj(album, ('trackPosition', 'index', {int_or_none})),
        }


class YandexMusicTrackIE(YandexMusicBaseIE):
    """Extractor for a single track addressed as /album/<album_id>/track/<id>."""

    IE_NAME = 'yandexmusic:track'
    IE_DESC = 'Яндекс.Музыка - Трек'
    _VALID_URL = rf'{YandexMusicBaseIE._VALID_URL_BASE}/album/(?P<album_id>\d+)/track/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://music.yandex.ru/album/40733359/track/148373155',
        'info_dict': {
            'id': '148373155',
            'ext': 'mp3',
            'title': 'BEARWOLF - Феникс',
            'track': 'Феникс',
            'artist': 'BEARWOLF',
            'album': 'Феникс',
            'album_artist': 'BEARWOLF',
            'release_year': 2026,
            'genre': 'ruspop',
            'duration': 172.26,
            'disc_number': 1,
            'track_number': 1,
            'thumbnail': r're:https?://.+',
        },
        'params': {'skip_download': True},
        'skip': 'Requires a Yandex OAuth token (see --extractor-args yandexmusic:token=...)',
    }, {
        'url': 'http://music.yandex.com/album/540508/track/4878838',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the track object for the URL's id and turn it into an info dict."""
        track_id = self._match_id(url)
        api_result = self._call_api(
            f'tracks/{track_id}', track_id, 'Downloading track JSON')
        # Only the first element of the result is used, and only if it is a dict
        track = traverse_obj(api_result, (0, {dict}))
        if track:
            return self._track_info(track)
        raise ExtractorError('Unable to find track', expected=True)


class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
    """Helpers shared by extractors that return a list of tracks."""

    def _resolve_tracks(self, tracks, item_id):
        """Return full track dicts for a list of playlist entries.

        Entries carrying an embedded `track` object come first in the result;
        the remaining ids are fetched from the `tracks` endpoint in batches
        of 250 and appended afterwards.
        """
        resolved, pending_ids = [], []
        for item in tracks:
            is_mapping = isinstance(item, dict)
            embedded = item.get('track') if is_mapping else None
            if embedded:
                resolved.append(embedded)
                continue
            # Non-dict entries are treated as the track id itself
            pending_id = str_or_none(item.get('id') if is_mapping else item)
            if pending_id:
                pending_ids.append(pending_id)

        batch_size = 250
        for offset in range(0, len(pending_ids), batch_size):
            batch = pending_ids[offset:offset + batch_size]
            fetched = self._call_api(
                'tracks', item_id, f'Downloading tracks JSON ({offset + len(batch)})',
                data=f'track-ids={",".join(batch)}'.encode())
            resolved.extend(fetched or [])
        return resolved

    def _build_playlist(self, tracks):
        """Lazily yield a url_result per track; tracks lacking a track or album id are skipped."""
        for item in tracks:
            track_id = str_or_none(item.get('id') or item.get('realId'))
            album_id = traverse_obj(item, ('albums', 0, 'id', {str_or_none}))
            if track_id and album_id:
                yield self.url_result(
                    f'https://music.yandex.ru/album/{album_id}/track/{track_id}',
                    ie=YandexMusicTrackIE, video_id=track_id,
                    video_title=traverse_obj(item, ('title', {str})))


class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
    """Extractor for a whole album addressed as /album/<id>."""

    IE_NAME = 'yandexmusic:album'
    IE_DESC = 'Яндекс.Музыка - Альбом'
    _VALID_URL = rf'{YandexMusicBaseIE._VALID_URL_BASE}/album/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://music.yandex.ru/album/40733359',
        'info_dict': {
            'id': '40733359',
            'title': 'BEARWOLF - Феникс (2026)',
        },
        'playlist_mincount': 1,
    }]

    @classmethod
    def suitable(cls, url):
        # _VALID_URL is not anchored at the end, so it would also match
        # /album/<id>/track/<id>; those URLs are left to YandexMusicTrackIE
        return False if YandexMusicTrackIE.suitable(url) else super().suitable(url)

    def _real_extract(self, url):
        """Return a playlist with every track of all volumes of the album.

        The playlist title is "<first artist> - <title> (<year>)", with any
        missing part omitted. Raises an expected ExtractorError when the API
        returns no album object.
        """
        album_id = self._match_id(url)
        album = self._call_api(
            f'albums/{album_id}/with-tracks', album_id, 'Downloading album JSON')
        if not album or not isinstance(album, dict):
            raise ExtractorError('Unable to find album', expected=True)

        # `volumes` is a list of track lists; tolerate missing/empty volumes
        tracks = [
            track
            for volume in album.get('volumes') or []
            for track in volume or []]

        # join_nonempty drops missing parts, so an absent title can neither
        # show up as the text "None" nor break the concatenation below
        title = join_nonempty(
            traverse_obj(album, ('artists', 0, 'name', {str})),
            album.get('title'), delim=' - ') or None
        year = album.get('year')
        if title and year:
            title = f'{title} ({year})'

        return self.playlist_result(
            self._build_playlist(tracks),
            str_or_none(album.get('id')) or album_id, title)


class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
    """Extractor for a user playlist addressed as /users/<user>/playlists/<id>."""

    IE_NAME = 'yandexmusic:playlist'
    IE_DESC = 'Яндекс.Музыка - Плейлист'
    _VALID_URL = rf'{YandexMusicBaseIE._VALID_URL_BASE}/users/(?P<user>[^/]+)/playlists/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://music.yandex.ru/users/music.partners/playlists/1245',
        'info_dict': {
            'id': '1245',
        },
        'playlist_mincount': 1,
    }, {
        'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the playlist, resolve its entries to full tracks and return a playlist result."""
        mobj = self._match_valid_url(url)
        user = mobj.group('user')
        playlist_id = mobj.group('id')

        playlist = self._call_api(
            f'users/{user}/playlists/{playlist_id}', playlist_id,
            'Downloading playlist JSON')

        # Resolution happens eagerly here; the entries themselves stay lazy
        full_tracks = self._resolve_tracks(playlist.get('tracks') or [], playlist_id)
        entries = self._build_playlist(full_tracks)
        return self.playlist_result(
            entries, playlist_id,
            playlist_title=playlist.get('title'),
            playlist_description=playlist.get('description'))


class YandexMusicArtistBaseIE(YandexMusicPlaylistBaseIE):
    """Helpers shared by the artist extractors."""

    def _artist_name(self, artist_id):
        """Return the artist's name from the brief-info endpoint, or None if unavailable."""
        # Non-fatal: a failed lookup only costs the readable playlist title
        brief_info = self._call_api(
            f'artists/{artist_id}/brief-info', artist_id,
            'Downloading artist brief info', fatal=False)
        return traverse_obj(brief_info, ('artist', 'name', {str}))


class YandexMusicArtistTracksIE(YandexMusicArtistBaseIE):
    """Extractor for all tracks of an artist addressed as /artist/<id>/tracks."""

    IE_NAME = 'yandexmusic:artist:tracks'
    IE_DESC = 'Яндекс.Музыка - Артист - Треки'
    _VALID_URL = rf'{YandexMusicBaseIE._VALID_URL_BASE}/artist/(?P<id>\d+)/tracks'

    _TESTS = [{
        'url': 'https://music.yandex.ru/artist/21022190/tracks',
        'info_dict': {
            'id': '21022190',
        },
        'playlist_mincount': 1,
    }]

    def _real_extract(self, url):
        """Page through the artist's tracks (100 per request) and return them as a playlist."""
        artist_id = self._match_id(url)

        collected = []
        page = 0
        while True:
            response = self._call_api(
                f'artists/{artist_id}/tracks', artist_id,
                f'Downloading artist tracks page {page + 1}',
                query={'page': page, 'page-size': 100})
            batch = response.get('tracks') or []
            collected += batch
            total = traverse_obj(response, ('pager', 'total', {int_or_none}))
            # Stop on an empty page, or once the advertised total is reached
            if not batch:
                break
            if total is not None and len(collected) >= total:
                break
            page += 1

        artist_name = self._artist_name(artist_id)
        playlist_title = join_nonempty(artist_name or artist_id, 'Треки', delim=' - ')
        return self.playlist_result(
            self._build_playlist(collected), artist_id, playlist_title)


class YandexMusicArtistAlbumsIE(YandexMusicArtistBaseIE):
    """Extractor for all albums of an artist addressed as /artist/<id>/albums."""

    IE_NAME = 'yandexmusic:artist:albums'
    IE_DESC = 'Яндекс.Музыка - Артист - Альбомы'
    _VALID_URL = rf'{YandexMusicBaseIE._VALID_URL_BASE}/artist/(?P<id>\d+)/albums'

    _TESTS = [{
        'url': 'https://music.yandex.ru/artist/21022190/albums',
        'info_dict': {
            'id': '21022190',
        },
        'playlist_mincount': 1,
    }]

    def _real_extract(self, url):
        """Page through the artist's albums (100 per request) and return one url_result per album."""
        artist_id = self._match_id(url)

        collected = []
        page = 0
        while True:
            response = self._call_api(
                f'artists/{artist_id}/direct-albums', artist_id,
                f'Downloading artist albums page {page + 1}',
                query={'page': page, 'page-size': 100})
            batch = response.get('albums') or []
            collected += batch
            total = traverse_obj(response, ('pager', 'total', {int_or_none}))
            # Stop on an empty page, or once the advertised total is reached
            if not batch:
                break
            if total is not None and len(collected) >= total:
                break
            page += 1

        # Albums without a usable id are dropped
        album_ids = (traverse_obj(album, ('id', {str_or_none})) for album in collected)
        entries = [
            self.url_result(
                f'https://music.yandex.ru/album/{album_id}',
                YandexMusicAlbumIE, album_id)
            for album_id in album_ids if album_id]

        artist_name = self._artist_name(artist_id)
        return self.playlist_result(
            entries, artist_id,
            join_nonempty(artist_name or artist_id, 'Альбомы', delim=' - '))