mts-music.py

MTS music downloader. Hashing algorithm was taken from https://github.com/llistochek/yandex-music-downloader
[raw]

001: #!/bin/env python
002: import lxml.html as lh
003: from bs4 import BeautifulSoup
004: from lxml import etree
005: import urllib.request as rq
006: from urllib.error import HTTPError
007: import json
008: import hashlib
009: from tqdm import tqdm
010: import sys, os
011: # @desc MTS music downloader. Hashing algorithm was taken from https://github.com/llistochek/yandex-music-downloader
012: 
# Base URL of the MTS Music site; page and API paths are appended to it.
MTS_MUSIC = "https://music.mts.ru"
# Salt prepended to the signed string when building the MD5 hash of a
# download link (see get_file_download_Link).
MD5_SALT = 'XGRlBW9FXlekgbPrRHuSiA'
"""
How to get ya_token:
        1. go to https://music.mts.ru
        2. log in to your account with premium
        3. open DevTools(ctrl+shift+I) 
        4. at Storage tab look for Local Storage data for https://music.mts.ru
        5. copy string with key ya_token
        6. paste it to ya_token.txt
"""


def _read_ya_token():
    """Return the first line of ./ya_token.txt, stripped, or None.

    The file is looked up relative to the current working directory. Any
    OSError while opening or reading it is reported on stdout and results in
    None, so the script keeps running without an OAuth token.
    """
    try:
        with open('ya_token.txt') as token_file:
            return token_file.readline().strip()
    except OSError:
        print("ya_token.txt not found. Defaulting to preview download")
        return None


# OAuth token sent with download-info requests; None when no token file exists.
YA_TOKEN = _read_ya_token()
030: 
031: 
class TqdmUpTo(tqdm):
    """tqdm progress bar that can be driven with absolute transfer counts.

    Adds `update_to(b, bsize, tsize)`, which converts "blocks so far" style
    reports into the incremental `tqdm.update(delta_n)` call. In this file it
    is passed as the `reporthook` of `urllib.request.urlretrieve`.

    Inspired by [twine#242](https://github.com/pypa/twine/pull/242),
    [here](https://github.com/pypa/twine/commit/42e55e06).
    """

    def update_to(self, b=1, bsize=1, tsize=None):
        """Move the bar to the absolute position `b * bsize`.

        b  : int, optional
            Number of blocks transferred so far [default: 1].
        bsize  : int, optional
            Size of each block (in tqdm units) [default: 1].
        tsize  : int, optional
            Total size (in tqdm units). If [default: None] remains unchanged.

        Returns whatever `tqdm.update` returns for the computed delta.
        """
        if tsize is not None:
            self.total = tsize
        transferred = b * bsize
        # update() takes an increment, so pass the distance from the current
        # position; afterwards self.n equals `transferred`.
        return self.update(transferred - self.n)
053: 
054: 
def download_file(url, filename):
    """Download `url` into the file `filename`, showing a progress bar.

    The bar is labelled with `filename` and counts bytes using 1024-based
    unit scaling. Errors raised by `urlretrieve` propagate to the caller.
    """
    progress = TqdmUpTo(
        unit='B',
        unit_scale=True,
        unit_divisor=1024,
        miniters=1,
        desc=filename,
    )
    with progress:
        rq.urlretrieve(url, filename, reporthook=progress.update_to, data=None)
        # Set the total to the amount actually counted so the bar ends full.
        progress.total = progress.n
060: 
061: 
def get_album_info(album_id):
    """Fetch an album page and return the album record embedded in it.

    Requests `{MTS_MUSIC}/album/{album_id}`, parses the HTML, decodes the JSON
    text of the element with id `__NEXT_DATA__` and returns its
    `props.pageProps.albumItem` entry. The script in this file reads the
    `title`, `cover`, `artists` and `tracks` keys from the result.

    Raises whatever `urlopen` raises for network/HTTP failures. A page without
    a `__NEXT_DATA__` element fails with AttributeError, and an unexpected
    JSON layout with KeyError.
    """
    # Read the body inside a `with` so the HTTP response is closed right away
    # instead of being left open until garbage collection.
    with rq.urlopen(f'{MTS_MUSIC}/album/{album_id}') as response:
        html = response.read()
    page = BeautifulSoup(html, 'lxml')
    next_data = json.loads(page.find(id='__NEXT_DATA__').text)
    return next_data['props']['pageProps']['albumItem']
def get_playlist_info(playlist_id):
    """Fetch a playlist page and return the playlist record embedded in it.

    Requests `{MTS_MUSIC}/playlist/{playlist_id}`, parses the HTML, decodes
    the JSON text of the element with id `__NEXT_DATA__` and returns its
    `props.pageProps.playlistItem` entry.

    `playlist_id` is inserted into the URL verbatim; the script in this file
    passes a value that contains a '/' (presumably an owner/playlist pair,
    to be confirmed against the site's URLs).

    Raises whatever `urlopen` raises for network/HTTP failures. A page without
    a `__NEXT_DATA__` element fails with AttributeError, and an unexpected
    JSON layout with KeyError.
    """
    # Read the body inside a `with` so the HTTP response is closed right away
    # instead of being left open until garbage collection.
    with rq.urlopen(f'{MTS_MUSIC}/playlist/{playlist_id}') as response:
        html = response.read()
    page = BeautifulSoup(html, 'lxml')
    # The element is looked up once and the raw payload is no longer dumped to
    # stdout, matching get_album_info.
    next_data = json.loads(page.find(id='__NEXT_DATA__').text)
    return next_data['props']['pageProps']['playlistItem']
071:         
072: 
073: 
def get_download_info(track_id, oauth_token):
    """Return the `result` field of a track's download-info API response.

    Requests `{MTS_MUSIC}/ya_api/tracks/{track_id}/download-info`. When
    `oauth_token` is truthy it is sent as an `Authorization: OAuth <token>`
    header; otherwise the request is made anonymously.

    The script in this file treats the returned value as a collection of
    dicts carrying `bitrateInKbps`, `codec` and `downloadInfoUrl`.

    Raises whatever `urlopen` raises for network/HTTP failures, and KeyError
    when the response has no `result` key.
    """
    req = rq.Request(f"{MTS_MUSIC}/ya_api/tracks/{track_id}/download-info")
    if oauth_token:
        req.add_header("Authorization", f"OAuth {oauth_token}")
    # Decode inside a `with` so the HTTP response is closed deterministically.
    with rq.urlopen(req) as response:
        dlinfo = json.load(response)
    return dlinfo['result']
080: 
081: 
def get_file_download_Link(dlinfo):
    """Resolve a download-info URL into the direct file URL.

    `dlinfo` is a URL string (the script passes a `downloadInfoUrl` value).
    It is fetched with `&format=json` appended, and the JSON answer must
    provide `host`, `path`, `ts` and `s`.

    The link is signed with the hex MD5 digest of
    `MD5_SALT + path-without-its-first-character + s`, and the result has the
    form `https://{host}/get-mp3/{hash}/{ts}{path}`.

    Raises whatever `urlopen` raises for network/HTTP failures, and KeyError
    when an expected field is missing from the answer.
    """
    # Decode inside a `with` so the HTTP response is closed deterministically.
    with rq.urlopen(dlinfo + "&format=json") as response:
        url_info = json.load(response)
    signed_text = MD5_SALT + url_info['path'][1:] + url_info['s']
    url_info['hash'] = hashlib.md5(signed_text.encode()).hexdigest()
    return "https://{host}/get-mp3/{hash}/{ts}{path}".format_map(url_info)
087: 
088: 
# Script body: download every track of one album or playlist into a directory
# named after its title, together with the cover image, and tag the files
# with eyeD3 when that module is installed.
#
# The first command-line argument is an album id or, when it contains a '/',
# a playlist id; without an argument a built-in sample album id is used.
album_id = sys.argv[1] if len(sys.argv) > 1 else "5899311"
if '/' in album_id:
    album_info = get_playlist_info(album_id)
    # The tagging code below reads album_info['artists']; give playlists an
    # empty list so both kinds of input are handled the same way.
    album_info['artists'] = []
else:
    album_info = get_album_info(album_id)
tracks = album_info['tracks']
print(album_info)

# A '/' in the title would be taken as a path separator and create nested
# directories; replace it the same way the track file names do.
album_dir = album_info['title'].replace('/', '_')
os.makedirs(album_dir, exist_ok=True)
coverfname = f"{album_dir}/cover.jpg"
download_file(album_info['cover'], coverfname)
with open(coverfname, 'rb') as f:
    coverdata = f.read()

# Try the optional tagging dependency once, so a missing module is reported a
# single time instead of after every downloaded track.
try:
    import eyed3
except ImportError:
    eyed3 = None
    print("eyeD3 python module not found: ID3 tags are not set")

for i, trk in enumerate(tracks):
    try:
        trkid = trk['id']
        artist_names = ",".join(a['name'] for a in trk['artists'])
        trkname = artist_names + ' - ' + trk['title']
        dlinfos = get_download_info(trkid, YA_TOKEN)
        if not dlinfos:
            # max() raises ValueError on an empty sequence, which would abort
            # the whole run; skip just this track instead.
            print('failed to download', trk, 'no download info returned')
            continue
        # Pick the variant with the highest bitrate.
        best_info = max(dlinfos, key=lambda x: x['bitrateInKbps'])
        trkformat = best_info['codec']
        best_link = best_info['downloadInfoUrl']
        # NOTE(review): the file-name index is 0-based while the track_num
        # tag below is 1-based; kept as is so existing file names stay stable.
        filename = f"{album_dir}/{i:02d}. {trkname.replace('/','_')}.{trkformat}"
        download_file(get_file_download_Link(best_link), filename)
        if eyed3 is None:
            continue
        file = eyed3.load(filename)
        if file is None:
            # eyed3.load() returns None for files it does not recognise.
            print(f"eyeD3 could not read {filename}: ID3 tags are not set")
            continue
        file.initTag()
        file.tag.album = album_info['title']
        file.tag.artist = artist_names
        file.tag.album_artist = ",".join(a['name'] for a in album_info['artists'])
        file.tag.title = trk['title']
        file.tag.track_num = i + 1
        file.tag.images.set(eyed3.id3.frames.ImageFrame.FRONT_COVER, coverdata, 'image/jpeg')
        file.tag.save()
    except HTTPError as e:
        # A failed request only skips the current track.
        print('failed to download', trk, e)
131: