mts-music.py
MTS music downloader. Hashing algorithm was taken from https://github.com/llistochek/yandex-music-downloader
001:
002: import lxml.html as lh
003: from bs4 import BeautifulSoup
004: from lxml import etree
005: import urllib.request as rq
006: from urllib.error import HTTPError
007: import json
008: import hashlib
009: from tqdm import tqdm
010: import sys, os
011:
012:
# Base URL of the MTS Music web site; page and API paths are appended to it.
MTS_MUSIC = "https://music.mts.ru"

# OAuth token sent with download-info requests. It is read from the first
# line of ``ya_token.txt`` in the current working directory. ``None`` means
# "no token": the request is then sent without an Authorization header,
# which the message below describes as a preview download.
YA_TOKEN = None
try:
    with open('ya_token.txt') as f:
        # A blank first line is normalised to None so "no usable token" has
        # a single representation and can be reported to the user.
        YA_TOKEN = f.readline().strip() or None
except OSError:
    print("ya_token.txt not found. Defaulting to preview download")
else:
    if YA_TOKEN is None:
        print("ya_token.txt is empty. Defaulting to preview download")

# Salt prepended to the path and secret when computing the MD5 hash that is
# embedded in the direct file URL.
MD5_SALT = 'XGRlBW9FXlekgbPrRHuSiA'
030:
031:
class TqdmUpTo(tqdm):
    """tqdm progress bar with an ``update_to`` method usable as a report hook.

    ``update_to`` accepts the (block count, block size, total size) triple
    that ``urllib.request.urlretrieve`` passes to its ``reporthook``.
    """

    def update_to(self, b=1, bsize=1, tsize=None):
        """Advance the bar to the absolute position ``b * bsize``.

        b: number of blocks transferred so far (default 1).
        bsize: size of each block, in tqdm units (default 1).
        tsize: total size in tqdm units; when not None it replaces
            ``self.total``.

        Returns whatever ``tqdm.update`` returns.
        """
        if tsize is not None:
            self.total = tsize
        transferred = b * bsize
        # tqdm.update() takes an increment, so pass the distance between the
        # absolute position and what the bar has already counted.
        return self.update(transferred - self.n)
053:
054:
def download_file(url, filename):
    """Download *url* into the file *filename*, showing a progress bar.

    The bar counts bytes (scaled with a 1024 divisor) and is labelled with
    *filename*. Errors raised by ``urlretrieve`` propagate to the caller.
    """
    bar_options = dict(
        unit='B',
        unit_scale=True,
        unit_divisor=1024,
        miniters=1,
        desc=filename,
    )
    with TqdmUpTo(**bar_options) as progress:
        rq.urlretrieve(url, filename, reporthook=progress.update_to, data=None)
        # Set the total to the amount actually counted so the bar finishes
        # at 100% even if no total size was reported during the transfer.
        progress.total = progress.n
060:
061:
def _fetch_page_props(path):
    """Fetch ``MTS_MUSIC + path`` and return the page's embedded pageProps.

    The page is parsed with BeautifulSoup/lxml; the element with id
    ``__NEXT_DATA__`` is expected to contain JSON, from which
    ``['props']['pageProps']`` is returned.

    Raises ValueError if the page has no ``__NEXT_DATA__`` element. Errors
    from ``urlopen`` and ``json.loads``, and KeyError for missing JSON
    members, propagate unchanged.
    """
    url = MTS_MUSIC + path
    # Close the HTTP response as soon as the markup has been parsed.
    with rq.urlopen(url) as response:
        page = BeautifulSoup(response, 'lxml')
    payload = page.find(id='__NEXT_DATA__')
    if payload is None:
        raise ValueError(f"no __NEXT_DATA__ element found at {url}")
    return json.loads(payload.text)['props']['pageProps']


def get_album_info(album_id):
    """Return the ``albumItem`` object embedded in the page /album/<album_id>."""
    return _fetch_page_props(f'/album/{album_id}')['albumItem']


def get_playlist_info(playlist_id):
    """Return the ``playlistItem`` object embedded in the page /playlist/<playlist_id>."""
    return _fetch_page_props(f'/playlist/{playlist_id}')['playlistItem']
071:
072:
073:
def get_download_info(track_id, oauth_token):
    """Return the ``result`` member of a track's download-info response.

    track_id: track identifier interpolated into the request path.
    oauth_token: when truthy, sent as an ``Authorization: OAuth <token>``
        header; when falsy (None or ''), no Authorization header is sent.

    The caller in this file treats the result as a collection of dicts with
    'bitrateInKbps', 'codec' and 'downloadInfoUrl' keys.

    Errors from ``urlopen`` (e.g. HTTPError) and JSON decoding propagate.
    """
    req = rq.Request(f"{MTS_MUSIC}/ya_api/tracks/{track_id}/download-info")
    if oauth_token:
        req.add_header("Authorization", f"OAuth {oauth_token}")
    # This runs once per track, so close each response explicitly instead of
    # leaving the connection to the garbage collector.
    with rq.urlopen(req) as response:
        return json.load(response)['result']
080:
081:
def get_file_download_Link(dlinfo):
    """Turn a download-info URL into the direct URL of the audio file.

    dlinfo: URL string (the caller passes a variant's 'downloadInfoUrl').
        It is requested with ``format=json`` added to its query string, and
        the JSON reply must contain 'host', 'path', 's' and 'ts'.

    Returns ``https://{host}/get-mp3/{hash}/{ts}{path}``, where ``hash`` is
    the hex MD5 of MD5_SALT + path (minus its first character) + s.

    Errors from ``urlopen`` and JSON decoding, and KeyError for missing
    members, propagate.
    """
    # Use '?' when the URL has no query string yet, so the parameter is not
    # glued onto the path.
    separator = '&' if '?' in dlinfo else '?'
    with rq.urlopen(f"{dlinfo}{separator}format=json") as response:
        url_info = json.load(response)
    # The first character of the path (presumably the leading '/') is not
    # part of the hashed string.
    signed_text = MD5_SALT + url_info['path'][1:] + url_info['s']
    url_info['hash'] = hashlib.md5(signed_text.encode()).hexdigest()
    return "https://{host}/get-mp3/{hash}/{ts}{path}".format_map(url_info)
087:
088:
# ---------------------------------------------------------------------------
# Script body: download every track of one album or playlist.
#
# The id comes from the first command-line argument (default "5899311").
# An argument containing '/' is treated as a playlist id, anything else as
# an album id. Files are written into a directory named after the title.
# ---------------------------------------------------------------------------

# eyeD3 is optional: resolve it once, not once per track, and report its
# absence a single time.
try:
    import eyed3
except ImportError:
    eyed3 = None
    print("eyeD3 python module not found: ID3 tags are not set")


def _safe_name(name):
    """Return *name* with '/' replaced by '_' so it stays one path component."""
    return name.replace('/', '_')


def _join_artists(artists):
    """Return the comma-joined 'name' values of a list of artist dicts."""
    return ",".join(a['name'] for a in artists)


def _tag_track(filename, album_info, trk, track_num, coverdata):
    """Write ID3 tags and the front cover into *filename* using eyeD3.

    Returns False, without touching the file, when eyeD3 does not recognise
    the file type (``eyed3.load`` returns None); returns True after saving.
    """
    audio = eyed3.load(filename)
    if audio is None:
        return False
    audio.initTag()
    audio.tag.album = album_info['title']
    audio.tag.artist = _join_artists(trk['artists'])
    audio.tag.album_artist = _join_artists(album_info['artists'])
    audio.tag.title = trk['title']
    audio.tag.track_num = track_num
    audio.tag.images.set(eyed3.id3.frames.ImageFrame.FRONT_COVER, coverdata, 'image/jpeg')
    audio.tag.save()
    return True


album_id = sys.argv[1] if len(sys.argv) > 1 else "5899311"
if '/' in album_id:
    album_info = get_playlist_info(album_id)
    # The playlist object gets an empty 'artists' list so the album-artist
    # tag below can be built the same way for both kinds of source.
    album_info['artists'] = []
else:
    album_info = get_album_info(album_id)
tracks = album_info['tracks']

# Sanitise the title like the track names, so a '/' in it cannot create
# nested directories.
album_dir = _safe_name(album_info['title'])
os.makedirs(album_dir, exist_ok=True)

coverfname = os.path.join(album_dir, "cover.jpg")
download_file(album_info['cover'], coverfname)
with open(coverfname, 'rb') as f:
    coverdata = f.read()

for i, trk in enumerate(tracks):
    try:
        trkname = _join_artists(trk['artists']) + ' - ' + trk['title']
        dlinfos = get_download_info(trk['id'], YA_TOKEN)
        if not dlinfos:
            # max() would raise ValueError on an empty list of variants.
            print('failed to download', trk, 'no download variants returned')
            continue
        # Take the variant with the highest bitrate.
        best_info = max(dlinfos, key=lambda x: x['bitrateInKbps'])
        trkformat = best_info['codec']
        # NOTE(review): file names are numbered from 00 while the ID3 track
        # number below starts at 1 - confirm whether that is intended.
        filename = os.path.join(album_dir, f"{i:02d}. {_safe_name(trkname)}.{trkformat}")
        download_file(get_file_download_Link(best_info['downloadInfoUrl']), filename)
    except HTTPError as e:
        # One failed track must not abort the rest of the batch.
        print('failed to download', trk, e)
        continue
    if eyed3 is not None and not _tag_track(filename, album_info, trk, i + 1, coverdata):
        print(f"eyeD3 does not recognise {filename}: ID3 tags are not set")
131: