当然,这是一部付费的书,所以如果浏览器不带会员的 cookie,是访问不到这个接口的。url 中的数字 244130607 在他们的接口中叫做 trackId,每个音频文件对应唯一的一个 trackId。
群里有志同道合的小伙伴,互帮互助, 群里有不错的视频学习教程和PDF!
{ "ret": 0, "msg": "0", "trackId": 244130607, "uid": 170217760, "albumId": 30816438, "title": "《三体》第一季 第十集 聚会与大撕裂", "domain": "http://audiopay.cos.xmcdn.com", "totalLength": 12780565, "sampleDuration": 0, "sampleLength": 0, "isAuthorized": true, "apiVersion": "1.0.0", "seed": 9583, "fileId": "27*31*44*62*1*8*6*48*52*4*6*17*16*6*35*35*6*43*25*27*48*63*58*4*50*47*60*64*15*39*59*49*2*36*48*48*16*58*18*44*2*32*12*7*52*64*51*26*29*4*22*", "buyKey": "617574686f72697a6564", "duration": 1578, "ep": "20NvOoh6T39X3qwKO4cY5g5bVhg+1nfPHIQafFTmCXihnrqF2PjczO8O0auK1KJhDrJ30XMYfKJo2uz+xgwd3rwRPi5f", "highestQualityLevel": 1, "downloadQualityLevel": 1, "authorizedType": 1 }
这里,我充了会员,所以可以直接在浏览器中打开这个 url。返回值中有用的字段只有几个:seed 和 fileId 两个字段通过一个 js 加密算法计算出 m4a 的路径,再拼接上主域名(domain);ep 则经过另一个加密算法得到 url 的访问参数 buy_key、sign、token、timestamp。最后将它们拼接到一起,才是一个完整的音频 url。
经过我调试我分别找到了这两个加密的 js算法
- 计算 m4a的路径js算法:
/**
 * Decoder for the obfuscated `fileId` field.
 *
 * A linear-congruential generator seeded with `seed` shuffles a fixed
 * 68-character alphabet; `fileId` is a '*'-separated list of indices into
 * the shuffled alphabet, and the selected characters spell the m4a path.
 *
 * @param {number} t  the `seed` value returned by the API
 */
function vt(t) {
    this._randomSeed = t;
    this.cg_hun();
}

vt.prototype = {
    /** Build `this._cgStr`, the seed-dependent permutation of the alphabet. */
    cg_hun: function () {
        this._cgStr = "";
        var pool = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\\:._-1234567890";
        var total = pool.length;
        for (var n = 0; n < total; n++) {
            // ran() is in [0, 1), so the index always falls inside the pool.
            var pick = Math.floor(this.ran() * pool.length);
            var ch = pool.charAt(pick);
            this._cgStr += ch;
            // Remove the chosen character so it cannot be drawn twice.
            pool = pool.split(ch).join("");
        }
    },

    /**
     * Map a '*'-separated index list to characters of the permutation.
     * The piece after the final '*' is ignored (the list ends with '*').
     */
    cg_fun: function (t) {
        var parts = t.split("*");
        var out = "";
        for (var n = 0; n < parts.length - 1; n++) {
            out += this._cgStr.charAt(parts[n]);
        }
        return out;
    },

    /** Advance the generator and return the next value in [0, 1). */
    ran: function () {
        this._randomSeed = (211 * this._randomSeed + 30031) % 65536;
        return this._randomSeed / 65536;
    }
};

/**
 * Decode `fileId` with `seed` and make sure the path starts with "/".
 *
 * @param {number} t  seed
 * @param {string} e  fileId
 * @returns {string}  path to append to the `domain` field
 */
var c = function (t, e) {
    var n = new vt(t).cg_fun(e);
    return "/" === n[0] ? n : "/".concat(n);
};

// fileId exactly as returned by the API: indices only, no URL prefix.
console.log(c(9583, "27*31*44*62*1*8*6*48*52*4*6*17*16*6*35*35*6*43*25*27*48*63*58*4*50*47*60*64*15*39*59*49*2*36*48*48*16*58*18*44*2*32*12*7*52*64*51*26*29*4*22*"));
用node跑一下可以得到 m4a的路径
- 通过ep来计算url参数的js算法:
// Babel-style destructuring helpers that were copied along with the site's
// bundle. Nothing below calls them; they are kept so the extract stays
// complete.
var Z = function () {
    throw new TypeError("Invalid attempt to destructure non-iterable instance");
};

var J = function (t, e) {
    var n = [], r = !0, o = !1, i = void 0;
    try {
        for (var a, u = t[Symbol.iterator](); !(r = (a = u.next()).done) && (n.push(a.value), !e || n.length !== e); r = !0);
    } catch (t) {
        o = !0, i = t;
    } finally {
        try {
            r || null == u.return || u.return();
        } finally {
            if (o) throw i;
        }
    }
    return n;
};

var Q = function (t) {
    if (Array.isArray(t)) return t;
};

var tt = function (t, e) {
    return Q(t) || J(t, e) || Z();
};

/**
 * Symmetric stream cipher (key schedule + XOR keystream over a 256-entry
 * state array). Applying it twice with the same key restores the input.
 *
 * @param {string} t  key
 * @param {string} e  input text, one char code per byte
 * @returns {string}
 */
function yt(t, e) {
    var box = [], j = 0, out = "", i, tmp;
    for (i = 0; i < 256; i++) box[i] = i;
    for (i = 0; i < 256; i++) {
        j = (j + box[i] + t.charCodeAt(i % t.length)) % 256;
        tmp = box[i]; box[i] = box[j]; box[j] = tmp;
    }
    i = 0;
    j = 0;
    for (var u = 0; u < e.length; u++) {
        i = (i + 1) % 256;
        j = (j + box[i]) % 256;
        tmp = box[i]; box[i] = box[j]; box[j] = tmp;
        out += String.fromCharCode(e.charCodeAt(u) ^ box[(box[i] + box[j]) % 256]);
    }
    return out;
}

// NOTE(review): this literal was copied from the site's script and may have
// lost non-printable characters along the way; verify against the live bundle.
var mt = yt("xm", "Ä[üJ=†Û3áf÷N");

// Lookup table for the key derivation: a symbol's new value is the index at
// which its old value appears in this table.
var gt = [19, 1, 4, 7, 30, 14, 28, 8, 24, 17, 6, 35, 34, 16, 9, 10, 13, 22, 32, 29, 31, 21, 18, 3, 2, 23, 25, 27, 11, 20, 5, 15, 12, 0, 33, 26];

/**
 * Derive the cipher key: treat each character as a base-36 symbol
 * (a-z -> 0..25, digits -> 26..35), replace it by its position in `e`,
 * and convert back to a character.
 */
function permuteKey(t, e) {
    var n = [];
    for (var r = 0; r < t.length; r++) {
        var o = "a" <= t[r] && "z" >= t[r]
            ? t[r].charCodeAt() - 97
            : t[r].charCodeAt() - "0".charCodeAt() + 26;
        for (var i = 0; 36 > i; i++) {
            if (e[i] == o) {
                o = i;
                break;
            }
        }
        n[r] = 25 < o
            ? String.fromCharCode(o - 26 + "0".charCodeAt())
            : String.fromCharCode(o + 97);
    }
    return n.join("");
}

/**
 * Lenient base64 decoder returning one character per decoded byte.
 * Characters outside the alphabet are skipped; '=' in the third or fourth
 * position of a group ends decoding.
 */
function decodeBase64(t) {
    if (!t) return "";
    var e, n, r, o, i, a = [
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
        52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
        -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
        -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
        41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1
    ];
    for (o = (t = t.toString()).length, r = 0, i = ""; r < o;) {
        do {
            e = a[255 & t.charCodeAt(r++)];
        } while (r < o && -1 == e);
        if (-1 == e) break;
        do {
            n = a[255 & t.charCodeAt(r++)];
        } while (r < o && -1 == n);
        if (-1 == n) break;
        i += String.fromCharCode(e << 2 | (48 & n) >> 4);
        do {
            if (61 == (e = 255 & t.charCodeAt(r++))) return i;
            e = a[e];
        } while (r < o && -1 == e);
        if (-1 == e) break;
        i += String.fromCharCode((15 & n) << 4 | (60 & e) >> 2);
        do {
            if (61 == (n = 255 & t.charCodeAt(r++))) return i;
            n = a[n];
        } while (r < o && -1 == n);
        if (-1 == n) break;
        i += String.fromCharCode((3 & e) << 6 | n);
    }
    return i;
}

/**
 * Decrypt the `ep` field.
 *
 * @param {string} t  base64 text of the `ep` field
 * @returns {string[]}  [buy_key, sign, token, timestamp]
 */
var bt = function (t) {
    return yt(permuteKey("d" + mt + "9", gt), decodeBase64(t)).split("-");
};

// ep exactly as returned by the API: base64 text only, no URL prefix.
var c = bt("20NvOoh6T39X3qwKO4cY5g5bVhg+1nfPHIQafFTmCXihnrqF2PjczO8O0auK1KJhDrJ30XMYfKJo2uz+xgwd3rwRPi5f");
console.log(c);
这段 js 比较复杂,调试的时候坑死我了:相关代码不在同一个地方,导致我来回复制,最终才把这个算法整理到一个 js 文件中。依然用 node 跑一下,输出:
- [
- '617574686f72697a6564',
- 'ef9a0678d77870843ef203d6333ce021',
- '5790',
- '1598533668'
- ]
这几个参数分别对应的是:buy_key sign token timestamp
有了这两个js算法就可以完全地解析 这个接口返回的参数了。
python 代码仿写加密算法
- 计算 m4a路径加密算法
# ---------------------------------------------------------------------------
# Part 1: decode the m4a path from `seed` and `fileId`
# ---------------------------------------------------------------------------


class vt():
    """Decoder for the obfuscated ``fileId`` field.

    A linear-congruential generator seeded with ``t`` shuffles a fixed
    68-character alphabet. ``fileId`` is a ``*``-separated list of indices
    into the shuffled alphabet; the selected characters spell the path.
    """

    def __init__(self, t):
        self._randomSeed = t
        self.cg_hun()

    def ran(self):
        """Advance the generator and return the next value in [0, 1)."""
        self._randomSeed = (211 * self._randomSeed + 30031) % 65536
        return self._randomSeed / 65536

    def cg_hun(self):
        """Build ``self._cgStr``, the seed-dependent alphabet permutation."""
        pool = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\\:._-1234567890"
        picked = []
        for _ in range(len(pool)):
            # ran() < 1, so the index always falls inside the shrinking pool.
            ch = pool[int(self.ran() * len(pool))]
            picked.append(ch)
            # Remove the chosen character so it cannot be drawn twice.
            pool = pool.replace(ch, "")
        self._cgStr = "".join(picked)

    def cg_fun(self, t):
        """Translate a ``*``-separated index list into characters.

        The piece after the final ``*`` is ignored, because the list is
        terminated by ``*``. An empty piece counts as index 0.

        Raises:
            ValueError: if a piece is not an integer.
            IndexError: if an index is outside the permutation.
        """
        pieces = t.split("*")[:-1]
        return "".join(self._cgStr[int(piece) if piece else 0] for piece in pieces)


def path_decode(seed, fileId):
    """Return the m4a path encoded by ``fileId`` under ``seed``.

    The result always starts with ``/`` so that it can be appended directly
    to the ``domain`` field of the API response.
    """
    path = vt(seed).cg_fun(fileId)
    return path if path.startswith("/") else "/" + path


# ---------------------------------------------------------------------------
# Part 2: compute the URL parameters from `ep`
# ---------------------------------------------------------------------------

_B64_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
_B64_VALUES = {ord(ch): value for value, ch in enumerate(_B64_ALPHABET)}
_B64_PAD = ord("=")


def yt(t, e):
    """Encrypt or decrypt ``e`` with key ``t`` (the operation is symmetric).

    A 256-entry state array is shuffled by the key, then each input
    character is XOR-ed with one keystream byte.

    Args:
        t: non-empty key string.
        e: input string.

    Returns:
        A string of the same length as ``e``.
    """
    box = list(range(256))
    j = 0
    for i in range(256):
        j = (j + box[i] + ord(t[i % len(t)])) % 256
        box[i], box[j] = box[j], box[i]
    i = j = 0
    out = []
    for ch in e:
        i = (i + 1) % 256
        j = (j + box[i]) % 256
        box[i], box[j] = box[j], box[i]
        out.append(chr(ord(ch) ^ box[(box[i] + box[j]) % 256]))
    return "".join(out)


def _permute_key(text, table):
    """Derive the cipher key from ``text`` using the lookup ``table``.

    Each character is read as a base-36 symbol (a-z -> 0..25, digits ->
    26..35), replaced by the position of that value in ``table`` (left
    unchanged when it does not occur), and converted back to a character.
    """
    out = []
    for ch in text:
        code = ord(ch) - 97 if "a" <= ch <= "z" else ord(ch) - ord("0") + 26
        for pos in range(36):
            if table[pos] == code:
                code = pos
                break
        out.append(chr(code - 26 + ord("0")) if code > 25 else chr(code + 97))
    return "".join(out)


def _b64_decode(text):
    """Leniently decode base64 ``text`` into one character per byte.

    Characters outside the base64 alphabet are skipped. ``=`` in the third
    or fourth position of a group ends decoding, and so does running out of
    input, which means unpadded input is accepted.
    """
    if not text:
        return ""
    text = str(text)
    size = len(text)
    pos = 0

    def next_sextet(stop_on_pad):
        # Return the next 6-bit value, or None when decoding must stop.
        nonlocal pos
        while pos < size:
            code = ord(text[pos]) & 0xFF
            pos += 1
            if stop_on_pad and code == _B64_PAD:
                return None
            value = _B64_VALUES.get(code, -1)
            if value != -1:
                return value
        return None

    out = []
    while True:
        first = next_sextet(False)
        if first is None:
            break
        second = next_sextet(False)
        if second is None:
            break
        out.append(chr(first << 2 | (second & 48) >> 4))
        third = next_sextet(True)
        if third is None:
            break
        out.append(chr((second & 15) << 4 | (third & 60) >> 2))
        fourth = next_sextet(True)
        if fourth is None:
            break
        out.append(chr((third & 3) << 6 | fourth))
    return "".join(out)


# NOTE(review): this literal was copied from the site's script and may have
# lost non-printable characters along the way; verify against the live bundle.
mt = yt("xm", "Ä[üJ=†Û3áf÷N")
gt = [19, 1, 4, 7, 30, 14, 28, 8, 24, 17, 6, 35, 34, 16, 9, 10, 13, 22, 32, 29, 31, 21, 18, 3, 2, 23, 25, 27, 11, 20, 5, 15, 12, 0, 33, 26]

# The key is independent of the input, so derive it once at import time.
EP_RC4_KEY = _permute_key("d" + mt + "9", gt)


def bt(t):
    """Decrypt ``ep`` text into the four URL parameters.

    Args:
        t: base64 text of the ``ep`` field.

    Returns:
        dict with keys ``buy_key``, ``sign``, ``token`` and ``timestamp``.

    Raises:
        ValueError: if the decrypted payload does not consist of exactly
            four ``-``-separated fields.
    """
    fields = yt(EP_RC4_KEY, _b64_decode(t)).split('-')
    if len(fields) != 4:
        raise ValueError(
            "unexpected ep payload: expected 4 '-'-separated fields, got %d" % len(fields)
        )
    buy_key, sign, token, timestamp = fields
    data = dict(
        buy_key=buy_key,
        sign=sign,
        token=token,
        timestamp=timestamp,
    )
    return data


def ep_decode(ep):
    """Public entry point: decode the ``ep`` field (see :func:`bt`)."""
    data = bt(ep)
    return data


if __name__ == '__main__':
    # fileId / ep exactly as returned by the API (no URL prefix).
    result = path_decode(9583, "27*31*44*62*1*8*6*48*52*4*6*17*16*6*35*35*6*43*25*27*48*63*58*4*50*47*60*64*15*39*59*49*2*36*48*48*16*58*18*44*2*32*12*7*52*64*51*26*29*4*22*")
    print(result)
    print(ep_decode('20NvOoh6T39X3qwKO4cY5g5bVhg+1nfPHIQafFTmCXihnrqF2PjczO8O0auK1KJhDrJ30XMYfKJo2uz+xgwd3rwRPi5f'))
{ "ret": 200, "data": { "trackId": 324681559, "canPlay": true, "isPaid": false, "hasBuy": true, "src": "https://aod.cos.tx.xmcdn.com/group84/M03/4A/A6/wKg5Hl8s0cTwcp6xABQ0EbeuW5Q193.m4a", "albumIsSample": false, "sampleDuration": 48, "isBaiduMusic": false, "firstPlayStatus": true, "isVipFree": false } }
这个接口还是比较简单的:返回值里面直接包含 m4a 音频地址,没有加密措施。另外,url 中的数字依然是 trackId。值得一提的是,免费音频的 trackId 不能用在付费接口上。我猜测是版本迭代的问题,或者是客户端不同的问题,因为当时我不只是分析了网页的接口,还抓包了电脑客户端的接口,具体对应的是网页还是客户端,我也忘了。
喜马拉雅接口主要关键的有两个参数,一个是前面我说的 trackId 另一个就是albumId,trackId 对应唯一的一个音频,而 albumId 对应的是唯一的一本书。
import hashlib
import json
import random
import time

import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning

# Certificate verification is switched off for every request below, so
# silence the warning urllib3 would otherwise print for each call.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10


# Build the xm-sign request signature
def get_sign(headers):
    """Add an ``xm-sign`` header to ``headers`` and return the same dict.

    The signature is
    ``md5("himalaya-<serverTime>") + "(<rand>)" + serverTime + "(<rand>)" + nowTime``
    where ``serverTime`` is the body of the site's time endpoint, ``nowTime``
    is the local time in milliseconds and each ``<rand>`` is a random integer
    in 0..100.

    Args:
        headers: request headers; modified in place.

    Returns:
        The ``headers`` dict that was passed in, now containing ``xm-sign``.
    """
    serverTimeUrl = "https://www.ximalaya.com/revision/time"
    response = requests.get(
        serverTimeUrl, headers=headers, verify=False, timeout=REQUEST_TIMEOUT
    )
    serverTime = response.text
    nowTime = str(round(time.time() * 1000))
    digest = hashlib.md5("himalaya-{}".format(serverTime).encode()).hexdigest()
    sign = (
        digest
        + "({})".format(round(random.random() * 100))
        + serverTime
        + "({})".format(round(random.random() * 100))
        + nowTime
    )
    headers["xm-sign"] = sign
    return headers


def get_header():
    """Return browser-like request headers that include a fresh ``xm-sign``."""
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36"
    }
    headers = get_sign(headers)
    return headers


if __name__ == '__main__':
    # Search endpoint used as a demo of a signed request
    url = "https://www.ximalaya.com/revision/search/main?core=all&spellchecker=true&device=iPhone&kw=%E9%9B%AA%E4%B8%AD%E6%82%8D%E5%88%80%E8%A1%8C&page=1&rows=20&condition=relation&fq=&paidFilter=false"
    s = requests.get(url, headers=get_header(), verify=False, timeout=REQUEST_TIMEOUT)
    print(s.json())
我写了喜马拉雅扫码登录的脚本,因为我不能每次都去复制浏览器中的 cookie,这种重复劳动太傻了。
import http.cookiejar as cookielib
import os
import re
import sys
import time
from base64 import b64decode
from io import BytesIO
from threading import Thread

import psutil
import requests
from PIL import Image

# Certificate verification is switched off for every request below, so
# silence the warning urllib3 would otherwise print for each call.
requests.packages.urllib3.disable_warnings()


class show_code(Thread):
    """Background thread that displays the login QR code image."""

    def __init__(self, data):
        """Store the raw image bytes to display."""
        Thread.__init__(self)
        self.data = data

    def run(self):
        img = Image.open(BytesIO(self.data))  # decode the bytes into a PIL image
        img.show()


def is_login(session):
    """Load saved cookies into ``session`` and ask the site who is logged in.

    Args:
        session: a ``requests`` session, normally with a file-backed cookie
            jar attached.

    Returns:
        ``(session, True)`` when the current-user endpoint answers with
        ``ret == 200`` (the response is also printed), otherwise
        ``(session, False)``.
    """
    headers = {'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36"}
    url = "https://www.ximalaya.com/revision/main/getCurrentUser"
    try:
        session.cookies.load(ignore_discard=True)
    except (OSError, cookielib.LoadError, AttributeError):
        # No cookie file yet, an unreadable or empty one, or a cookie jar that
        # is not file-backed: carry on without saved cookies.
        pass
    response = session.get(url, verify=False, headers=headers)
    payload = response.json()
    if payload['ret'] == 200:
        print(payload)
        return session, True
    return session, False


def login():
    """Return a logged-in session, asking for a QR-code scan when needed.

    Cookies are kept in ``.cookie/xmly.txt``. If the saved cookies are still
    valid they are reused. Otherwise a QR code is shown and the check
    endpoint is polled once per second until it reports ``ret == 0``. The
    cookies are written back to disk before returning.
    """
    os.makedirs('.cookie', exist_ok=True)
    session = requests.session()
    session.cookies = cookielib.LWPCookieJar(filename='.cookie/xmly.txt')
    session, status = is_login(session)
    if not status:
        url = "https://passport.ximalaya.com/web/qrCode/gen?level=L"
        response = session.get(url, verify=False)
        data = response.json()
        # Show the QR code in a separate thread so polling can start at once.
        t = show_code(b64decode(data['img']))
        t.start()
        qrId = data['qrId']
        while 1:
            # Use a fresh millisecond timestamp for every poll.
            url = 'https://passport.ximalaya.com/web/qrCode/check/%s/%s' % (qrId, int(time.time() * 1000))
            response = session.get(url, verify=False)
            data = response.json()
            if data['ret'] == 0:
                # NOTE: the image viewer window is not closed automatically.
                break
            time.sleep(1)
    # Save with the same flag used for loading; without it, session cookies
    # (marked "discard") are silently left out of the file.
    session.cookies.save(ignore_discard=True)
    return session


if __name__ == '__main__':
    login()
# Usage: call login() to obtain the session object it returns.
session = login()