python 喜马拉雅下载 20200405有效

参考:在浏览器中按 F12 打开开发者工具,在 Network(网络)面板里查看各个请求及其返回内容。
0、请求 URL: https://www.ximalaya.com/revision/album?albumId=19753959
请求方法: GET 获取专辑总音频数,可知页数

{
    "ret": 200,
    "msg": "成功",
    "data": {
            "mainInfo": {
                 "albumStatus": 1,
                 "showApplyFinishBtn": false,
                 "showEditBtn": false,
                 "showTrackManagerBtn": false,
                 "showInformBtn": true,
                 "cover": "//imagev2.xmcdn.com/group42/M0A/0B/62/wKgJ9FrV-22TLLQLAABoJqGyCDw207.jpg",
                 "albumTitle": "JimmyShow吉米秀 - 访谈专栏"
            },
            "tracksInfo": {
                "trackTotalCount": 38,
                "sort": 1,
                "tracks": [
                    {
                    "index": 38,
                    "trackId": 1111, ...
                      }
                 ]
               }
    }
}

1、获取专辑list
请求 URL: https://www.ximalaya.com/revision/album/v1/getTracksList?albumId=19753959&pageNum=1
请求方法: GET

{
    "ret": 200,
    "data": {
        "currentUid": 163357056,
        "albumId": 19753959,
        "trackTotalCount": 38,
        "sort": 1,
        "tracks": [
            {
                "index": 38,
                "trackId": 147521319,
                "isPaid": false,
                "tag": 0,
                "title": "039 Why Horses Can't Wear Flip Flops",
                "playCount": 1729,
                "showLikeBtn": true,
                "isLike": false,
                "showShareBtn": true,
                "showCommentBtn": true,
                "showForwardBtn": true,
                "createDateFormat": "1年前",
                "url": "/ertong/19753959/147521319",
                "duration": 1508,
                "isVideo": false,
                "videoCover": null,
                "isVipFirst": false,
                "breakSecond": 385,
                "length": 1508
            },
            ...
        ],
        "pageNum": 1,
        "pageSize": 30,
        "superior": [],
        "lastPlayTrackId": 147521319
    }
}

2、请求 URL: https://www.ximalaya.com/revision/track/trackPageInfo?trackId=147521319
请求方法: GET
(该接口对下载没有用处,可以跳过)
3、 请求 URL: https://www.ximalaya.com/revision/play/v1/audio?id=147521319&ptype=1
请求方法: GET 获取m4a地址

{
    "ret": 200,
    "data": {
        "trackId": 147521319,
        "canPlay": true,
        "isPaid": false,
        "hasBuy": true,
        "src": "https://fdfs.xmcdn.com/group52/M03/0C/45/wKgLcFwhqOCgYeGHALpHth2twRw024.m4a",
        "albumIsSample": false,
        "sampleDuration": 0,
        "isBaiduMusic": false,
        "firstPlayStatus": true
    }
}

ximalaya_m4a_download.py.txt
喜马拉雅接口.txt

# -*- coding: utf-8 -*-
"""
Created on Sun Apr  5 11:58:25 2020

@author: 54861
"""

import requests
import math
import os
import re
import time

# Default HTTP headers sent with every request made by this script.
# No Referer is sent; the original author kept a disabled
# `'Referer': start_url` entry here as a reminder to add one.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/55.0.2883.87 Safari/537.36'
    ),
}

def get_album_number(albumID):
    """Fetch an album's cover URL, title and total track count.

    Sends a GET request to the ``/revision/album`` endpoint.

    Args:
        albumID: Album ID; it is converted with ``str()`` and sent as the
            ``albumId`` query parameter.

    Returns:
        A ``(cover, albumTitle, count)`` tuple taken from ``data.mainInfo``
        and ``data.tracksInfo.trackTotalCount``. When the response's ``ret``
        field is not 200 the tuple is ``('', '', 0)``.

    Raises:
        requests.RequestException: on connection errors or when the request
            times out.
        KeyError: if a ``ret == 200`` response lacks the expected fields.
    """
    url = "https://www.ximalaya.com/revision/album?albumId=" + str(albumID)
    # Without an explicit timeout requests waits indefinitely on a stalled
    # connection, which would hang the whole download run.
    resp = requests.get(url, headers=headers, timeout=30)
    result = resp.json()
    if result.get('ret') != 200:
        return '', '', 0

    data = result['data']
    main_info = data['mainInfo']
    count = data['tracksInfo']['trackTotalCount']
    return main_info['cover'], main_info['albumTitle'], count
    
def get_track_list(albumID, pageNum):
    """Fetch one page of an album's track listing.

    Sends a GET request to the ``/revision/album/v1/getTracksList`` endpoint.

    Args:
        albumID: Album ID, sent as the ``albumId`` query parameter.
        pageNum: Page number to fetch. Pages are numbered starting at 1.

    Returns:
        A list of ``{'trackId': ..., 'title': ...}`` dicts, one per track on
        the page, in the order the API returned them. The list is empty when
        the response's ``ret`` field is not 200.

    Raises:
        requests.RequestException: on connection errors or when the request
            times out.
        KeyError: if a ``ret == 200`` response lacks the expected fields.
    """
    url = ("https://www.ximalaya.com/revision/album/v1/getTracksList?albumId="
           + str(albumID) + "&pageNum=" + str(pageNum))
    # Without an explicit timeout requests waits indefinitely on a stalled
    # connection.
    resp = requests.get(url, headers=headers, timeout=30)
    result = resp.json()
    if result.get('ret') != 200:
        return []
    return [
        {'trackId': track['trackId'], 'title': track['title']}
        for track in result['data']['tracks']
    ]

def get_track_url(trackID):
    """Look up the direct download URL of a single track.

    Sends a GET request to the ``/revision/play/v1/audio`` endpoint with
    ``ptype=1``.

    Args:
        trackID: Track ID, sent as the ``id`` query parameter.

    Returns:
        The ``data.src`` URL string, or ``None`` when the response's ``ret``
        field is not 200 or when ``src`` is missing, null or empty.

    Raises:
        requests.RequestException: on connection errors or when the request
            times out.
    """
    url = ("https://www.ximalaya.com/revision/play/v1/audio?id="
           + str(trackID) + "&ptype=1")
    # Without an explicit timeout requests waits indefinitely on a stalled
    # connection.
    resp = requests.get(url, headers=headers, timeout=30)
    result = resp.json()
    if result.get('ret') != 200:
        return None
    # `src` can be absent or null (presumably for tracks that cannot be
    # played -- TODO confirm); calling len() on it would raise TypeError, so
    # rely on truthiness instead.
    src = (result.get('data') or {}).get('src')
    return src or None
        
   
def download_track(url, file):
    """Download ``url`` and write the response body to a local file.

    The body is streamed to disk in chunks so that large audio files are not
    held in memory.

    Args:
        url: Download address of the audio (or any other) resource.
        file: Local path of the file to write; it is opened in ``'wb'`` mode
            and therefore overwritten if it already exists.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status, so
            that an error page is never saved in place of the real content.
        requests.RequestException: on connection errors or when the request
            times out.
        OSError: if the local file cannot be opened or written.
    """
    # Using the response as a context manager guarantees that the streamed
    # connection is released even if writing fails midway.
    with requests.get(url, headers=headers, stream=True, timeout=30) as resp:
        resp.raise_for_status()
        with open(file, 'wb') as f:
            for data in resp.iter_content(chunk_size=64 * 1024):
                if data:  # skip keep-alive chunks
                    f.write(data)
 
def good_win_filename(title):
    """Turn an arbitrary string into a name Windows accepts as a file name.

    Every character that Windows forbids in file names is replaced with an
    underscore: the reserved characters ``/ \\ : * ? " < > |`` and the ASCII
    control characters (code points 0-31), such as tabs and newlines.

    Args:
        title: The raw title string.

    Returns:
        The sanitized string; it has the same length as ``title``.
    """
    forbidden = r'[\\/:*?"<>|\x00-\x1f]'
    return re.sub(forbidden, "_", title)
    
if __name__ == '__main__':
    import sys  # local import: only the script entry point needs it

    # Album IDs to download are read from the command line, e.g.
    #   python ximalaya_m4a_download.py 5943599 3179138
    # Previously `albumIds` was never assigned (both assignments were
    # commented out), so the script stopped with a NameError.
    albumIds = [int(arg) for arg in sys.argv[1:]]
    if not albumIds:
        print("usage: python " + sys.argv[0] + " ALBUM_ID [ALBUM_ID ...]")

    PAGE_SIZE = 30  # number of tracks getTracksList returns per page

    for albumId in albumIds:
        print ("start downloading:" + str(albumId)+" of"+str(albumIds))
        coverUrl, albumTitle, total = get_album_number(albumId)
        if not (coverUrl or albumTitle or total):
            # get_album_number returns ('', '', 0) when the API call fails.
            print("album info unavailable, skipped:" + str(albumId))
            continue

        # One folder per album; makedirs also creates the parent folder.
        album_dir = 'ximalaya_m4a/'+str(albumId)+"_"+good_win_filename(albumTitle)+'/'
        os.makedirs(album_dir, exist_ok=True)

        # Download the cover image, if the album has one.
        if coverUrl:
            cover_ext = os.path.splitext(coverUrl.split('?', 1)[0])[1] or '.jpg'
            # The API returns a protocol-relative URL ("//host/path").
            cover_src = coverUrl if coverUrl.startswith('http') else "http:" + coverUrl
            download_track(cover_src, album_dir + "cover" + cover_ext)

        pageCount = math.ceil(total / PAGE_SIZE)
        seen = 0        # tracks listed so far, used for the progress display
        downloaded = 0  # tracks actually written to disk
        for pageNum in range(1, pageCount + 1):
            # Pause between requests: downloading short audio files too
            # quickly makes the server drop the connection.
            time.sleep(1)
            for track in get_track_list(albumId, pageNum):
                seen += 1
                trackUrl = get_track_url(track['trackId'])
                time.sleep(1)  # same throttling as above
                if not trackUrl:
                    print("skipped (no download url):" + track['title'])
                    continue
                # Take the extension from the URL path only, ignoring any
                # query string.
                ext = os.path.splitext(trackUrl.split('?', 1)[0])[1] or '.m4a'
                file_path = album_dir + good_win_filename(track['title']) + ext
                print ('downloading file:'+str(seen)+" of "+str(total)+', page '+str(pageNum)+" of "+str(pageCount)+"\n"+ trackUrl + "\n"+track['title'])
                download_track(trackUrl, file_path)
                downloaded += 1
        print("\n\n"+str(downloaded)+" files downed in:"+album_dir)
        
发表新评论