Downloading Bilibili videos from the detail page

After testing, in principle every video can be downloaded. Paid videos require logging in first; once the request carries your logged-in cookie information, they can be downloaded as well.
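As a minimal sketch of what "carry the cookie information" means here (the cookie value below is a placeholder; copy your own string for bilibili.com from the browser's developer tools after logging in, and the URL is simply the example episode used later in the script):

import requests

headers = {
    'Referer': 'https://www.bilibili.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
    'cookie': 'SESSDATA=...; bili_jct=...',  # placeholder, paste your own logged-in cookie string
}
response = requests.get('https://www.bilibili.com/bangumi/play/ep306470', headers=headers)
print(response.status_code)  # 200 means the page was fetched with the cookie attached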

1. Environment

1.1 Python runtime environment

1.2 FFmpeg, used to merge the video and audio streams (see the quick check below)
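If you are not sure whether FFmpeg is installed and on the PATH, a quick check from Python (a small sketch, nothing Bilibili-specific) is:

import shutil
import subprocess

# shutil.which returns None when ffmpeg is not on the PATH
if shutil.which('ffmpeg') is None:
    print('ffmpeg not found; install it and add it to PATH before running the spider')
else:
    subprocess.call(['ffmpeg', '-version'])  # prints the FFmpeg version banner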

2. Code

Requirements file (requirements.txt); install the dependencies with pip install -r requirements.txt:

requests==2.21.0
lxml==4.3.0

The spider script:

from lxml import etree
import requests
import subprocess
import json
import re
import os


class BiBiSpider:

    def __init__(self):
        self.headers = {
            'Referer': 'https://www.bilibili.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
            "cookie": "_uuid=B75117CA-056D-136C-81C4-27DB1D06377A30553infoc; buvid3=3E3696DA-F5F3-4F55-A6AB-F0428459B3D9155836infoc; CURRENT_FNVAL=16; LIVE_BUVID=AUTO6415809639729886; rpdid=|(k|~u|YY)R)0J'ul)|mYJm)~; CURRENT_QUALITY=80; _ga=GA1.2.1444362150.1589713917; bsource=seo_baidu; sid=jceqdrok; PVID=1; DedeUserID=318464066; DedeUserID__ckMd5=2d1368bb369e1d79; SESSDATA=6c514b9f%2C1605405848%2C76c7d*51; bili_jct=208edda9bf1d5248ec6035c44959ec0e",
        }

        self.video_dirs = 'video'
        os.makedirs(self.video_dirs, exist_ok=True)  # make sure the output directory exists

    def download_file(self, file_path, download_url):
        print('*' * 100)
        print(f"Saving to: {file_path}")
        print(f'Download URL: {download_url}')
        response = requests.get(url=download_url, headers=self.headers, stream=True)
        content_size = int(response.headers["content-length"])  # total size reported by the server
        size = 0
        with open(file_path, "wb") as file:  # binary content is written as bytes
            for data in response.iter_content(chunk_size=1024):  # stream the response in 1 KB chunks
                file.write(data)  # write the chunk to disk
                file.flush()  # flush the buffer
                size += len(data)  # bytes downloaded so far
                # print the download progress on a single line
                print("\rDownload progress: %d%% (%0.2fMB/%0.2fMB)" % (
                    float(size / content_size * 100), (size / 1024 / 1024),
                    (content_size / 1024 / 1024)),
                      end=" ")
        print()

    def get_response(self, url):
        response = None
        try:
            response = requests.get(url, headers=self.headers)
        except Exception as e:
            print(e)
        return response

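    # parse_detail works off the window.__playinfo__ JSON embedded in the page:
    # some pages expose DASH streams (separate video and audio under data.dash)
    # that must be merged, others expose ready-made segments under data.durl.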
    def parse_detail(self, url):
        response = self.get_response(url)
        if not response:
            return
        html = response.text
        document = etree.HTML(html)
        title = ''.join(document.xpath('//*[@class="video-title"]/@title'))
        if not title:
            title = ''.join(document.xpath('//*[@class="media-wrapper"]/h1/@title'))

        title = re.sub(u"([^\u4e00-\u9fa5\u0030-\u0039\u0041-\u005a\u0061-\u007a])", "-", title)
        pattern = r'\<script\>window\.__playinfo__=(.*?)\</script\>'
        result = re.findall(pattern, html)
        if len(result) < 1:
            return None
        result = result[0]
        data = json.loads(result)
        durl_list = data['data'].get('durl')
        if durl_list:
            # older-style playinfo: ready-made segments listed under "durl"
            for index, durl in enumerate(durl_list, start=1):
                url = durl['url']
                file_path = f'{self.video_dirs}/{title}-{index}.mp4'
                self.download_file(file_path, url)
        else:
            # DASH playinfo: video and audio are separate streams and must be merged
            merge_path = f'{self.video_dirs}/{title}.mp4'
            video_url = data['data']['dash']['video'][0]['baseUrl']
            video_path = f'{self.video_dirs}/{title}.m4s'
            self.download_file(video_path, video_url)

            audio_url = data['data']['dash']['audio'][0]['baseUrl']
            audio_path = f'{self.video_dirs}/{title}.m4a'  # the DASH audio stream is AAC
            self.download_file(audio_path, audio_url)

            self.merge_video_and_audio(video_path, audio_path, merge_path)

    def merge_video_and_audio(self, video_path, audio_path, merge_path):
        # stream-copy both inputs into a single mp4 without re-encoding;
        # -y overwrites an existing output file instead of prompting
        cmd = ['ffmpeg', '-y', '-i', video_path, '-i', audio_path,
               '-vcodec', 'copy', '-acodec', 'copy', merge_path]
        subprocess.call(cmd)
        print(merge_path, 'merge finished')
        os.remove(video_path)
        os.remove(audio_path)

    def start_requests(self):
        # example page: a bangumi episode; regular video detail pages are parsed the same way
        url = 'https://www.bilibili.com/bangumi/play/ep306470'
        self.parse_detail(url)

    def run(self):
        self.start_requests()


if __name__ == '__main__':
    BiBiSpider().run()
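Usage: save the script under any file name and run it with Python 3.6+ (it uses f-strings). As written it downloads the episode hard-coded in start_requests; replace that URL with the page you want to grab a different video. For DASH pages the merged result is written to the video directory as <title>.mp4, and the intermediate video and audio files are deleted after merging.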