前言
大家早好、午好、晚好吖~
需要的开发环境以及模块:
- python 3.6
- pycharm
- requests
- re
- os
import os
import pprint
import re  # built-in module, no installation needed

import requests  # third-party module, install with: pip install requests
def change_title(title):
    """Return *title* made safe for use as a Windows file name.

    Every backslash, slash, colon, asterisk, question mark, angle bracket,
    pipe and double quote in *title* is replaced by an underscore. All other
    characters are left untouched.
    """
    # Characters Windows does not allow in file names.
    mode = re.compile(r'[\\/:*?<>|"]')
    return mode.sub('_', title)
def get_video_url(video_id):
    """Look up the downloadable URL for the video identified by *video_id*.

    Queries the site's ``videoStatus.jsp`` endpoint, reads
    ``videoInfo.videos.srcUrl`` from the JSON reply and rewrites its file
    name so that the part before the first ``-`` becomes ``cont-<video_id>``.

    Returns the rewritten URL as a string. Raises whatever ``requests`` or
    the JSON/dict lookups raise when the request fails or the reply does not
    have the expected structure.
    """
    html_url = f'https://www..com/videoStatus.jsp?contId={video_id}&mrd=0.179849252514223'
    headers_1 = {
        # The Referer must be a well-formed URL (no spaces) pointing at the
        # video's own page, matching the host used for the request itself.
        'Referer': f'https://www..com/video_{video_id}',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'
    }
    # A timeout keeps the crawl from hanging forever on a stalled connection.
    response_1 = requests.get(url=html_url, headers=headers_1, timeout=10)
    src_url = response_1.json()['videoInfo']['videos']['srcUrl']

    # Everything after the first '-' in srcUrl is kept as the file-name tail.
    string_1 = src_url.partition('-')[2]
    # The first six '/'-separated pieces are kept as the base of the URL
    # (presumably scheme, host and directory path -- confirm against a live reply).
    string_2 = '/'.join(src_url.split('/')[:6])

    # Per the original author's note, srcUrl as returned is a "fake" address
    # (file name like '<timestamp>-<n>_adpkg-ad_hd.mp4'); the real one has the
    # form 'cont-<video_id>-<n>_adpkg-ad_hd.mp4' under the same base path.
    video_url = string_2 + '/' + 'cont-' + str(video_id) + '-' + string_1
    return video_url
# Crawl the "popular" listing page by page and save every video found
# into the local 'video' directory as '<sanitized title>.mp4'.

# Browser-like User-Agent sent with every listing request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'
}

# Make sure the output directory exists before the first file is written.
os.makedirs('video', exist_ok=True)

# The listing is paged through the `start` query parameter in steps of 10;
# range(0, 101, 10) therefore requests 11 pages (start=0 .. start=100).
for num, page in enumerate(range(0, 101, 10), start=1):
    print(f'=========================正在爬取第{num}页的视频内容====================')
    url = f'https://www..com/popular_loading.jsp?reqType=1&categoryId=&start={page}&sort=10&mrd=0.5595334619073158'
    # Pass the headers that were previously built but never sent.
    response = requests.get(url=url, headers=headers, timeout=10)

    # Parse the HTML fragment: video ids and titles appear in the same order,
    # so they are paired positionally below.
    videos = re.findall(r'<a href="video_(\d+)" class="popularembd actplay">', response.text)
    names = re.findall(r'<h2 class="popularem-title">(.*?)</h2>', response.text)

    for video_id, name in zip(videos, names):
        video_url = get_video_url(video_id)
        # Titles may contain characters that are illegal in file names.
        new_title = change_title(name)
        video_content = requests.get(url=video_url, timeout=30).content
        with open(os.path.join('video', new_title + '.mp4'), mode='wb') as f:
            f.write(video_content)
        print('正在保存: ', name)
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
源码、解答、教程加Q裙:261823976 点击蓝字加入【python学习裙】
尾语
好了,我的这篇文章写到这里就结束啦!
有更多建议或问题可以评论区或私信我哦!一起加油努力叭(ง •_•)ง
喜欢就关注一下博主,或点赞收藏评论一下我的文章叭!!!