Mirror of https://github.com/ls125781003/tvboxtg.git · synced 2025-10-29 12:52:21 +00:00
Commit: 4.21 up
摸鱼儿/api/a848d2173d12b963b98a2ffa9796abe7.js · 304 lines · new file
@@ -0,0 +1,304 @@
if (typeof Object.assign != 'function') {
    Object.assign = function () {
        var target = arguments[0];
        for (var i = 1; i < arguments.length; i++) {
            var source = arguments[i];
            for (var key in source) {
                if (Object.prototype.hasOwnProperty.call(source, key)) {
                    target[key] = source[key];
                }
            }
        }
        return target;
    };
}

function getMubans() {
    var mubanDict = { // template dictionary
        mxpro: {
            title: '',
            host: '',
            // homeUrl:'/',
            url: '/vodshow/fyclass--------fypage---.html',
            searchUrl: '/vodsearch/**----------fypage---.html',
            searchable: 2, // enable global search
            quickSearch: 0, // enable quick search
            filterable: 0, // enable category filtering
            headers: { // site request headers; all headers are supported, usually UA and cookies
                'User-Agent': 'MOBILE_UA',
                // "Cookie": "searchneed=ok"
            },
            class_parse: '.navbar-items li:gt(2):lt(8);a&&Text;a&&href;/(\\d+).html',
            play_parse: true,
            lazy: '',
            limit: 6,
            推荐: '.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href',
            double: true, // whether recommended items use two-level selectors
            一级: 'body a.module-poster-item.module-item;a&&title;.lazyload&&data-original;.module-item-note&&Text;a&&href',
            二级: {
                "title": "h1&&Text;.module-info-tag&&Text",
                "img": ".lazyload&&data-original",
                "desc": ".module-info-item:eq(1)&&Text;.module-info-item:eq(2)&&Text;.module-info-item:eq(3)&&Text",
                "content": ".module-info-introduction&&Text",
                "tabs": ".module-tab-item",
                "lists": ".module-play-list:eq(#id) a"
            },
            搜索: 'body .module-item;.module-card-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href;.module-info-item-content&&Text',
        },
        mxone5: {
            title: '',
            host: '',
            url: '/show/fyclass--------fypage---.html',
            searchUrl: '/search/**----------fypage---.html',
            searchable: 2, // enable global search
            quickSearch: 0, // enable quick search
            filterable: 0, // enable category filtering
            class_parse: '.nav-menu-items&&li;a&&Text;a&&href;.*/(.*?).html',
            play_parse: true,
            lazy: '',
            limit: 6,
            推荐: '.module-list;.module-items&&.module-item;a&&title;img&&data-src;.module-item-text&&Text;a&&href',
            double: true, // whether recommended items use two-level selectors
            一级: '.module-items .module-item;a&&title;img&&data-src;.module-item-text&&Text;a&&href',
            二级: {
                "title": "h1&&Text;.tag-link&&Text",
                "img": ".module-item-pic&&img&&data-src",
                "desc": ".video-info-items:eq(0)&&Text;.video-info-items:eq(1)&&Text;.video-info-items:eq(2)&&Text;.video-info-items:eq(3)&&Text",
                "content": ".vod_content&&Text",
                "tabs": ".module-tab-item",
                "lists": ".module-player-list:eq(#id)&&.scroll-content&&a"
            },
            搜索: '.module-items .module-search-item;a&&title;img&&data-src;.video-serial&&Text;a&&href',
        },
        首图: {
            title: '',
            host: '',
            url: '/vodshow/fyclass--------fypage---/',
            searchUrl: '/vodsearch/**----------fypage---.html',
            searchable: 2, // enable global search
            quickSearch: 0, // enable quick search
            filterable: 0, // enable category filtering
            headers: { // site request headers; all headers are supported, usually UA and cookies
                'User-Agent': 'MOBILE_UA',
                // "Cookie": "searchneed=ok"
            },
            class_parse: '.myui-header__menu li.hidden-sm:gt(0):lt(5);a&&Text;a&&href;/(\\d+).html',
            play_parse: true,
            lazy: '',
            limit: 6,
            推荐: 'ul.myui-vodlist.clearfix;li;a&&title;a&&data-original;.pic-text&&Text;a&&href',
            double: true, // whether recommended items use two-level selectors
            一级: '.myui-vodlist li;a&&title;a&&data-original;.pic-text&&Text;a&&href',
            二级: {
                "title": ".myui-content__detail .title&&Text;.myui-content__detail p:eq(-2)&&Text",
                "img": ".myui-content__thumb .lazyload&&data-original",
                "desc": ".myui-content__detail p:eq(0)&&Text;.myui-content__detail p:eq(1)&&Text;.myui-content__detail p:eq(2)&&Text",
                "content": ".content&&Text",
                "tabs": ".nav-tabs:eq(0) li",
                "lists": ".myui-content__list:eq(#id) li"
            },
            搜索: '#searchList li;a&&title;.lazyload&&data-original;.text-muted&&Text;a&&href;.text-muted:eq(-1)&&Text',
        },
        首图2: {
            title: '',
            host: '',
            url: '/list/fyclass-fypage.html',
            searchUrl: '/vodsearch/**----------fypage---.html',
            searchable: 2, // enable global search
            quickSearch: 0, // enable quick search
            filterable: 0, // enable category filtering
            headers: {
                'User-Agent': 'UC_UA',
                // "Cookie": ""
            },
            // class_parse:'.stui-header__menu li:gt(0):lt(7);a&&Text;a&&href;/(\\d+).html',
            class_parse: '.stui-header__menu li:gt(0):lt(7);a&&Text;a&&href;.*/(.*?).html',
            play_parse: true,
            lazy: '',
            limit: 6,
            推荐: 'ul.stui-vodlist.clearfix;li;a&&title;.lazyload&&data-original;.pic-text&&Text;a&&href',
            double: true, // whether recommended items use two-level selectors
            一级: '.stui-vodlist li;a&&title;a&&data-original;.pic-text&&Text;a&&href',
            二级: {
                "title": ".stui-content__detail .title&&Text;.stui-content__detail p:eq(-2)&&Text",
                "img": ".stui-content__thumb .lazyload&&data-original",
                "desc": ".stui-content__detail p:eq(0)&&Text;.stui-content__detail p:eq(1)&&Text;.stui-content__detail p:eq(2)&&Text",
                "content": ".detail&&Text",
                "tabs": ".stui-vodlist__head h3",
                "lists": ".stui-content__playlist:eq(#id) li"
            },
            搜索: 'ul.stui-vodlist__media:eq(0) li,ul.stui-vodlist:eq(0) li,#searchList li;a&&title;.lazyload&&data-original;.text-muted&&Text;a&&href;.text-muted:eq(-1)&&Text',
            搜索1: 'ul.stui-vodlist&&li;a&&title;.lazyload&&data-original;.text-muted&&Text;a&&href;.text-muted:eq(-1)&&Text',
            搜索2: 'ul.stui-vodlist__media&&li;a&&title;.lazyload&&data-original;.text-muted&&Text;a&&href;.text-muted:eq(-1)&&Text',
        },
        默认: {
            title: '',
            host: '',
            url: '/vodshow/fyclass--------fypage---.html',
            searchUrl: '/vodsearch/-------------.html?wd=**',
            searchable: 2, // enable global search
            quickSearch: 0, // enable quick search
            filterable: 0, // enable category filtering
            headers: {
                'User-Agent': 'MOBILE_UA',
            },
            play_parse: true,
            lazy: '',
            limit: 6,
            double: true, // whether recommended items use two-level selectors
        },
        vfed: {
            title: '',
            host: '',
            url: '/index.php/vod/show/id/fyclass/page/fypage.html',
            searchUrl: '/index.php/vod/search/page/fypage/wd/**.html',
            searchable: 2, // enable global search
            quickSearch: 0, // enable quick search
            filterable: 0, // enable category filtering
            headers: {
                'User-Agent': 'UC_UA',
            },
            // class_parse:'.fed-pops-navbar&&ul.fed-part-rows&&a.fed-part-eone:gt(0):lt(5);a&&Text;a&&href;.*/(.*?).html',
            class_parse: '.fed-pops-navbar&&ul.fed-part-rows&&a;a&&Text;a&&href;.*/(.*?).html',
            play_parse: true,
            lazy: '',
            limit: 6,
            推荐: 'ul.fed-list-info.fed-part-rows;li;a.fed-list-title&&Text;a&&data-original;.fed-list-remarks&&Text;a&&href',
            double: true, // whether recommended items use two-level selectors
            一级: '.fed-list-info&&li;a.fed-list-title&&Text;a&&data-original;.fed-list-remarks&&Text;a&&href',
            二级: {
                "title": "h1.fed-part-eone&&Text;.fed-deta-content&&.fed-part-rows&&li&&Text",
                "img": ".fed-list-info&&a&&data-original",
                "desc": ".fed-deta-content&&.fed-part-rows&&li:eq(1)&&Text;.fed-deta-content&&.fed-part-rows&&li:eq(2)&&Text;.fed-deta-content&&.fed-part-rows&&li:eq(3)&&Text",
                "content": ".fed-part-esan&&Text",
                "tabs": ".fed-drop-boxs&&.fed-part-rows&&li",
                "lists": ".fed-play-item:eq(#id)&&ul:eq(1)&&li"
            },
            搜索: '.fed-deta-info;h1&&Text;.lazyload&&data-original;.fed-list-remarks&&Text;a&&href;.fed-deta-content&&Text',
        },
        海螺3: {
            title: '',
            host: '',
            searchUrl: '/v_search/**----------fypage---.html',
            url: '/vod_____show/fyclass--------fypage---.html',
            headers: {
                'User-Agent': 'MOBILE_UA'
            },
            timeout: 5000,
            class_parse: 'body&&.hl-nav li:gt(0);a&&Text;a&&href;.*/(.*?).html',
            cate_exclude: '明星|专题|最新|排行',
            limit: 40,
            play_parse: true,
            lazy: '',
            推荐: '.hl-vod-list;li;a&&title;a&&data-original;.remarks&&Text;a&&href',
            double: true,
            一级: '.hl-vod-list&&.hl-list-item;a&&title;a&&data-original;.remarks&&Text;a&&href',
            二级: {
                "title": ".hl-infos-title&&Text;.hl-text-conch&&Text",
                "img": ".hl-lazy&&data-original",
                "desc": ".hl-infos-content&&.hl-text-conch&&Text",
                "content": ".hl-content-text&&Text",
                "tabs": ".hl-tabs&&a",
                "lists": ".hl-plays-list:eq(#id)&&li"
            },
            搜索: '.hl-list-item;a&&title;a&&data-original;.remarks&&Text;a&&href',
            searchable: 2, // enable global search
            quickSearch: 0, // enable quick search
            filterable: 0, // enable category filtering
        },
        海螺2: {
            title: '',
            host: '',
            searchUrl: '/index.php/vod/search/page/fypage/wd/**/',
            url: '/index.php/vod/show/id/fyclass/page/fypage/',
            headers: {
                'User-Agent': 'MOBILE_UA'
            },
            timeout: 5000,
            class_parse: '#nav-bar li;a&&Text;a&&href;id/(.*?)/',
            limit: 40,
            play_parse: true,
            lazy: '',
            推荐: '.list-a.size;li;a&&title;.lazy&&data-original;.bt&&Text;a&&href',
            double: true,
            一级: '.list-a&&li;a&&title;.lazy&&data-original;.list-remarks&&Text;a&&href',
            二级: {
                "title": "h2&&Text;.deployment&&Text",
                "img": ".lazy&&data-original",
                "desc": ".deployment&&Text",
                "content": ".ec-show&&Text",
                "tabs": "#tag&&a",
                "lists": ".play_list_box:eq(#id)&&li"
            },
            搜索: '.search-list;a&&title;.lazy&&data-original;.deployment&&Text;a&&href',
            searchable: 2, // enable global search
            quickSearch: 0, // enable quick search
            filterable: 0, // enable category filtering
        },
        短视: {
            title: '',
            host: '',
            // homeUrl:'/',
            url: '/channel/fyclass-fypage.html',
            searchUrl: '/search.html?wd=**',
            searchable: 2, // enable global search
            quickSearch: 0, // enable quick search
            filterable: 0, // enable category filtering
            headers: { // site request headers; all headers are supported, usually UA and cookies
                'User-Agent': 'MOBILE_UA',
                // "Cookie": "searchneed=ok"
            },
            class_parse: '.menu_bottom ul li;a&&Text;a&&href;.*/(.*?).html',
            cate_exclude: '解析|动态',
            play_parse: true,
            lazy: '',
            limit: 6,
            推荐: '.indexShowBox;ul&&li;a&&title;img&&data-src;.s1&&Text;a&&href',
            double: true, // whether recommended items use two-level selectors
            一级: '.pic-list&&li;a&&title;img&&data-src;.s1&&Text;a&&href',
            二级: {
                "title": "h1&&Text;.content-rt&&p:eq(0)&&Text",
                "img": ".img&&img&&data-src",
                "desc": ".content-rt&&p:eq(1)&&Text;.content-rt&&p:eq(2)&&Text;.content-rt&&p:eq(3)&&Text;.content-rt&&p:eq(4)&&Text;.content-rt&&p:eq(5)&&Text",
                "content": ".zkjj_a&&Text",
                "tabs": ".py-tabs&&option",
                "lists": ".player:eq(#id) li"
            },
            搜索: '.sr_lists&&ul&&li;h3&&Text;img&&data-src;.int&&p:eq(0)&&Text;a&&href',
        },
        短视2: {
            title: '',
            host: '',
            class_name: '电影&电视剧&综艺&动漫',
            class_url: '1&2&3&4',
            searchUrl: '/index.php/ajax/suggest?mid=1&wd=**&limit=50',
            searchable: 2,
            quickSearch: 0,
            headers: {'User-Agent': 'MOBILE_UA'},
            url: '/index.php/api/vod#type=fyclass&page=fypage',
            filterable: 0, // enable category filtering
            filter_url: '',
            filter: {},
            filter_def: {},
            detailUrl: '/index.php/vod/detail/id/fyid.html',
            play_parse: true,
            lazy: '',
            limit: 6,
            推荐: '.list-vod.flex .public-list-box;a&&title;.lazy&&data-original;.public-list-prb&&Text;a&&href',
            一级: 'js:let body=input.split("#")[1];let t=Math.round(new Date/1e3).toString();let key=md5("DS"+t+"DCC147D11943AF75");let url=input.split("#")[0];body=body+"&time="+t+"&key="+key;print(body);fetch_params.body=body;let html=post(url,fetch_params);let data=JSON.parse(html);VODS=data.list.map(function(it){it.vod_pic=urljoin2(input.split("/i")[0],it.vod_pic);return it});',
            二级: {
                "title": ".slide-info-title&&Text;.slide-info:eq(3)--strong&&Text",
                "img": ".detail-pic&&data-original",
                "desc": ".fraction&&Text;.slide-info-remarks:eq(1)&&Text;.slide-info-remarks:eq(2)&&Text;.slide-info:eq(2)--strong&&Text;.slide-info:eq(1)--strong&&Text",
                "content": "#height_limit&&Text",
                "tabs": ".anthology.wow.fadeInUp.animated&&.swiper-wrapper&&a",
                "tab_text": ".swiper-slide&&Text",
                "lists": ".anthology-list-box:eq(#id) li"
            },
            搜索: 'json:list;name;pic;;id',
        }
    };
    return JSON.parse(JSON.stringify(mubanDict));
}

var mubanDict = getMubans();
var muban = getMubans();
export default {muban, getMubans};
摸鱼儿/api/dffd4cc3bf2fdbddf56e179fb494a4fa.js · 68 lines · new file
File diff suppressed because one or more lines are too long
摸鱼儿/api/drpy2.js · 1 line · new file
File diff suppressed because one or more lines are too long
摸鱼儿/api/偷乐短剧.py · 790 lines · new file
@@ -0,0 +1,790 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# 偷乐短剧 spider

import sys
import json
import re
import time
import urllib.parse
import requests
from bs4 import BeautifulSoup

# Import the base class
sys.path.append('../../')
try:
    from base.spider import Spider
except ImportError:
    # Fallback implementation for local debugging
    class Spider:
        def init(self, extend=""):
            pass

class Spider(Spider):
    def __init__(self):
        # Site base URL
        self.siteUrl = "https://www.toule.top"

        # Based on the site's actual structure, category links look like: /index.php/vod/show/class/分类名/id/1.html
        # Category ID map - categories extracted from the site
        self.cateManual = {
            "男频": "/index.php/vod/show/class/%E7%94%B7%E9%A2%91/id/1.html",
            "女频": "/index.php/vod/show/class/%E5%A5%B3%E9%A2%91/id/1.html",
            "都市": "/index.php/vod/show/class/%E9%83%BD%E5%B8%82/id/1.html",
            "赘婿": "/index.php/vod/show/class/%E8%B5%98%E5%A9%BF/id/1.html",
            "战神": "/index.php/vod/show/class/%E6%88%98%E7%A5%9E/id/1.html",
            "古代言情": "/index.php/vod/show/class/%E5%8F%A4%E4%BB%A3%E8%A8%80%E6%83%85/id/1.html",
            "现代言情": "/index.php/vod/show/class/%E7%8E%B0%E4%BB%A3%E8%A8%80%E6%83%85/id/1.html",
            "历史": "/index.php/vod/show/class/%E5%8E%86%E5%8F%B2/id/1.html",
            "玄幻": "/index.php/vod/show/class/%E7%8E%84%E5%B9%BB/id/1.html",
            "搞笑": "/index.php/vod/show/class/%E6%90%9E%E7%AC%91/id/1.html",
            "甜宠": "/index.php/vod/show/class/%E7%94%9C%E5%AE%A0/id/1.html",
            "励志": "/index.php/vod/show/class/%E5%8A%B1%E5%BF%97/id/1.html",
            "逆袭": "/index.php/vod/show/class/%E9%80%86%E8%A2%AD/id/1.html",
            "穿越": "/index.php/vod/show/class/%E7%A9%BF%E8%B6%8A/id/1.html",
            "古装": "/index.php/vod/show/class/%E5%8F%A4%E8%A3%85/id/1.html"
        }

        # Request headers
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
            "Referer": "https://www.toule.top/",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        # Cache
        self.cache = {}
        self.cache_timeout = {}

    def getName(self):
        return "偷乐短剧"

    def init(self, extend=""):
        # Initialization hook; intentionally left empty
        return

    def isVideoFormat(self, url):
        """Check whether a URL points to a video file."""
        video_formats = ['.mp4', '.m3u8', '.ts', '.flv', '.avi', '.mkv', '.mov', '.rmvb', '.3gp']
        for format in video_formats:
            if format in url.lower():
                return True
        return False

    def manualVideoCheck(self):
        """Whether manual video checking is needed."""
        return False

    # Helper - network requests
    def fetch(self, url, headers=None, data=None, method="GET"):
        """Unified network request helper."""
        try:
            if headers is None:
                headers = self.headers.copy()

            if method.upper() == "GET":
                response = requests.get(url, headers=headers, params=data, timeout=10, verify=False)
            else:  # POST
                response = requests.post(url, headers=headers, data=data, timeout=10, verify=False)

            response.raise_for_status()
            response.encoding = response.apparent_encoding or 'utf-8'
            return response
        except Exception as e:
            self.log(f"Request failed: {url}, error: {str(e)}", "ERROR")
            return None

    # Cache helpers
    def getCache(self, key, timeout=3600):
        """Get a cached value."""
        if key in self.cache and key in self.cache_timeout:
            if time.time() < self.cache_timeout[key]:
                return self.cache[key]
            else:
                del self.cache[key]
                del self.cache_timeout[key]
        return None

    def setCache(self, key, value, timeout=3600):
        """Set a cached value."""
        self.cache[key] = value
        self.cache_timeout[key] = time.time() + timeout

    # Logging helper
    def log(self, msg, level='INFO'):
        """Write a log line."""
        levels = {
            'DEBUG': 0,
            'INFO': 1,
            'WARNING': 2,
            'ERROR': 3
        }

        current_level = 'INFO'  # set to DEBUG for more detail

        if levels.get(level, 4) >= levels.get(current_level, 1):
            print(f"[{level}] {time.strftime('%Y-%m-%d %H:%M:%S')} - {msg}")

    # Helper - extract the video id from a URL
    def extractVodId(self, url):
        """Extract the video id from a URL."""
        # Path format: /index.php/vod/play/id/9024/sid/1/nid/1.html
        match = re.search(r'/id/(\d+)/', url)
        if match:
            return match.group(1)
        return ""

    # Helper - extract categories from page content
    def extractCategories(self, text):
        """Extract category tags from page text."""
        cats = []
        # Matches a tag string such as: "男频,逆袭,亲情,短剧"
        if "," in text:
            parts = text.split(",")
            for part in parts:
                part = part.strip()
                if part and part != "短剧":
                    cats.append(part)
        return cats

    # Main interface
    def homeContent(self, filter):
        """Return home categories and content."""
        result = {}
        classes = []

        # Try the cache first
        cache_key = 'home_classes'
        cached_classes = self.getCache(cache_key)
        if cached_classes:
            classes = cached_classes
        else:
            # Use the predefined categories
            for k, v in self.cateManual.items():
                classes.append({
                    'type_id': v,  # the full URL path is used as type_id
                    'type_name': k
                })

            # Save to cache
            self.setCache(cache_key, classes, 24 * 3600)  # cache for 24 hours

        result['class'] = classes

        # Recommended videos for the home page
        videos = self.homeVideoContent().get('list', [])
        result['list'] = videos

        return result

    def homeVideoContent(self):
        """Return recommended videos from the home page."""
        result = {'list': []}
        videos = []

        # Try the cache first
        cache_key = 'home_videos'
        cached_videos = self.getCache(cache_key)
        if cached_videos:
            return {'list': cached_videos}

        try:
            response = self.fetch(self.siteUrl)
            if response and response.status_code == 200:
                html = response.text
                soup = BeautifulSoup(html, 'html.parser')

                # Locate the "最新更新" (latest updates) section
                latest_section = soup.find('h2', text=lambda t: t and '最新更新' in t)
                if latest_section:
                    container = latest_section.parent  # its container
                    if container:
                        # All li.item elements
                        items = container.find_all('li', class_='item')

                        for item in items:
                            try:
                                # Link and title
                                title_link = item.find('h3')
                                if not title_link:
                                    continue

                                title = title_link.text.strip()

                                # The first link is the detail page link
                                link_tag = item.find('a')
                                if not link_tag:
                                    continue

                                link = link_tag.get('href', '')
                                if not link.startswith('http'):
                                    link = urllib.parse.urljoin(self.siteUrl, link)

                                # Extract the id
                                vid = self.extractVodId(link)
                                if not vid:
                                    continue

                                # Cover image
                                img_tag = item.find('img')
                                img_url = ""
                                if img_tag:
                                    img_url = img_tag.get('src', img_tag.get('data-src', ''))
                                    if img_url and not img_url.startswith('http'):
                                        img_url = urllib.parse.urljoin(self.siteUrl, img_url)

                                # Remarks
                                remarks = ""
                                remarks_tag = item.find('span', class_='remarks')
                                if remarks_tag:
                                    remarks = remarks_tag.text.strip()

                                # Tags
                                tags = ""
                                tags_tag = item.find('span', class_='tags')
                                if tags_tag:
                                    tags = tags_tag.text.strip()

                                # Merge remarks and tags
                                if remarks and tags:
                                    remarks = f"{remarks} | {tags}"
                                elif tags:
                                    remarks = tags

                                # Build the video item
                                videos.append({
                                    'vod_id': vid,
                                    'vod_name': title,
                                    'vod_pic': img_url,
                                    'vod_remarks': remarks
                                })
                            except Exception as e:
                                self.log(f"Error while processing a video item: {str(e)}", "ERROR")
                                continue

                # Save to cache
                self.setCache(cache_key, videos, 3600)  # cache for 1 hour
        except Exception as e:
            self.log(f"Error while fetching home video content: {str(e)}", "ERROR")

        result['list'] = videos
        return result

    def categoryContent(self, tid, pg, filter, extend):
        """Return category listings."""
        result = {}
        videos = []

        # Normalize the page number
        if pg is None:
            pg = 1
        else:
            pg = int(pg)

        # Build the category URL - tid is a full URL path
        if tid.startswith("/"):
            # Insert the page number; URLs look like: /index.php/vod/show/class/男频/id/1.html
            if pg > 1:
                if "html" in tid:
                    category_url = tid.replace(".html", f"/page/{pg}.html")
                else:
                    category_url = f"{tid}/page/{pg}.html"
            else:
                category_url = tid

            full_url = urllib.parse.urljoin(self.siteUrl, category_url)
        else:
            # tid is not a URL path; it may be a legacy category id, so look up its URL
            category_url = ""
            for name, url in self.cateManual.items():
                if name == tid:
                    category_url = url
                    break

            if not category_url:
                self.log(f"No URL found for category id: {tid}", "ERROR")
                result['list'] = []
                result['page'] = pg
                result['pagecount'] = 1
                result['limit'] = 0
                result['total'] = 0
                return result

            # Insert the page number
            if pg > 1:
                if "html" in category_url:
                    category_url = category_url.replace(".html", f"/page/{pg}.html")
                else:
                    category_url = f"{category_url}/page/{pg}.html"

            full_url = urllib.parse.urljoin(self.siteUrl, category_url)

        # Pagination defaults (also used when the request fails, so these must exist up front)
        total = 0
        pagecount = 1
        limit = 20

        # Fetch the category page
        try:
            response = self.fetch(full_url)
            if response and response.status_code == 200:
                html = response.text
                soup = BeautifulSoup(html, 'html.parser')

                # Video items; adjust selectors to the actual HTML structure
                items = soup.find_all('li', class_='item')

                for item in items:
                    try:
                        # Link and title
                        title_tag = item.find('h3')
                        if not title_tag:
                            continue

                        title = title_tag.text.strip()

                        # Link
                        link_tag = item.find('a')
                        if not link_tag:
                            continue

                        link = link_tag.get('href', '')
                        if not link.startswith('http'):
                            link = urllib.parse.urljoin(self.siteUrl, link)

                        # Extract the id
                        vid = self.extractVodId(link)
                        if not vid:
                            continue

                        # Cover image
                        img_tag = item.find('img')
                        img_url = ""
                        if img_tag:
                            img_url = img_tag.get('src', img_tag.get('data-src', ''))
                            if img_url and not img_url.startswith('http'):
                                img_url = urllib.parse.urljoin(self.siteUrl, img_url)

                        # Remarks
                        remarks = ""
                        remarks_tag = item.find('span', class_='remarks')
                        if remarks_tag:
                            remarks = remarks_tag.text.strip()

                        # Tags
                        tags = ""
                        tags_tag = item.find('span', class_='tags')
                        if tags_tag:
                            tags = tags_tag.text.strip()

                        # Merge remarks and tags
                        if remarks and tags:
                            remarks = f"{remarks} | {tags}"
                        elif tags:
                            remarks = tags

                        # Build the video item
                        videos.append({
                            'vod_id': vid,
                            'vod_name': title,
                            'vod_pic': img_url,
                            'vod_remarks': remarks
                        })
                    except Exception as e:
                        self.log(f"Error while processing a category video item: {str(e)}", "ERROR")
                        continue

                # Pagination info
                total = len(videos)

                # Look for the pagination element
                pagination = soup.find('ul', class_='page')
                if pagination:
                    # Find the link to the last page
                    last_page_links = pagination.find_all('a')
                    for link in last_page_links:
                        page_text = link.text.strip()
                        if page_text.isdigit():
                            pagecount = max(pagecount, int(page_text))
        except Exception as e:
            self.log(f"Error while fetching category content: {str(e)}", "ERROR")

        result['list'] = videos
        result['page'] = pg
        result['pagecount'] = pagecount
        result['limit'] = limit
        result['total'] = total

        return result

    def detailContent(self, ids):
        """Return detail content."""
        result = {}

        if not ids or len(ids) == 0:
            return result

        # Video id
        vid = ids[0]

        # Build the play page URL
        play_url = f"{self.siteUrl}/index.php/vod/play/id/{vid}/sid/1/nid/1.html"

        try:
            response = self.fetch(play_url)
            if not response or response.status_code != 200:
                return result

            html = response.text
            soup = BeautifulSoup(html, 'html.parser')

            # Basic video info
            # Title
            title = ""
            title_tag = soup.find('h1', class_='items-title')
            if title_tag:
                title = title_tag.text.strip()

            # Cover image
            pic = ""
            pic_tag = soup.find('img', class_='thumb')
            if pic_tag:
                pic = pic_tag.get('src', '')
                if pic and not pic.startswith('http'):
                    pic = urllib.parse.urljoin(self.siteUrl, pic)

            # Synopsis
            desc = ""
            desc_tag = soup.find('div', class_='text-content')
            if desc_tag:
                desc = desc_tag.text.strip()

            # Tags / categories
            tags = []
            tags_container = soup.find('span', class_='items-tags')
            if tags_container:
                tag_links = tags_container.find_all('a')
                for tag in tag_links:
                    tag_text = tag.text.strip()
                    if tag_text:
                        tags.append(tag_text)

            # Play list
            play_from = "偷乐短剧"
            play_list = []

            # Play list area
            play_area = soup.find('div', class_='swiper-wrapper')
            if play_area:
                # All episode links
                episode_links = play_area.find_all('a')
                for ep in episode_links:
                    ep_title = ep.text.strip()
                    ep_url = ep.get('href', '')

                    if ep_url:
                        # Use the URL itself as the id
                        if not ep_url.startswith('http'):
                            ep_url = urllib.parse.urljoin(self.siteUrl, ep_url)

                        # Episode label
                        ep_num = ep_title
                        if ep_num.isdigit():
                            ep_num = f"第{ep_num}集"

                        play_list.append(f"{ep_num}${ep_url}")

            # No play list found: look for a play button
            if not play_list:
                play_btn = soup.find('a', class_='btn-play')
                if play_btn:
                    play_url = play_btn.get('href', '')
                    if play_url:
                        if not play_url.startswith('http'):
                            play_url = urllib.parse.urljoin(self.siteUrl, play_url)

                        play_list.append(f"播放${play_url}")

            # Still no play link: fall back to the play page URL
            if not play_list:
                play_url = f"{self.siteUrl}/index.php/vod/play/id/{vid}/sid/1/nid/1.html"
                play_list.append(f"播放${play_url}")

            # More info (director, actors, ...)
            director = ""
            actor = ""
            year = ""
            area = ""
            remarks = ""

            # Meta items
            meta_items = soup.find_all('div', class_='meta-item')
            for item in meta_items:
                item_title = item.find('span', class_='item-title')
                item_content = item.find('span', class_='item-content')

                if item_title and item_content:
                    title_text = item_title.text.strip()
                    content_text = item_content.text.strip()

                    if "导演" in title_text:
                        director = content_text
                    elif "主演" in title_text:
                        actor = content_text
                    elif "年份" in title_text:
                        year = content_text
                    elif "地区" in title_text:
                        area = content_text
                    elif "简介" in title_text:
                        if not desc:
                            desc = content_text
                    elif "状态" in title_text:
                        remarks = content_text

            # Fallback if remarks were not found in the meta items
            if not remarks:
                remarks_tag = soup.find('span', class_='remarks')
                if remarks_tag:
                    remarks = remarks_tag.text.strip()

            # Build the standard structure
            vod = {
                "vod_id": vid,
                "vod_name": title,
                "vod_pic": pic,
                "vod_year": year,
                "vod_area": area,
                "vod_remarks": remarks,
                "vod_actor": actor,
                "vod_director": director,
                "vod_content": desc,
                "type_name": ",".join(tags),
                "vod_play_from": play_from,
                "vod_play_url": "#".join(play_list)
            }

            result = {
                'list': [vod]
            }
        except Exception as e:
            self.log(f"Error while fetching detail content: {str(e)}", "ERROR")

        return result

    def searchContent(self, key, quick, pg=1):
        """Search."""
        result = {}
        videos = []

        # Search URL and parameters
        search_url = f"{self.siteUrl}/index.php/vod/search.html"
        params = {"wd": key}

        try:
            response = self.fetch(search_url, data=params)
            if response and response.status_code == 200:
                html = response.text
                soup = BeautifulSoup(html, 'html.parser')

                # Search result items
                search_items = soup.find_all('li', class_='item')

                for item in search_items:
                    try:
                        # Title
                        title_tag = item.find('h3')
                        if not title_tag:
                            continue

                        title = title_tag.text.strip()

                        # Link
                        link_tag = item.find('a')
                        if not link_tag:
                            continue

                        link = link_tag.get('href', '')
                        if not link.startswith('http'):
                            link = urllib.parse.urljoin(self.siteUrl, link)

                        # Video id
                        vid = self.extractVodId(link)
                        if not vid:
                            continue

                        # Cover image
                        img_tag = item.find('img')
                        img_url = ""
                        if img_tag:
                            img_url = img_tag.get('src', img_tag.get('data-src', ''))
                            if img_url and not img_url.startswith('http'):
                                img_url = urllib.parse.urljoin(self.siteUrl, img_url)

                        # Remarks
                        remarks = ""
                        remarks_tag = item.find('span', class_='remarks')
                        if remarks_tag:
                            remarks = remarks_tag.text.strip()

                        # Tags
                        tags = ""
                        tags_tag = item.find('span', class_='tags')
                        if tags_tag:
                            tags = tags_tag.text.strip()

                        # Merge remarks and tags
                        if remarks and tags:
                            remarks = f"{remarks} | {tags}"
                        elif tags:
                            remarks = tags

                        # Build the video item
                        videos.append({
                            'vod_id': vid,
                            'vod_name': title,
                            'vod_pic': img_url,
                            'vod_remarks': remarks
                        })
                    except Exception as e:
                        self.log(f"Error while processing a search result: {str(e)}", "ERROR")
                        continue
        except Exception as e:
            self.log(f"Search error: {str(e)}", "ERROR")

        result['list'] = videos
        return result

    def searchContentPage(self, key, quick, pg=1):
        return self.searchContent(key, quick, pg)

    def playerContent(self, flag, id, vipFlags):
        """Resolve playable content."""
        result = {}

        try:
            # Already a direct video URL?
            if self.isVideoFormat(id):
                result["parse"] = 0
                result["url"] = id
                result["playUrl"] = ""
                result["header"] = json.dumps(self.headers)
                return result

            # A full page URL?
            if id.startswith(('http://', 'https://')):
                play_url = id
            # Try treating it as a relative path
            elif id.startswith('/'):
                play_url = urllib.parse.urljoin(self.siteUrl, id)
            # Otherwise assume it is a video id and build the play page URL
            else:
                # Check for the "videoId_episode" format
                parts = id.split('_')
                if len(parts) > 1 and parts[0].isdigit():
                    vid = parts[0]
                    nid = parts[1]
                    play_url = f"{self.siteUrl}/index.php/vod/play/id/{vid}/sid/1/nid/{nid}.html"
                else:
                    # Treat it as a plain video id
                    play_url = f"{self.siteUrl}/index.php/vod/play/id/{id}/sid/1/nid/1.html"

            # Fetch the play page to resolve the real video URL
            try:
                self.log(f"Parsing play page: {play_url}")
                response = self.fetch(play_url)
                if response and response.status_code == 200:
                    html = response.text

                    # Look for the player_aaaa variable
                    player_match = re.search(r'var\s+player_aaaa\s*=\s*({.*?});', html, re.DOTALL)
                    if player_match:
                        try:
                            player_data = json.loads(player_match.group(1))
                            if 'url' in player_data:
                                video_url = player_data['url']
                                if not video_url.startswith('http'):
                                    video_url = urllib.parse.urljoin(self.siteUrl, video_url)

                                self.log(f"Got video URL from player_aaaa: {video_url}")
                                result["parse"] = 0
                                result["url"] = video_url
                                result["playUrl"] = ""
                                result["header"] = json.dumps(self.headers)
                                return result
                        except json.JSONDecodeError as e:
                            self.log(f"Failed to parse player_aaaa JSON: {str(e)}", "ERROR")

                    # player_aaaa parsing failed; try other approaches
                    # 1. A <video> tag
                    video_match = re.search(r'<video[^>]*src=["\'](.*?)["\']', html)
                    if video_match:
                        video_url = video_match.group(1)
                        if not video_url.startswith('http'):
                            video_url = urllib.parse.urljoin(self.siteUrl, video_url)

                        self.log(f"Found video URL in a <video> tag: {video_url}")
                        result["parse"] = 0
                        result["url"] = video_url
                        result["playUrl"] = ""
                        result["header"] = json.dumps(self.headers)
                        return result

                    # 2. An iframe
                    iframe_match = re.search(r'<iframe[^>]*src=["\'](.*?)["\']', html)
                    if iframe_match:
                        iframe_url = iframe_match.group(1)
                        if not iframe_url.startswith('http'):
                            iframe_url = urllib.parse.urljoin(self.siteUrl, iframe_url)

                        self.log(f"Found an iframe, resolving: {iframe_url}")
                        # Fetch the iframe content
                        iframe_response = self.fetch(iframe_url)
                        if iframe_response and iframe_response.status_code == 200:
                            iframe_html = iframe_response.text

                            # Look for a video URL inside the iframe
                            iframe_video_match = re.search(r'(https?://[^\'"]+\.(mp4|m3u8|ts))', iframe_html)
                            if iframe_video_match:
                                video_url = iframe_video_match.group(1)

                                self.log(f"Found video URL inside the iframe: {video_url}")
                                result["parse"] = 0
                                result["url"] = video_url
                                result["playUrl"] = ""
                                result["header"] = json.dumps({
                                    "User-Agent": self.headers["User-Agent"],
                                    "Referer": iframe_url
                                })
                                return result

                    # 3. Any video-looking URL
                    url_match = re.search(r'(https?://[^\'"]+\.(mp4|m3u8|ts))', html)
                    if url_match:
                        video_url = url_match.group(1)

                        self.log(f"Found a candidate video URL: {video_url}")
                        result["parse"] = 0
                        result["url"] = video_url
                        result["playUrl"] = ""
                        result["header"] = json.dumps(self.headers)
                        return result
            except Exception as e:
                self.log(f"Error while resolving the play URL: {str(e)}", "ERROR")

            # Everything failed: signal that external parsing is needed
            self.log("No directly playable URL found; external parsing is required", "WARNING")
            result["parse"] = 1  # external parsing required
            result["url"] = play_url  # return the play page URL
            result["playUrl"] = ""
            result["header"] = json.dumps(self.headers)

        except Exception as e:
            self.log(f"Error while fetching player content: {str(e)}", "ERROR")

        return result

    def localProxy(self, param):
        """Local proxy."""
        return [404, "text/plain", {}, "Not Found"]
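Note: the playerContent() fallback chain above ultimately keys on the player_aaaa object that these play pages embed. A minimal, self-contained sketch of just that extraction step follows; the sample HTML is a made-up stand-in for a fetched page, and json.loads only succeeds when the embedded object happens to be strict JSON:

# Standalone sketch of the player_aaaa extraction used by playerContent() above.
# sample_html is hypothetical; the spider gets the real page via self.fetch().
import json
import re

sample_html = '<script>var player_aaaa={"url":"https://cdn.example.com/v/123.m3u8"};</script>'

m = re.search(r'var\s+player_aaaa\s*=\s*({.*?});', sample_html, re.DOTALL)
if m:
    try:
        player = json.loads(m.group(1))  # fails if the object is a loose JS literal, not strict JSON
        print(player.get("url"))         # -> https://cdn.example.com/v/123.m3u8
    except json.JSONDecodeError:
        pass  # fall back to the <video>/iframe/regex scans, as the spider does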
摸鱼儿/api/河马短剧.py · 581 lines · new file
@@ -0,0 +1,581 @@
# -*- coding: utf-8 -*-
import requests
import re
import json
import traceback
import sys

sys.path.append('../../')
try:
    from base.spider import Spider
except ImportError:
    # Minimal base class for local testing
    class Spider:
        def init(self, extend=""):
            pass

class Spider(Spider):
    def __init__(self):
        self.siteUrl = "https://www.kuaikaw.cn"
        self.nextData = None  # cached NEXT_DATA payload
        self.cateManual = {
            "甜宠": "462",
            "古装仙侠": "1102",
            "现代言情": "1145",
            "青春": "1170",
            "豪门恩怨": "585",
            "逆袭": "417-464",
            "重生": "439-465",
            "系统": "1159",
            "总裁": "1147",
            "职场商战": "943"
        }

    def getName(self):
        # Spider name
        return "河马短剧"

    def init(self, extend=""):
        return

    def fetch(self, url, headers=None):
        """Unified network request helper."""
        if headers is None:
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
                "Referer": self.siteUrl,
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
                "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8"
            }

        try:
            response = requests.get(url, headers=headers, timeout=10, allow_redirects=True)
            response.raise_for_status()
            return response
        except Exception as e:
            print(f"Request error: {url}, error: {str(e)}")
            return None

    def isVideoFormat(self, url):
        # Check for a video file extension
        video_formats = ['.mp4', '.mkv', '.avi', '.wmv', '.m3u8', '.flv', '.rmvb']
        for format in video_formats:
            if format in url.lower():
                return True
        return False

    def manualVideoCheck(self):
        # No manual check needed
        return False

    def homeContent(self, filter):
        """Return home categories and filters."""
        result = {}
        # Categories from the pre-built cateManual map
        classes = []
        for k in self.cateManual:
            classes.append({
                'type_name': k,
                'type_id': self.cateManual[k]
            })
        result['class'] = classes
        # Recommended videos for the home page
        try:
            result['list'] = self.homeVideoContent()['list']
        except:
            result['list'] = []

        return result

    def homeVideoContent(self):
        """Return recommended videos from the home page."""
        videos = []
        try:
            response = self.fetch(self.siteUrl)
            html_content = response.text
            # Extract the NEXT_DATA JSON payload
            next_data_pattern = r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>'
            next_data_match = re.search(next_data_pattern, html_content, re.DOTALL)
            if next_data_match:
                next_data_json = json.loads(next_data_match.group(1))
                page_props = next_data_json.get("props", {}).get("pageProps", {})
                # Carousel entries - usually recommended content
                if "bannerList" in page_props and isinstance(page_props["bannerList"], list):
                    banner_list = page_props["bannerList"]
                    for banner in banner_list:
                        book_id = banner.get("bookId", "")
                        book_name = banner.get("bookName", "")
                        cover_url = banner.get("coverWap", banner.get("wapUrl", ""))
                        # Status and chapter count
                        status = banner.get("statusDesc", "")
                        total_chapters = banner.get("totalChapterNum", "")
                        if book_id and book_name:
                            videos.append({
                                "vod_id": f"/drama/{book_id}",
                                "vod_name": book_name,
                                "vod_pic": cover_url,
                                "vod_remarks": f"{status} {total_chapters}集" if total_chapters else status
                            })

                # Recommendations from the SEO sections
                if "seoColumnVos" in page_props and isinstance(page_props["seoColumnVos"], list):
                    for column in page_props["seoColumnVos"]:
                        book_infos = column.get("bookInfos", [])
                        for book in book_infos:
                            book_id = book.get("bookId", "")
                            book_name = book.get("bookName", "")
                            cover_url = book.get("coverWap", "")
                            status = book.get("statusDesc", "")
                            total_chapters = book.get("totalChapterNum", "")

                            if book_id and book_name:
                                videos.append({
                                    "vod_id": f"/drama/{book_id}",
                                    "vod_name": book_name,
                                    "vod_pic": cover_url,
                                    "vod_remarks": f"{status} {total_chapters}集" if total_chapters else status
                                })

            # # Deduplicate
            # seen = set()
            # unique_videos = []
            # for video in videos:
            #     if video["vod_id"] not in seen:
            #         seen.add(video["vod_id"])
            #         unique_videos.append(video)
            # videos = unique_videos

        except Exception as e:
            print(f"Error while fetching home recommendations: {e}")

        result = {
            "list": videos
        }
        return result

    def categoryContent(self, tid, pg, filter, extend):
        """Return category listings."""
        result = {}
        videos = []
        # Defaults, used when the page cannot be fetched or parsed
        current_page = 1
        total_pages = 1
        url = f"{self.siteUrl}/browse/{tid}/{pg}"
        response = self.fetch(url)
        if not response:  # network failure: return an empty result
            result['list'] = []
            return result
        html_content = response.text
        # Extract the NEXT_DATA JSON payload
        next_data_pattern = r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>'
        next_data_match = re.search(next_data_pattern, html_content, re.DOTALL)
        if next_data_match:
            next_data_json = json.loads(next_data_match.group(1))
            page_props = next_data_json.get("props", {}).get("pageProps", {})
            # Total page count and current page
            current_page = page_props.get("page", 1)
            total_pages = page_props.get("pages", 1)
            # Book list
            book_list = page_props.get("bookList", [])
            # Convert to the common format
            for book in book_list:
                book_id = book.get("bookId", "")
                book_name = book.get("bookName", "")
                cover_url = book.get("coverWap", "")
                status_desc = book.get("statusDesc", "")
                total_chapters = book.get("totalChapterNum", "")
                if book_id and book_name:
                    videos.append({
                        "vod_id": f"/drama/{book_id}",
                        "vod_name": book_name,
                        "vod_pic": cover_url,
                        "vod_remarks": f"{status_desc} {total_chapters}集" if total_chapters else status_desc
                    })
        # Build the result
        result = {
            "list": videos,
            "page": int(current_page),
            "pagecount": total_pages,
            "limit": len(videos),
            "total": total_pages * len(videos) if videos else 0
        }
        return result

    def switch(self, key, pg):
        # Search
        search_results = []
        # Fetch the first page of results and read the total page count
        url = f"{self.siteUrl}/search?searchValue={key}&page={pg}"
        response = self.fetch(url)
        if not response:  # network failure: return an empty result
            return {"list": [], "page": pg}
        html_content = response.text
        # Extract the NEXT_DATA JSON payload
        next_data_pattern = r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>'
        next_data_match = re.search(next_data_pattern, html_content, re.DOTALL)
        if next_data_match:
            next_data_json = json.loads(next_data_match.group(1))
            page_props = next_data_json.get("props", {}).get("pageProps", {})
            # Total page count
            total_pages = page_props.get("pages", 1)
            # Collect results from every page
            all_book_list = []
            # Book list from the first page
            book_list = page_props.get("bookList", [])
            all_book_list.extend(book_list)
            # If there are more pages, fetch the rest as well
            if total_pages > 1:
                for page in range(2, total_pages + 1):
                    next_page_url = f"{self.siteUrl}/search?searchValue={key}&page={page}"
                    next_page_response = self.fetch(next_page_url)
                    next_page_html = next_page_response.text
                    next_page_match = re.search(next_data_pattern, next_page_html, re.DOTALL)
                    if next_page_match:
                        next_page_json = json.loads(next_page_match.group(1))
                        next_page_props = next_page_json.get("props", {}).get("pageProps", {})
                        next_page_books = next_page_props.get("bookList", [])
                        all_book_list.extend(next_page_books)
            # Convert to the common search result format
            for book in all_book_list:
                book_id = book.get("bookId", "")
                book_name = book.get("bookName", "")
                cover_url = book.get("coverWap", "")
                total_chapters = book.get("totalChapterNum", "0")
                status_desc = book.get("statusDesc", "")
                # Build the video item
                vod = {
                    "vod_id": f"/drama/{book_id}",
                    "vod_name": book_name,
                    "vod_pic": cover_url,
                    "vod_remarks": f"{status_desc} {total_chapters}集"
                }
                search_results.append(vod)
        result = {
            "list": search_results,
            "page": pg
        }
        return result

    def searchContent(self, key, quick, pg=1):
        result = self.switch(key, pg=pg)
        result['page'] = pg
        return result

    def searchContentPage(self, key, quick, pg=1):
        return self.searchContent(key, quick, pg)

    def detailContent(self, ids):
        # Fetch series info
        vod_id = ids[0]
        episode_id = None
        chapter_id = None

        if not vod_id.startswith('/drama/'):
            if vod_id.startswith('/episode/'):
                episode_info = vod_id.replace('/episode/', '').split('/')
                if len(episode_info) >= 2:
                    episode_id = episode_info[0]
                    chapter_id = episode_info[1]
                    vod_id = f'/drama/{episode_id}'
            else:
                vod_id = '/drama/' + vod_id

        drama_url = self.siteUrl + vod_id
        print(f"Requesting URL: {drama_url}")

        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
            "Referer": self.siteUrl,
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8"
        }

        rsp = self.fetch(drama_url, headers=headers)
        if not rsp or rsp.status_code != 200:
            print(f"Request failed, status code: {getattr(rsp, 'status_code', 'N/A')}")
            return {}

        html = rsp.text
        next_data_match = re.search(r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>', html, re.DOTALL)

        if not next_data_match:
            print("NEXT_DATA payload not found")
            return {}

        try:
            next_data = json.loads(next_data_match.group(1))
            page_props = next_data.get("props", {}).get("pageProps", {})
            print(f"Found page props with {len(page_props.keys())} keys")

            book_info = page_props.get("bookInfoVo", {})
            chapter_list = page_props.get("chapterList", [])

            title = book_info.get("title", "")
            sub_title = f"{book_info.get('totalChapterNum', '')}集"

            categories = []
            for category in book_info.get("categoryList", []):
                categories.append(category.get("name", ""))

            vod_content = book_info.get("introduction", "")

            vod = {
                "vod_id": vod_id,
                "vod_name": title,
                "vod_pic": book_info.get("coverWap", ""),
                "type_name": ",".join(categories),
                "vod_year": "",
                "vod_area": book_info.get("countryName", ""),
                "vod_remarks": sub_title,
                "vod_actor": ", ".join([p.get("name", "") for p in book_info.get("performerList", [])]),
                "vod_director": "",
                "vod_content": vod_content
            }

            # Build the play list
            play_url_list = []
            episodes = []

            if chapter_list:
                print(f"Found {len(chapter_list)} chapters")

                # First look for an MP4 link that can serve as a template
                mp4_template = None
                first_mp4_chapter_id = None

                # Try the first chapter's MP4 link first.
                # To improve the hit rate, request the first chapter's play page directly.
                if chapter_list and len(chapter_list) > 0:
                    first_chapter = chapter_list[0]
                    first_chapter_id = first_chapter.get("chapterId", "")
                    drama_id_clean = vod_id.replace('/drama/', '')

                    if first_chapter_id and drama_id_clean:
                        first_episode_url = f"{self.siteUrl}/episode/{drama_id_clean}/{first_chapter_id}"
                        print(f"Requesting first episode page: {first_episode_url}")

                        first_rsp = self.fetch(first_episode_url, headers=headers)
                        if first_rsp and first_rsp.status_code == 200:
                            first_html = first_rsp.text
                            # Extract an MP4 link straight from the HTML
                            mp4_pattern = r'(https?://[^"\']+\.mp4)'
                            mp4_matches = re.findall(mp4_pattern, first_html)
                            if mp4_matches:
                                mp4_template = mp4_matches[0]
                                first_mp4_chapter_id = first_chapter_id
                                print(f"Found MP4 template link: {mp4_template}")
                                print(f"Template chapter id: {first_mp4_chapter_id}")

                # No template yet: check the chapter objects for MP4 links
                if not mp4_template:
                    for chapter in chapter_list[:5]:  # only the first 5 chapters, for efficiency
                        if "chapterVideoVo" in chapter and chapter["chapterVideoVo"]:
                            chapter_video = chapter["chapterVideoVo"]
                            mp4_url = chapter_video.get("mp4", "") or chapter_video.get("mp4720p", "") or chapter_video.get("vodMp4Url", "")
                            if mp4_url and ".mp4" in mp4_url:
                                mp4_template = mp4_url
                                first_mp4_chapter_id = chapter.get("chapterId", "")
                                print(f"Found MP4 template in chapterVideoVo: {mp4_template}")
                                print(f"Template chapter id: {first_mp4_chapter_id}")
                                break

                # Walk all chapters and build play entries
                for chapter in chapter_list:
                    chapter_id = chapter.get("chapterId", "")
                    chapter_name = chapter.get("chapterName", "")

                    # 1. The chapter has its own MP4 link: use it directly
                    if "chapterVideoVo" in chapter and chapter["chapterVideoVo"]:
                        chapter_video = chapter["chapterVideoVo"]
                        mp4_url = chapter_video.get("mp4", "") or chapter_video.get("mp4720p", "") or chapter_video.get("vodMp4Url", "")
                        if mp4_url and ".mp4" in mp4_url:
                            episodes.append(f"{chapter_name}${mp4_url}")
                            continue

                    # 2. An MP4 template exists: substitute this chapter's id into it
                    if mp4_template and first_mp4_chapter_id and chapter_id:
                        # Replace the chapter-id part of the template
                        if first_mp4_chapter_id in mp4_template:
                            new_mp4_url = mp4_template.replace(first_mp4_chapter_id, chapter_id)
                            episodes.append(f"{chapter_name}${new_mp4_url}")
                            continue

                    # 3. Neither worked: fall back to an intermediate URL built from chapter_id
                    if chapter_id and chapter_name:
                        url = f"{vod_id}${chapter_id}${chapter_name}"
                        episodes.append(f"{chapter_name}${url}")

            if not episodes and vod_id:
                # Try to construct default episodes
                total_chapters = int(book_info.get("totalChapterNum", "0"))
                if total_chapters > 0:
                    print(f"Constructing {total_chapters} default episodes")

                    # If the chapter-id pattern is known, use it
                    if chapter_id and episode_id:
                        for i in range(1, total_chapters + 1):
                            chapter_name = f"第{i}集"
                            url = f"{vod_id}${chapter_id}${chapter_name}"
                            episodes.append(f"{chapter_name}${url}")
                    else:
                        # Plain construction
                        for i in range(1, total_chapters + 1):
                            chapter_name = f"第{i}集"
                            url = f"{vod_id}${chapter_name}"
                            episodes.append(f"{chapter_name}${url}")

            if episodes:
                play_url_list.append("#".join(episodes))
                vod['vod_play_from'] = '河马剧场'
                vod['vod_play_url'] = '$$$'.join(play_url_list)

            result = {
                'list': [vod]
            }
            return result
        except Exception as e:
            print(f"Failed to parse the detail page: {str(e)}")
            print(traceback.format_exc())
            return {}

    def playerContent(self, flag, id, vipFlags):
        result = {}
        print(f"playerContent called: flag={flag}, id={id}")

        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
            "Referer": self.siteUrl,
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8"
        }

        # Parse the id parameter
        parts = id.split('$')
        drama_id = None
        chapter_id = None

        if len(parts) >= 2:
            drama_id = parts[0]
            chapter_id = parts[1]
            chapter_name = parts[2] if len(parts) > 2 else "第一集"
            print(f"Parsed: drama_id={drama_id}, chapter_id={chapter_id}")
        else:
            # Legacy data format
            print(f"Using the raw URL format: {id}")
            result["parse"] = 0
            result["url"] = id
            result["header"] = json.dumps(headers)
            return result

        # chapter_id may already be a video link
        if 'http' in chapter_id and '.mp4' in chapter_id:
            print(f"Already an MP4 link: {chapter_id}")
            result["parse"] = 0
            result["url"] = chapter_id
            result["header"] = json.dumps(headers)
            return result

        # Build the episode page URL
        drama_id_clean = drama_id.replace('/drama/', '')
        episode_url = f"{self.siteUrl}/episode/{drama_id_clean}/{chapter_id}"
        print(f"Requesting episode page: {episode_url}")

        try:
            rsp = self.fetch(episode_url, headers=headers)
            if not rsp or rsp.status_code != 200:
                print(f"Request failed, status code: {getattr(rsp, 'status_code', 'N/A')}")
                result["parse"] = 0
                result["url"] = id
                result["header"] = json.dumps(headers)
                return result

            html = rsp.text
            print(f"Fetched page size: {len(html)} bytes")

            # Try to extract the video link
            mp4_url = None

            # Method 1: from NEXT_DATA
            next_data_match = re.search(r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>', html, re.DOTALL)
            if next_data_match:
                try:
                    print("Found NEXT_DATA")
                    next_data = json.loads(next_data_match.group(1))
                    page_props = next_data.get("props", {}).get("pageProps", {})

                    # Look up the current chapter in chapterList
                    chapter_list = page_props.get("chapterList", [])
                    print(f"Found chapter list, length: {len(chapter_list)}")

                    for chapter in chapter_list:
                        if chapter.get("chapterId") == chapter_id:
                            print(f"Found matching chapter: {chapter.get('chapterName')}")
                            chapter_video = chapter.get("chapterVideoVo", {})
                            mp4_url = chapter_video.get("mp4", "") or chapter_video.get("mp4720p", "") or chapter_video.get("vodMp4Url", "")
                            if mp4_url:
                                print(f"Found MP4 link in chapterList: {mp4_url}")
                            break

                    # Not found: try the current chapter info
                    if not mp4_url:
                        current_chapter = page_props.get("chapterInfo", {})
                        if current_chapter:
                            print("Found current chapter info")
                            chapter_video = current_chapter.get("chapterVideoVo", {})
                            mp4_url = chapter_video.get("mp4", "") or chapter_video.get("mp4720p", "") or chapter_video.get("vodMp4Url", "")
                            if mp4_url:
                                print(f"Found MP4 link in chapterInfo: {mp4_url}")
                except Exception as e:
                    print(f"Failed to parse NEXT_DATA: {str(e)}")
                    print(traceback.format_exc())

            # Method 2: extract an MP4 link straight from the HTML
            if not mp4_url:
                mp4_pattern = r'(https?://[^"\']+\.mp4)'
                mp4_matches = re.findall(mp4_pattern, html)
                if mp4_matches:
                    # Prefer a link containing the chapter id
                    matched_mp4 = False
                    for url in mp4_matches:
                        if chapter_id in url:
                            mp4_url = url
                            matched_mp4 = True
                            print(f"Extracted the chapter's MP4 link from HTML: {mp4_url}")
                            break

                    # No link contains chapter_id: use the first one
                    if not matched_mp4 and mp4_matches:
                        mp4_url = mp4_matches[0]
                        print(f"Extracted an MP4 link from HTML: {mp4_url}")

            if mp4_url and ".mp4" in mp4_url:
                print(f"Final MP4 link: {mp4_url}")
                result["parse"] = 0
                result["url"] = mp4_url
                result["header"] = json.dumps(headers)
                return result
            else:
                print("No valid MP4 link found; re-scanning the page content")
                # One more broad scan for any possible MP4 link in the HTML
                all_mp4_pattern = r'(https?://[^"\']+\.mp4)'
                all_mp4_matches = re.findall(all_mp4_pattern, html)
                if all_mp4_matches:
                    mp4_url = all_mp4_matches[0]
                    print(f"Broad scan found an MP4 link: {mp4_url}")
                    result["parse"] = 0
                    result["url"] = mp4_url
                    result["header"] = json.dumps(headers)
                    return result

            print(f"No video link found; returning the episode URL: {episode_url}")
            result["parse"] = 0
            result["url"] = episode_url
            result["header"] = json.dumps(headers)
            return result
        except Exception as e:
            print(f"Request or parsing failed: {str(e)}")
            print(traceback.format_exc())
            result["parse"] = 0
            result["url"] = id
            result["header"] = json.dumps(headers)
            return result

    def localProxy(self, param):
        # Local proxy hook; simply echo the parameter back
        return [200, "video/MP2T", {}, param]

    def destroy(self):
        # Resource cleanup
        pass
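Note: every page this spider touches (home, browse, search, drama, episode) is a Next.js page, so all data comes from one shared step: reading the __NEXT_DATA__ script tag. A minimal, self-contained sketch of that step; the sample HTML below is hypothetical:

# Standalone sketch of the __NEXT_DATA__ extraction used throughout the spider above.
import json
import re

sample_html = ('<script id="__NEXT_DATA__" type="application/json">'
               '{"props":{"pageProps":{"bookList":[{"bookId":"9","bookName":"demo"}]}}}'
               '</script>')

m = re.search(r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>', sample_html, re.DOTALL)
if m:
    page_props = json.loads(m.group(1)).get("props", {}).get("pageProps", {})
    print([b["bookName"] for b in page_props.get("bookList", [])])  # -> ['demo']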
摸鱼儿/api/金牌影视.py · 225 lines · new file
@@ -0,0 +1,225 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# by @嗷呜
|
||||
import json
|
||||
import sys
|
||||
import threading
|
||||
import uuid
|
||||
import requests
|
||||
sys.path.append('..')
|
||||
from base.spider import Spider
|
||||
import time
|
||||
from Crypto.Hash import MD5, SHA1
|
||||
|
||||
class Spider(Spider):
    '''
    Config example:
    {
        "key": "xxxx",
        "name": "xxxx",
        "type": 3,
        "api": ".所在路径/金牌.py",
        "searchable": 1,
        "quickSearch": 1,
        "filterable": 1,
        "changeable": 1,
        "ext": {
            "site": "https://www.jiabaide.cn,域名2,域名3"
        }
    },
    '''
    def init(self, extend=""):
        if extend:
            hosts = json.loads(extend)['site']
            self.host = self.host_late(hosts)
        pass

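    # "site" in ext may hold several mirror domains, comma-separated; init()
    # keeps whichever one answers fastest (see host_late below).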
    def getName(self):
        pass

    def isVideoFormat(self, url):
        pass

    def manualVideoCheck(self):
        pass

    def destroy(self):
        pass

    def homeContent(self, filter):
        cdata = self.fetch(f"{self.host}/api/mw-movie/anonymous/get/filer/type", headers=self.getheaders()).json()
        fdata = self.fetch(f"{self.host}/api/mw-movie/anonymous/v1/get/filer/list", headers=self.getheaders()).json()
        result = {}
        classes = []
        filters = {}
        for k in cdata['data']:
            classes.append({
                'type_name': k['typeName'],
                'type_id': str(k['typeId']),
            })
        sort_values = [{"n": "最近更新", "v": "2"}, {"n": "人气高低", "v": "3"}, {"n": "评分高低", "v": "4"}]
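        # Shared sort options; tid '1' drops "最近更新" below, presumably
        # because that category does not support it.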
        for tid, d in fdata['data'].items():
            current_sort_values = sort_values.copy()
            if tid == '1':
                del current_sort_values[0]
            filters[tid] = [
                {"key": "type", "name": "类型",
                 "value": [{"n": i["itemText"], "v": i["itemValue"]} for i in d["typeList"]]},
                *([] if not d["plotList"] else [{"key": "v_class", "name": "剧情",
                                                "value": [{"n": i["itemText"], "v": i["itemText"]}
                                                          for i in d["plotList"]]}]),
                {"key": "area", "name": "地区",
                 "value": [{"n": i["itemText"], "v": i["itemText"]} for i in d["districtList"]]},
                {"key": "year", "name": "年份",
                 "value": [{"n": i["itemText"], "v": i["itemText"]} for i in d["yearList"]]},
                {"key": "lang", "name": "语言",
                 "value": [{"n": i["itemText"], "v": i["itemText"]} for i in d["languageList"]]},
                {"key": "sort", "name": "排序", "value": current_sort_values}
            ]
        result['class'] = classes
        result['filters'] = filters
        return result

    def homeVideoContent(self):
        data1 = self.fetch(f"{self.host}/api/mw-movie/anonymous/v1/home/all/list", headers=self.getheaders()).json()
        data2 = self.fetch(f"{self.host}/api/mw-movie/anonymous/home/hotSearch", headers=self.getheaders()).json()
        data = []
        for i in data1['data'].values():
            data.extend(i['list'])
        data.extend(data2['data'])
        vods = self.getvod(data)
        return {'list': vods}

    def categoryContent(self, tid, pg, filter, extend):
        params = {
            "area": extend.get('area', ''),
            "filterStatus": "1",
            "lang": extend.get('lang', ''),
            "pageNum": pg,
            "pageSize": "30",
            "sort": extend.get('sort', '1'),
            "sortBy": "1",
            "type": extend.get('type', ''),
            "type1": tid,
            "v_class": extend.get('v_class', ''),
            "year": extend.get('year', '')
        }
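        # The same ordered dict builds both the query string and the signature
        # in getheaders(), so the signed payload matches the URL byte-for-byte
        # (dicts preserve insertion order in Python 3.7+).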
        data = self.fetch(f"{self.host}/api/mw-movie/anonymous/video/list?{self.js(params)}", headers=self.getheaders(params)).json()
        result = {}
        result['list'] = self.getvod(data['data']['list'])
        result['page'] = pg
        result['pagecount'] = 9999
        result['limit'] = 90
        result['total'] = 999999
        return result

    def detailContent(self, ids):
        data = self.fetch(f"{self.host}/api/mw-movie/anonymous/video/detail?id={ids[0]}", headers=self.getheaders({'id': ids[0]})).json()
        vod = self.getvod([data['data']])[0]
        vod['vod_play_from'] = '金牌'
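        # Build "label$payload" entries joined by '#'; the "vodId@@nid" payload
        # is split back apart in playerContent().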
        vod['vod_play_url'] = '#'.join(
            f"{i['name'] if len(vod['episodelist']) > 1 else vod['vod_name']}${ids[0]}@@{i['nid']}" for i in
            vod['episodelist'])
        vod.pop('episodelist', None)
        return {'list': [vod]}

    def searchContent(self, key, quick, pg="1"):
        params = {
            "keyword": key,
            "pageNum": pg,
            "pageSize": "8",
            "sourceCode": "1"
        }
        data = self.fetch(f"{self.host}/api/mw-movie/anonymous/video/searchByWord?{self.js(params)}", headers=self.getheaders(params)).json()
        vods = self.getvod(data['data']['result']['list'])
        return {'list': vods, 'page': pg}

    def playerContent(self, flag, id, vipFlags):
        self.header = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.61 Chrome/126.0.6478.61 Not/A)Brand/8 Safari/537.36',
            'sec-ch-ua-platform': '"Windows"',
            'DNT': '1',
            'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"',
            'sec-ch-ua-mobile': '?0',
            'Origin': self.host,
            'Referer': f'{self.host}/'
        }
        ids = id.split('@@')
        pdata = self.fetch(f"{self.host}/api/mw-movie/anonymous/v2/video/episode/url?clientType=1&id={ids[0]}&nid={ids[1]}", headers=self.getheaders({'clientType': '1', 'id': ids[0], 'nid': ids[1]})).json()
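        # One entry per available resolution; a flat
        # [label1, url1, label2, url2, ...] list lets the player offer a
        # quality picker, and parse=0 marks the URLs as directly playable.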
        vlist = []
        for i in pdata['data']['list']:
            vlist.extend([i['resolutionName'], i['url']])
        return {'parse': 0, 'url': vlist, 'header': self.header}

    def localProxy(self, param):
        pass

    def host_late(self, url_list):
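        # Race every candidate host with a concurrent HEAD request and keep
        # the fastest responder; unreachable hosts score float('inf').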
        if isinstance(url_list, str):
            urls = [u.strip() for u in url_list.split(',')]
        else:
            urls = url_list
        if len(urls) <= 1:
            return urls[0] if urls else ''

        results = {}
        threads = []

        def test_host(url):
            try:
                start_time = time.time()
                requests.head(url, timeout=1.0, allow_redirects=False)
                delay = (time.time() - start_time) * 1000
                results[url] = delay
            except Exception:
                results[url] = float('inf')

        for url in urls:
            t = threading.Thread(target=test_host, args=(url,))
            threads.append(t)
            t.start()
        for t in threads:
            t.join()
        return min(results.items(), key=lambda x: x[1])[0]

    def md5(self, sign_key):
        md5_hash = MD5.new()
        md5_hash.update(sign_key.encode('utf-8'))
        md5_result = md5_hash.hexdigest()
        return md5_result

    def js(self, param):
        return '&'.join(f"{k}={v}" for k, v in param.items())

    def getheaders(self, param=None):
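        # Request signing: sign = SHA1(MD5(querystring)), where the querystring
        # is the request params plus the shared key and a millisecond
        # timestamp, joined in insertion order by js().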
        if param is None:
            param = {}
        t = str(int(time.time() * 1000))
        param['key'] = 'cb808529bae6b6be45ecfab29a4889bc'
        param['t'] = t
        sha1_hash = SHA1.new()
        sha1_hash.update(self.md5(self.js(param)).encode('utf-8'))
        sign = sha1_hash.hexdigest()
        deviceid = str(uuid.uuid4())
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.61 Chrome/126.0.6478.61 Not/A)Brand/8 Safari/537.36',
            'Accept': 'application/json, text/plain, */*',
            'sign': sign,
            't': t,
            'deviceid': deviceid
        }
        return headers

    def convert_field_name(self, field):
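        # Map the API's camelCase keys to TVBox snake_case,
        # e.g. 'vodName' -> 'vod_name', 'typeId' -> 'type_id'.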
        field = field.lower()
        if field.startswith('vod') and len(field) > 3:
            field = field.replace('vod', 'vod_')
        if field.startswith('type') and len(field) > 4:
            field = field.replace('type', 'type_')
        return field

    def getvod(self, array):
        return [{self.convert_field_name(k): v for k, v in item.items()} for item in array]