')
# Matches a search error notice, e.g. "[Error] keyword too short, please enter at least two characters."
# parse_html_to_search_page builds its failure message from groups 1 and 2 of this match.
pattern_html_search_error = compile(r'
')
# Stored as a (pattern, 0) tuple: the parsers star-unpack it into PatternTool.match_or_default,
# so the trailing 0 is presumably the fallback total when nothing matches -- TODO confirm.
pattern_html_search_total = compile(r'class="text-white">(\d+) A漫.'), 0
# Album entries on the favorites page; parse_html_to_favorite_page unpacks each hit as (album id, title).
pattern_html_favorite_content = compile(
r'
[\s\S]*?'
r'
([^<]*?)'
r'
'
)
# Total number of albums in the favorites folder.
pattern_html_favorite_total = compile(r' : (\d+)[^/]*/\D*(\d+)')
# All favorites folders: the first pattern is matched against the whole page to get the
# folder-list text, the second is run with findall over that text to get (FID, name) pairs.
pattern_html_favorite_folder_list = [
compile(r'
'),
compile(r'
')
]
@classmethod
def parse_html_to_search_page(cls, html: str) -> JmSearchPage:
    """
    Build a JmSearchPage from the HTML of a search result page.

    :param html: HTML text of the search page
    :return: JmSearchPage created from a list of (album_id, info) pairs and the total
             result count, where info is {'name': <title>, 'tags': [<tag>, ...]}
    """

    def describe_error(match):
        # The error pattern exposes two groups; they are joined as "<group 1>: <group 2>".
        return '{}: {}'.format(match[1], match[2])

    # Step 1: the page must not contain the search error notice.
    PatternTool.require_not_match(
        html,
        cls.pattern_html_search_error,
        msg_func=describe_error,
    )

    # Step 2: keep only the portion of the page selected by the "shorten" pattern;
    # everything below works on this narrowed text.
    narrowed = PatternTool.require_match(
        html,
        cls.pattern_html_search_shorten_for,
        msg='未匹配到搜索结果',
    )

    # Step 3: total number of results.
    total = int(PatternTool.match_or_default(narrowed, *cls.pattern_html_search_total))

    # Step 4: one entry per album. The third captured value (label/category text)
    # is deliberately ignored. The key is called 'name' (not 'title') so the entry
    # has the same shape as the entries built from API responses.
    content = [
        (
            album_id,
            {
                'name': title,
                'tags': cls.pattern_html_search_tags.findall(tag_text),
            },
        )
        for album_id, title, _label_category_text, tag_text
        in cls.pattern_html_search_album_info_list.findall(narrowed)
    ]

    return JmSearchPage(content, total)
@classmethod
def parse_html_to_category_page(cls, html: str) -> JmSearchPage:
    """
    Build a JmSearchPage from the HTML of a category listing page.

    :param html: HTML text of the category page
    :return: JmSearchPage created from a list of (album_id, info) pairs and the total
             result count, where info is {'name': <title>, 'tags': [<tag>, ...]}
    """
    # The total uses the same (pattern, default) pair as the search page.
    total = int(PatternTool.match_or_default(html, *cls.pattern_html_search_total))

    # Each hit of the category pattern yields (album id, title, tag text); the
    # individual tags are then pulled out of the tag text.
    # 'name' is used as the key so the entry matches the API-based entries.
    content = [
        (
            album_id,
            {
                'name': title,
                'tags': cls.pattern_html_search_tags.findall(tag_text),
            },
        )
        for album_id, title, tag_text
        in cls.pattern_html_category_album_info_list.findall(html)
    ]

    return JmSearchPage(content, total)
@classmethod
def parse_html_to_favorite_page(cls, html: str) -> JmFavoritePage:
    """
    Build a JmFavoritePage from the HTML of a favorites page.

    :param html: HTML text of the favorites page
    :return: JmFavoritePage created from the album entries, the folder list
             ([{'name': ..., 'FID': ...}, ...]) and the total album count
    """
    # Total number of albums; the match is mandatory.
    total_text = PatternTool.require_match(
        html,
        cls.pattern_html_favorite_total,
        '未匹配到收藏夹的本子总数',
    )
    total = int(total_text)

    # Albums shown on this page, as (album id, {'name': title}) pairs.
    albums = [
        (album_id, {'name': album_title})
        for album_id, album_title in cls.pattern_html_favorite_content.findall(html)
    ]

    # Folder list: first isolate the folder-list text, then extract every
    # (folder id, folder name) pair from it.
    section_pattern, entry_pattern = cls.pattern_html_favorite_folder_list
    section_text = PatternTool.require_match(html, section_pattern, '未匹配到收藏夹列表')
    folder_list = [
        {'name': folder_name, 'FID': folder_id}
        for folder_id, folder_name in entry_pattern.findall(section_text)
    ]

    return JmFavoritePage(albums, folder_list, total)
@classmethod
def parse_api_to_search_page(cls, data: AdvancedDict) -> JmSearchPage:
    """
    Build a JmSearchPage from the decoded search response of the API.

    Sample of ``data``::

        {
          "search_query": "MANA",
          "total": "177",
          "content": [
            {
              "id": "441923",
              "author": "MANA",
              "description": "",
              "name": "[MANA] 神里绫华5",
              "image": "",
              "category": {"id": "1", "title": "同人"},
              "category_sub": {"id": "1", "title": "同人"}
            }
          ]
        }

    :param data: response data; ``total`` and ``content`` are read from it
    :return: JmSearchPage created from the adapted content and the total count
    """
    # data.total can be None (observed 2024-01-05); a falsy total counts as 0.
    raw_total = data.total
    total: int = int(raw_total or 0)
    return JmSearchPage(cls.adapt_content(data.content), total)
@classmethod
def parse_api_to_favorite_page(cls, data: AdvancedDict) -> JmFavoritePage:
    """
    Build a JmFavoritePage from the decoded favorites response of the API.

    Sample of ``data``::

        {
          "list": [
            {
              "id": "363859",
              "author": "紺菓",
              "description": "",
              "name": "[無邪氣漢化組] (C99) [紺色果實 (紺菓)] サレンの樂しい夢 (プリンセスコネクト!Re:Dive) [中國翻譯]",
              "latest_ep": null,
              "latest_ep_aid": null,
              "image": "",
              "category": {"id": "1", "title": "同人"},
              "category_sub": {"id": "1", "title": "同人"}
            }
          ],
          "folder_list": [
            {
              "0": "123",
              "FID": "123",
              "1": "456",
              "UID": "456",
              "2": "<folder name>",
              "name": "<folder name>"
            }
          ],
          "total": "87",
          "count": 20
        }

    :param data: response data; ``total``, ``list`` and ``folder_list`` are read
                 from it (``count`` is not used)
    :return: JmFavoritePage created from the adapted album list, the folder list
             (an empty list when the key is absent) and the total count
    """
    total: int = int(data.total)
    return JmFavoritePage(
        cls.adapt_content(data.list),
        data.get('folder_list', []),
        total,
    )
@classmethod
def adapt_content(cls, content):
    """
    Convert API result items into the (id, info_dict) pairs used as page content.

    The info dict is the item's own underlying dict (``item.src_dict``), not a
    copy; it is updated in place so that it always has a 'tags' key (an empty
    list when the API did not send one), like the entries built by the HTML parsers.

    :param content: iterable of items exposing ``id`` and ``src_dict``;
                    ``None`` is treated as an empty result list
    :return: list of ``(item.id, item.src_dict)`` tuples, in input order
    """
    # The API can return null fields (the search parser already guards
    # ``total`` this way), so a missing content list means "no results"
    # rather than a TypeError.
    if content is None:
        return []

    adapted = []
    for item in content:
        item_id = item.id
        info: dict = item.src_dict
        info.setdefault('tags', [])
        adapted.append((item_id, info))
    return adapted
class JmApiAdaptTool:
    """
    Adapts the JSON objects returned by the mobile API into the standard entity classes.

    Sample album payload::

        {
          "id": 123,
          "name": "[狗野叉漢化]",
          "author": ["AREA188"],
          "images": ["00004.webp"],
          "description": null,
          "total_views": "41314",
          "likes": "918",
          "series": [],
          "series_id": "0",
          "comment_total": "5",
          "tags": ["全彩", "中文"],
          "works": [],
          "actors": [],
          "related_list": [
            {
              "id": "333718",
              "author": "been",
              "description": "",
              "name": "[been]The illusion of lies(1)[中國語][無修正][全彩]",
              "image": ""
            }
          ],
          "liked": false,
          "is_favorite": false
        }

    Sample photo payload::

        {
          "id": 413446,
          "series": [
            {"id": "487043", "name": "第48話", "sort": "48"}
          ],
          "tags": "慾望 調教 NTL 地鐵 戲劇",
          "name": "癡漢成癮-第2話",
          "images": ["00047.webp"],
          "series_id": "400222",
          "is_favorite": false,
          "liked": false
        }
    """

    # Per entity class: the payload fields passed to its constructor.
    # A plain string is passed under the same name; a tuple
    # (payload_key, constructor_key) is passed under the second name.
    field_adapter = {
        JmAlbumDetail: [
            'likes',
            'tags',
            'works',
            'actors',
            'related_list',
            'name',
            ('id', 'album_id'),
            ('author', 'authors'),
            ('total_views', 'views'),
            ('comment_total', 'comment_count'),
        ],
        JmPhotoDetail: [
            'name',
            'series_id',
            'tags',
            ('id', 'photo_id'),
            ('images', 'page_arr'),
        ],
    }

    @classmethod
    def parse_entity(cls, data: dict, clazz: type):
        """
        Create an instance of ``clazz`` from an API payload.

        :param data: payload dict; every key named by the adapter must be present
        :param clazz: entity class to instantiate; albums are recognised by
                      ``issubclass(clazz, JmAlbumDetail)``, everything else is
                      post-processed as a photo
        :return: ``clazz(**fields)``
        """
        specs = cls.get_adapter(clazz)

        fields = {}
        for spec in specs:
            if isinstance(spec, str):
                # Same key on both sides.
                fields[spec] = data[spec]
            elif isinstance(spec, tuple):
                # (payload key, constructor keyword)
                source_key, target_key = spec
                fields[target_key] = data[source_key]

        # Fill in the constructor arguments that are not a plain copy of a payload field.
        if issubclass(clazz, JmAlbumDetail):
            post_adapt = cls.post_adapt_album
        else:
            post_adapt = cls.post_adapt_photo
        post_adapt(data, clazz, fields)

        return clazz(**fields)

    @classmethod
    def get_adapter(cls, clazz: type):
        """
        Look up the field list for ``clazz``.

        The first key of ``field_adapter`` that ``clazz`` is a subclass of wins;
        if there is none, the failure is reported through ``ExceptionTool.raises``.
        """
        for base in cls.field_adapter:
            if issubclass(clazz, base):
                return cls.field_adapter[base]

        ExceptionTool.raises(f'不支持的类型: {clazz}')

    @classmethod
    def post_adapt_album(cls, data: dict, _clazz: type, fields: dict):
        """
        Add the album-only constructor arguments to ``fields`` (in place).

        :param data: album payload; ``data['series']`` lists the chapters
        :param _clazz: unused
        :param fields: constructor keyword arguments being assembled
        """
        # One (id, sort, name) tuple per chapter of the series.
        chapters = (AdvancedDict(raw) for raw in data['series'])
        fields['episode_list'] = [
            (chapter.id, chapter.sort, chapter.name) for chapter in chapters
        ]

        # These arguments are not taken from the payload; each is set to the placeholder '0'.
        placeholder_keys = ('scramble_id', 'page_count', 'pub_date', 'update_date')
        fields.update(dict.fromkeys(placeholder_keys, '0'))

    @classmethod
    def post_adapt_photo(cls, data: dict, _clazz: type, fields: dict):
        """
        Add the photo-only constructor arguments to ``fields`` (in place).

        :param data: photo payload; ``data['series']`` and ``data['id']`` are read
        :param _clazz: unused
        :param fields: constructor keyword arguments being assembled
        """
        import random

        # 'sort' is taken from the series entry whose id equals this photo's id
        # (sort values in the series start at 1); without such an entry it is 1.
        chapters = (AdvancedDict(raw) for raw in data['series'])
        fields['sort'] = next(
            (
                chapter.sort
                for chapter in chapters
                if int(chapter.id) == int(data['id'])
            ),
            1,
        )

        # The image domain is picked at random from the configured list.
        fields['data_original_domain'] = random.choice(JmModuleConfig.DOMAIN_IMAGE_LIST)
class JmImageTool:
    """Helpers for saving downloaded images and undoing their strip scrambling."""

    @classmethod
    def save_resp_img(cls, resp: Any, filepath: str, need_convert=True):
        """
        Save the image carried by an HTTP response to a file.

        Use need_convert=True when the file format has to change (e.g. .jpg -> .png):
        the image is then opened from ``resp.content`` and saved again.
        With need_convert=False the response is written out as-is, which skips
        decoding the image and is therefore cheaper.

        :param resp: JmImageResp
        :param filepath: path of the image file to write
        :param need_convert: whether to re-encode the image; only the literal
                             ``False`` selects the direct path
        """
        if need_convert is False:
            cls.save_directly(resp, filepath)
            return

        cls.save_image(cls.open_image(resp.content), filepath)

    @classmethod
    def save_image(cls, image: Image, filepath: str):
        """
        Save an image object to a file.

        :param image: PIL.Image object
        :param filepath: path of the file to write
        """
        image.save(filepath)

    @classmethod
    def save_directly(cls, resp, filepath):
        """Write the response content to ``filepath`` via ``common.save_resp_content``."""
        from common import save_resp_content

        save_resp_content(resp, filepath)

    @classmethod
    def decode_and_save(cls,
                        num: int,
                        img_src: Image,
                        decoded_save_path: str
                        ) -> None:
        """
        Unscramble an image and save the result.

        The source is treated as ``num`` horizontal strips: strips are read from
        the bottom of the source upwards and written to the output from the top
        downwards. When the height is not divisible by ``num``, the leftover rows
        belong to the first strip written.

        :param num: number of strips (see get_num); 0 means the image is not scrambled
        :param img_src: the source image
        :param decoded_save_path: where to save the unscrambled image
        """
        # Not scrambled: save the source unchanged.
        if num == 0:
            cls.save_image(img_src, decoded_save_path)
            return

        import math

        width, height = img_src.size
        canvas = Image.new("RGB", (width, height))

        remainder = height % num
        strip = math.floor(height / num)

        for index in range(num):
            top_src = height - strip * (index + 1) - remainder
            if index == 0:
                # The first strip also carries the leftover rows.
                strip_height = strip + remainder
                top_dst = 0
            else:
                # Later strips are shifted down by the leftover rows.
                strip_height = strip
                top_dst = strip * index + remainder

            piece = img_src.crop((0, top_src, width, top_src + strip_height))
            canvas.paste(piece, (0, top_dst, width, top_dst + strip_height))

        cls.save_image(canvas, decoded_save_path)

    @classmethod
    def open_image(cls, fp: Union[str, bytes]):
        """
        Open an image.

        :param fp: a str is passed to ``Image.open`` directly;
                   anything else is wrapped in a BytesIO first
        :return: the object returned by ``Image.open``
        """
        if isinstance(fp, str):
            return Image.open(fp)

        from io import BytesIO
        return Image.open(BytesIO(fp))

    @classmethod
    def get_num(cls, scramble_id, aid, filename: str) -> int:
        """
        Compute the number of strips an image was cut into.

        :param scramble_id: threshold id (converted with int()); ids below it give 0
        :param aid: id the image belongs to (converted with int())
        :param filename: image file name that is hashed together with ``aid``
        :return: 0, 10, or an even number derived from the MD5 of f"{aid}{filename}"
        """
        scramble_id = int(scramble_id)
        aid = int(aid)

        if aid < scramble_id:
            return 0
        if aid < JmMagicConstants.SCRAMBLE_268850:
            return 10

        import hashlib

        modulus = 10 if aid < JmMagicConstants.SCRAMBLE_421926 else 8
        digest = hashlib.md5(f"{aid}{filename}".encode()).hexdigest()
        # Character code of the last hex digit, reduced modulo 10 or 8, then
        # mapped to an even count starting at 2.
        return (ord(digest[-1]) % modulus) * 2 + 2

    @classmethod
    def get_num_by_url(cls, scramble_id, url) -> int:
        """
        Compute the number of strips for an image URL.

        The id is parsed from the URL with ``JmcomicText.parse_to_jm_id`` and the
        file name is obtained with ``of_file_name(url, True)``.
        """
        aid = JmcomicText.parse_to_jm_id(url)
        filename = of_file_name(url, True)
        return cls.get_num(scramble_id, aid=aid, filename=filename)

    @classmethod
    def get_num_by_detail(cls, detail: JmImageDetail) -> int:
        """
        Compute the number of strips from an image detail object, using its
        ``scramble_id``, ``aid`` and ``img_file_name``.
        """
        return cls.get_num(detail.scramble_id, detail.aid, detail.img_file_name)
class JmCryptoTool:
    """
    Encryption / decryption logic for the JM API.
    """

    @classmethod
    def token_and_tokenparam(cls,
                             ts,
                             ver=None,
                             secret=None,
                             ):
        """
        Compute the ``token`` and ``tokenparam`` request headers of the API.

        :param ts: timestamp
        :param ver: app version; defaults to JmMagicConstants.APP_VERSION
        :param secret: secret; defaults to JmMagicConstants.APP_TOKEN_SECRET
        :return: (token, tokenparam)
        """
        ver = JmMagicConstants.APP_VERSION if ver is None else ver
        secret = JmMagicConstants.APP_TOKEN_SECRET if secret is None else secret

        # tokenparam is "<ts>,<ver>", e.g. 1700566805,1.6.3
        tokenparam = '{},{}'.format(ts, ver)

        # token is the MD5 hex digest of "<ts><secret>",
        # e.g. 81498a20feea7fbb7149c637e49702e3
        token = cls.md5hex(f'{ts}{secret}')

        return token, tokenparam

    @classmethod
    def decode_resp_data(cls,
                         data: str,
                         ts,
                         secret=None,
                         ) -> str:
        """
        Decrypt the ``data`` field of an API response.

        :param data: resp.json()['data']
        :param ts: timestamp
        :param secret: secret; defaults to JmMagicConstants.APP_DATA_SECRET
        :return: the decrypted text (a JSON string)
        """
        if secret is None:
            secret = JmMagicConstants.APP_DATA_SECRET

        # 1. base64-decode the payload
        import base64
        encrypted = base64.b64decode(data)

        # 2. AES-ECB decrypt; the key is the UTF-8 bytes of md5hex("<ts><secret>")
        aes_key = cls.md5hex(f'{ts}{secret}').encode('utf-8')
        from Crypto.Cipher import AES
        cipher = AES.new(aes_key, AES.MODE_ECB)
        padded = cipher.decrypt(encrypted)

        # 3. strip the padding: the value of the last byte is the number of
        #    trailing bytes to drop
        pad_length = padded[-1]
        plain = padded[:-pad_length]

        # 4. decode to text (JSON)
        return plain.decode('utf-8')

    @classmethod
    def md5hex(cls, key: str):
        """
        Return the MD5 hex digest of the UTF-8 encoding of ``key``.

        ``key`` must be a str; this is checked with ``ExceptionTool.require_true``.
        """
        ExceptionTool.require_true(isinstance(key, str), 'key参数需为字符串')

        import hashlib
        return hashlib.md5(key.encode("utf-8")).hexdigest()