([\s\S]*?)\|.*

([\s\S]*)

') # 用来提取搜索页面的album的信息 pattern_html_search_album_info_list = compile( r']*>[^>]*?' r'title="(.*?)"[^>]*>[ \n]*[ \n]*' r'

([\s\S]*?)' r'

' ) # 用来查找tag列表 pattern_html_search_tags = compile(r']*?>(.*?)') # 查找错误，例如 [错误，關鍵字過短，請至少輸入兩個字以上。] pattern_html_search_error = compile(r'

\n(.*?)\n

') pattern_html_search_total = compile(r'class="text-white">(\d+) A漫.'), 0 # 收藏页面的本子结果 pattern_html_favorite_content = compile( r'

[\s\S]*?' r'

([^<]*?)' r'

' ) # 收藏夹的收藏总数 pattern_html_favorite_total = compile(r' : (\d+)[^/]*/\D*(\d+)') # 所有的收藏夹 pattern_html_favorite_folder_list = [ compile(r''), compile(r'') ] @classmethod def parse_html_to_search_page(cls, html: str) -> JmSearchPage: # 1. 检查是否失败 PatternTool.require_not_match( html, cls.pattern_html_search_error, msg_func=lambda match: '{}: {}'.format(match[1], match[2]) ) # 2. 缩小文本范围 html = PatternTool.require_match( html, cls.pattern_html_search_shorten_for, msg='未匹配到搜索结果', ) # 3. 提取结果 content = [] # content这个名字来源于api版搜索返回值 total = int(PatternTool.match_or_default(html, *cls.pattern_html_search_total)) # 总结果数 album_info_list = cls.pattern_html_search_album_info_list.findall(html) for (album_id, title, _label_category_text, tag_text) in album_info_list: # 从label_category_text中可以解析出label-category和label-sub # 这里不作解析，因为没什么用... tags = cls.pattern_html_search_tags.findall(tag_text) content.append(( album_id, { 'name': title, # 改成name是为了兼容 parse_api_resp_to_page 'tags': tags } )) return JmSearchPage(content, total) @classmethod def parse_html_to_category_page(cls, html: str) -> JmSearchPage: content = [] total = int(PatternTool.match_or_default(html, *cls.pattern_html_search_total)) album_info_list = cls.pattern_html_category_album_info_list.findall(html) for (album_id, title, tag_text) in album_info_list: tags = cls.pattern_html_search_tags.findall(tag_text) content.append(( album_id, { 'name': title, # 改成name是为了兼容 parse_api_resp_to_page 'tags': tags } )) return JmSearchPage(content, total) @classmethod def parse_html_to_favorite_page(cls, html: str) -> JmFavoritePage: total = int(PatternTool.require_match( html, cls.pattern_html_favorite_total, '未匹配到收藏夹的本子总数', )) # 收藏夹的本子结果 content = cls.pattern_html_favorite_content.findall(html) content = [ (aid, {'name': atitle}) for aid, atitle in content ] # 匹配收藏夹列表 p1, p2 = cls.pattern_html_favorite_folder_list folder_list_text = PatternTool.require_match(html, p1, '未匹配到收藏夹列表') folder_list_raw = p2.findall(folder_list_text) folder_list = [{'name': fname, 'FID': fid} for fid, fname in folder_list_raw] return JmFavoritePage(content, folder_list, total) @classmethod def parse_api_to_search_page(cls, data: AdvancedDict) -> JmSearchPage: """ model_data: { "search_query": "MANA", "total": "177", "content": [ { "id": "441923", "author": "MANA", "description": "", "name": "[MANA] 神里绫华5", "image": "", "category": { "id": "1", "title": "同人" }, "category_sub": { "id": "1", "title": "同人" } } ] } """ total: int = int(data.total or 0) # 2024.1.5 data.total可能为None content = cls.adapt_content(data.content) return JmSearchPage(content, total) @classmethod def parse_api_to_favorite_page(cls, data: AdvancedDict) -> JmFavoritePage: """ { "list": [ { "id": "363859", "author": "紺菓", "description": "", "name": "[無邪氣漢化組] (C99) [紺色果實 (紺菓)] サレンの樂しい夢 (プリンセスコネクト!Re:Dive) [中國翻譯]", "latest_ep": null, "latest_ep_aid": null, "image": "", "category": { "id": "1", "title": "同人" }, "category_sub": { "id": "1", "title": "同人" } } ], "folder_list": [ { "0": "123", "FID": "123", "1": "456", "UID": "456", "2": "收藏夹名", "name": "收藏夹名" } ], "total": "87", "count": 20 } """ total: int = int(data.total) # count: int = int(data.count) content = cls.adapt_content(data.list) folder_list = data.get('folder_list', []) return JmFavoritePage(content, folder_list, total) @classmethod def adapt_content(cls, content): def adapt_item(item: AdvancedDict): item: dict = item.src_dict item.setdefault('tags', []) return item content = [ (item.id, adapt_item(item)) for item in content ] return content class JmApiAdaptTool: """ 本类负责把移动端的api返回值，适配为标准的实体类 # album { "id": 123, "name": "[狗野叉漢化]", "author": [ "AREA188" ], "images": [ "00004.webp" ], "description": null, "total_views": "41314", "likes": "918", "series": [], "series_id": "0", "comment_total": "5", "tags": [ "全彩", "中文" ], "works": [], "actors": [], "related_list": [ { "id": "333718", "author": "been", "description": "", "name": "[been]The illusion of lies（1）[中國語][無修正][全彩]", "image": "" } ], "liked": false, "is_favorite": false } # photo { "id": 413446, "series": [ { "id": "487043", "name": "第48話", "sort": "48" } ], "tags": "慾望調教 NTL 地鐵戲劇", "name": "癡漢成癮-第2話", "images": [ "00047.webp" ], "series_id": "400222", "is_favorite": false, "liked": false } """ field_adapter = { JmAlbumDetail: [ 'likes', 'tags', 'works', 'actors', 'related_list', 'name', ('id', 'album_id'), ('author', 'authors'), ('total_views', 'views'), ('comment_total', 'comment_count'), ], JmPhotoDetail: [ 'name', 'series_id', 'tags', ('id', 'photo_id'), ('images', 'page_arr'), ] } @classmethod def parse_entity(cls, data: dict, clazz: type): adapter = cls.get_adapter(clazz) fields = {} for k in adapter: if isinstance(k, str): v = data[k] fields[k] = v elif isinstance(k, tuple): k, rename_k = k v = data[k] fields[rename_k] = v if issubclass(clazz, JmAlbumDetail): cls.post_adapt_album(data, clazz, fields) else: cls.post_adapt_photo(data, clazz, fields) return clazz(**fields) @classmethod def get_adapter(cls, clazz: type): for k, v in cls.field_adapter.items(): if issubclass(clazz, k): return v ExceptionTool.raises(f'不支持的类型: {clazz}') @classmethod def post_adapt_album(cls, data: dict, _clazz: type, fields: dict): series = data['series'] episode_list = [] for chapter in series: chapter = AdvancedDict(chapter) # photo_id, photo_index, photo_title, photo_pub_date episode_list.append( (chapter.id, chapter.sort, chapter.name) ) fields['episode_list'] = episode_list for it in 'scramble_id', 'page_count', 'pub_date', 'update_date': fields[it] = '0' @classmethod def post_adapt_photo(cls, data: dict, _clazz: type, fields: dict): # 1. 获取sort字段，如果data['series']中没有，使用默认值1 sort = 1 series: list = data['series'] # series中的sort从1开始 for chapter in series: chapter = AdvancedDict(chapter) if int(chapter.id) == int(data['id']): sort = chapter.sort break fields['sort'] = sort import random fields['data_original_domain'] = random.choice(JmModuleConfig.DOMAIN_IMAGE_LIST) class JmImageTool: @classmethod def save_resp_img(cls, resp: Any, filepath: str, need_convert=True): """ 接收HTTP响应对象，将其保存到图片文件. 如果需要改变图片的文件格式，比如 .jpg → .png，则需要指定参数 neet_convert=True. 如果不需要改变图片的文件格式，使用 need_convert=False，可以跳过PIL解析图片，效率更高. :param resp: JmImageResp :param filepath: 图片文件路径 :param need_convert: 是否转换图片 """ if need_convert is False: cls.save_directly(resp, filepath) else: cls.save_image(cls.open_image(resp.content), filepath) @classmethod def save_image(cls, image: Image, filepath: str): """ 保存图片 :param image: PIL.Image对象 :param filepath: 保存文件路径 """ image.save(filepath) @classmethod def save_directly(cls, resp, filepath): from common import save_resp_content save_resp_content(resp, filepath) @classmethod def decode_and_save(cls, num: int, img_src: Image, decoded_save_path: str ) -> None: """ 解密图片并保存 :param num: 分割数，可以用 cls.calculate_segmentation_num 计算 :param img_src: 原始图片 :param decoded_save_path: 解密图片的保存路径 """ # 无需解密，直接保存 if num == 0: cls.save_image(img_src, decoded_save_path) return import math w, h = img_src.size # 创建新的解密图片 img_decode = Image.new("RGB", (w, h)) over = h % num for i in range(num): move = math.floor(h / num) y_src = h - (move * (i + 1)) - over y_dst = move * i if i == 0: move += over else: y_dst += over img_decode.paste( img_src.crop(( 0, y_src, w, y_src + move )), ( 0, y_dst, w, y_dst + move ) ) # save every step result # cls.save_image(img_decode, change_file_name( # decoded_save_path, # f'{of_file_name(decoded_save_path, trim_suffix=True)}_{i}{of_file_suffix(decoded_save_path)}' # )) # 保存到新的解密文件 cls.save_image(img_decode, decoded_save_path) @classmethod def open_image(cls, fp: Union[str, bytes]): from io import BytesIO fp = fp if isinstance(fp, str) else BytesIO(fp) return Image.open(fp) @classmethod def get_num(cls, scramble_id, aid, filename: str) -> int: """ 获得图片分割数 """ scramble_id = int(scramble_id) aid = int(aid) if aid < scramble_id: return 0 elif aid < JmMagicConstants.SCRAMBLE_268850: return 10 else: import hashlib x = 10 if aid < JmMagicConstants.SCRAMBLE_421926 else 8 s = f"{aid}{filename}" # 拼接 s = s.encode() s = hashlib.md5(s).hexdigest() num = ord(s[-1]) num %= x num = num * 2 + 2 return num @classmethod def get_num_by_url(cls, scramble_id, url) -> int: """ 获得图片分割数 """ return cls.get_num( scramble_id, aid=JmcomicText.parse_to_jm_id(url), filename=of_file_name(url, True), ) @classmethod def get_num_by_detail(cls, detail: JmImageDetail) -> int: """ 获得图片分割数 """ return cls.get_num(detail.scramble_id, detail.aid, detail.img_file_name) class JmCryptoTool: """ 禁漫加解密相关逻辑 """ @classmethod def token_and_tokenparam(cls, ts, ver=None, secret=None, ): """ 计算禁漫接口的请求headers的token和tokenparam :param ts: 时间戳 :param ver: app版本 :param secret: 密钥 :return (token, tokenparam) """ if ver is None: ver = JmMagicConstants.APP_VERSION if secret is None: secret = JmMagicConstants.APP_TOKEN_SECRET # tokenparam: 1700566805,1.6.3 tokenparam = '{},{}'.format(ts, ver) # token: 81498a20feea7fbb7149c637e49702e3 token = cls.md5hex(f'{ts}{secret}') return token, tokenparam @classmethod def decode_resp_data(cls, data: str, ts, secret=None, ) -> str: """ 解密接口返回值 :param data: resp.json()['data'] :param ts: 时间戳 :param secret: 密钥 :return: json格式的字符串 """ if secret is None: secret = JmMagicConstants.APP_DATA_SECRET # 1. base64解码 import base64 data_b64 = base64.b64decode(data) # 2. AES-ECB解密 key = cls.md5hex(f'{ts}{secret}').encode('utf-8') from Crypto.Cipher import AES data_aes = AES.new(key, AES.MODE_ECB).decrypt(data_b64) # 3. 移除末尾的padding data = data_aes[:-data_aes[-1]] # 4. 解码为字符串 (json) res = data.decode('utf-8') return res @classmethod def md5hex(cls, key: str): ExceptionTool.require_true(isinstance(key, str), 'key参数需为字符串') from hashlib import md5 return md5(key.encode("utf-8")).hexdigest()