使用Python实现某茄小说搜索与下载
站长 • 2024-08-15 13:13 • 82 次点击 • 技术教程
注意事项:
- 本项目仅用于学习和研究目的,不得用于任何非法活动或侵犯他人权益的行为。
- 使用本脚本所产生的一切法律责任和风险,均由用户自行承担。
- 在使用本脚本时,请遵守相关法律法规和网站的使用协议。
使用说明:
- 确保你的Python环境中已经安装了requests和parsel库。如果没有安装,可以使用pip进行安装:
[Python] 纯文本查看 复制代码
pip install requests parsel
[Python] 纯文本查看 复制代码
# Import modules
import requests
import parsel
import time

# Character-code mapping table used to de-obfuscate chapter text.
# Keys are the decimal code points (as strings) of the private-use glyphs
# found in the chapter HTML; values are the real characters they stand for.
dit_data = {
    '58670': '0', '58413': '1', '58678': '2', '58371': '3', '58353': '4', '58480': '5',
    '58359': '6', '58449': '7', '58540': '8', '58692': '9',
    # NOTE: '58626' was mapped to 'a' (a duplicate of '58712'); its position in
    # the a-z run shows it is the code for 'd'.
    '58712': 'a', '58542': 'b', '58575': 'c', '58626': 'd', '58691': 'e', '58561': 'f',
    '58362': 'g', '58619': 'h', '58430': 'i', '58531': 'j', '58588': 'k', '58440': 'l',
    '58681': 'm', '58631': 'n', '58376': 'o', '58429': 'p', '58555': 'q', '58498': 'r',
    '58518': 's', '58453': 't', '58397': 'u', '58356': 'v', '58435': 'w', '58514': 'x',
    '58482': 'y', '58529': 'z',
    '58515': 'A', '58688': 'B', '58709': 'C', '58344': 'D', '58656': 'E', '58381': 'F',
    '58576': 'G', '58516': 'H', '58463': 'I', '58649': 'J', '58571': 'K', '58558': 'L',
    '58433': 'M', '58517': 'N', '58387': 'O', '58687': 'P', '58537': 'Q', '58541': 'R',
    '58458': 'S', '58390': 'T', '58466': 'U', '58386': 'V', '58697': 'W', '58519': 'X',
    '58511': 'Y', '58634': 'Z',
    '58611': '的', '58590': '一', '58398': '是', '58422': '了', '58657': '我', '58666': '不',
    '58562': '人', '58345': '在', '58510': '他', '58496': '有', '58654': '这', '58441': '个',
    '58493': '上', '58714': '们', '58618': '来', '58528': '到', '58620': '时', '58403': '大',
    '58461': '地', '58481': '为', '58700': '子', '58708': '中', '58503': '你', '58442': '说',
    '58639': '生', '58506': '国', '58663': '年', '58436': '着', '58563': '就', '58391': '那',
    '58357': '和', '58354': '要', '58695': '她', '58372': '出', '58696': '也', '58551': '得',
    '58445': '里', '58408': '后', '58599': '自', '58424': '以', '58394': '会', '58348': '家',
    '58426': '可', '58673': '下', '58417': '而', '58556': '过', '58603': '天', '58565': '去',
    '58604': '能', '58522': '对', '58632': '小', '58622': '多', '58350': '然', '58605': '于',
    '58617': '心', '58401': '学', '58637': '么', '58684': '之', '58382': '都', '58464': '好',
    '58487': '看', '58693': '起', '58608': '发', '58392': '当', '58474': '没', '58601': '成',
    '58355': '只', '58573': '如', '58499': '事', '58469': '把', '58361': '还', '58698': '用',
    '58489': '第', '58711': '样', '58457': '道',
    '58635': '想', '58492': '作', '58647': '种', '58623': '开', '58521': '美', '58609': '总',
    '58530': '从', '58665': '无', '58652': '情', '58676': '己', '58456': '面', '58581': '最',
    '58509': '女', '58488': '但', '58363': '现', '58685': '前', '58396': '些', '58523': '所',
    '58471': '同', '58485': '日', '58613': '手', '58533': '又', '58589': '行', '58527': '意',
    '58593': '动', '58699': '方', '58707': '期', '58414': '它', '58596': '头', '58570': '经',
    '58660': '长', '58364': '儿', '58526': '回', '58501': '位', '58638': '分', '58404': '爱',
    '58677': '老', '58535': '因', '58629': '很', '58577': '给', '58606': '名', '58497': '法',
    '58662': '间', '58479': '斯', '58532': '知', '58380': '世', '58385': '什', '58405': '两',
    '58644': '次', '58578': '使', '58505': '身', '58564': '者', '58412': '被', '58686': '高',
    '58624': '已', '58667': '亲', '58607': '其', '58616': '进', '58368': '此', '58427': '话',
    '58423': '常', '58633': '与', '58525': '活', '58543': '正', '58418': '感', '58597': '见',
    '58683': '明', '58507': '问', '58621': '力', '58703': '理', '58438': '尔', '58536': '点',
    '58384': '文', '58484': '几', '58539': '定', '58554': '本', '58421': '公', '58347': '特',
    '58569': '做', '58710': '外', '58574': '孩', '58375': '相', '58645': '西', '58592': '果',
    '58572': '走', '58388': '将', '58370': '月', '58399': '十', '58651': '实', '58546': '向',
    '58504': '声', '58419': '车', '58407': '全', '58672': '信', '58675': '重', '58538': '三',
    '58465': '机', '58374': '工', '58579': '物', '58402': '气', '58702': '每', '58553': '并',
    '58360': '别', '58389': '真', '58560': '打', '58690': '太', '58473': '新', '58512': '比',
    '58653': '才', '58704': '便', '58545': '夫', '58641': '再', '58475': '书', '58583': '部',
    '58472': '水', '58478': '像', '58664': '眼', '58586': '等', '58568': '体', '58674': '却',
    '58490': '加', '58476': '电', '58346': '主', '58630': '界', '58595': '门', '58502': '利',
    '58713': '海', '58587': '受', '58548': '听', '58351': '表', '58547': '德', '58443': '少',
    '58460': '克', '58636': '代', '58585': '员', '58625': '许', '58694': '陵', '58428': '先',
    '58640': '口', '58628': '由', '58612': '死', '58446': '安', '58468': '写', '58410': '性',
    '58508': '马', '58594': '光', '58483': '白', '58544': '或', '58495': '住', '58450': '难',
    '58643': '望', '58486': '教', '58406': '命', '58447': '花', '58669': '结', '58415': '乐',
    '58444': '色', '58549': '更', '58494': '拉', '58409': '东', '58658': '神', '58557': '记',
    '58602': '处', '58559': '让', '58610': '母', '58513': '父', '58500': '应', '58378': '直',
    '58680': '字', '58352': '场', '58383': '平', '58454': '报', '58671': '友', '58668': '关',
    '58452': '放', '58627': '至', '58400': '张', '58455': '认', '58416': '接', '58552': '告',
    '58614': '入', '58582': '笑', '58534': '内', '58701': '英', '58349': '军', '58491': '候',
    '58467': '民', '58365': '岁', '58598': '往', '58425': '何', '58462': '度', '58420': '山',
    '58661': '觉', '58615': '路', '58648': '带', '58470': '万', '58377': '男', '58520': '边',
    '58646': '风', '58600': '解', '58431': '叫', '58715': '任', '58524': '金', '58439': '快',
    '58566': '原', '58477': '吃', '58642': '妈', '58437': '变', '58411': '通', '58451': '师',
    '58395': '立', '58369': '象', '58706': '数', '58705': '四', '58379': '失', '58567': '满',
    '58373': '战', '58448': '远', '58659': '格', '58434': '士', '58679': '音', '58432': '轻',
    '58689': '目', '58591': '条', '58682': '呢',
}

# Same mapping keyed by integer code point, in the form str.translate expects.
# Built once so decoding a chapter is a single C-level pass.
_DECODE_TABLE = {int(code): char for code, char in dit_data.items()}

# Pretend to be a desktop browser
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36'
}

# Seconds to wait for any single HTTP request before giving up; without a
# timeout a stalled connection would block the script forever.
REQUEST_TIMEOUT = 15

_SEARCH_URL = 'https://novel.snssdk.com/api/novel/channel/homepage/search/search/v1/'
_SITE_ROOT = 'https://fanqienovel.com'

# Search hits by these "authors" are comic/audio products, not novels.
_EXCLUDED_AUTHORS = ('番茄漫画', '番茄畅听')

# Characters that are not allowed in file names on common file systems.
_FILENAME_TABLE = {ord(ch): '_' for ch in '\\/:*?"<>|\r\n\t'}


def _print_disclaimer():
    """Print the copyright notice and disclaimer banner."""
    print("\n\n \033[31m免责声明:\033[0m")
    print(" 本程序仅用于学习和研究Python网络爬虫和网页处理技术")
    print(" 本程序不得用于任何非法活动或侵犯他人权益的行为")
    print(" 使用本程序所产生的一切法律责任和风险,均由用户自行承担")
    print(" 作者不对因使用该程序而导致的任何损失或损害承担任何责任")
    print("\n\n")


def _decode_content(text):
    """Return *text* with every obfuscated glyph replaced via ``dit_data``.

    Characters that have no entry in the table are left unchanged.
    """
    return text.translate(_DECODE_TABLE)


def _safe_filename(name):
    """Return *name* with characters that are illegal in file names replaced."""
    cleaned = (name or '').translate(_FILENAME_TABLE).strip()
    return cleaned or 'novel'


def _prompt_for_index(count):
    """Ask the user for a 1-based book number.

    Returns the matching 0-based index into a list of *count* books, or
    ``None`` when the user enters 0 to go back to the search prompt.
    """
    while True:
        try:
            # The menu is numbered from 1; list indices start at 0.
            selected_index = int(input('\n\033[31m请输入要下载的书籍序号\033[0m(输入0返回搜索):')) - 1
        except ValueError:
            print("请输入数字!")
            continue
        if selected_index == -1:
            return None
        if 0 <= selected_index < count:
            return selected_index
        print("无效的序号!")


def _search_books(key):
    """Query the search API for *key* and return the matching novels.

    Up to three result pages (10 books each) are fetched. For every hit the
    book page is fetched as well, to count its chapters.

    Returns a list of dicts with ``title``, ``author``, ``book_id`` and
    ``chapter_count`` keys, or ``None`` if a search request failed.
    """
    info = []
    for page in range(3):
        # Passing the keyword through ``params`` lets requests URL-encode it,
        # so characters such as '&' or '#' cannot corrupt the query string.
        params = {
            'device_platform': 'android',
            'parent_enterfrom': 'novel_channel_search.tab',
            'aid': 1967,
            'offset': page * 10,
            'q': key,
        }
        try:
            search_data = requests.get(
                url=_SEARCH_URL, params=params, headers=headers, timeout=REQUEST_TIMEOUT
            ).json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # JSON error is not a RequestException subclass.
            print(f'搜索请求失败: {e}')
            return None

        if search_data['message'] != 'success':
            print(search_data['message'])
            continue

        data = search_data.get('data') or {}
        for book in data.get('ret_data') or []:
            if book['author'] in _EXCLUDED_AUTHORS:
                continue
            book_id = book['book_id']
            # Count the chapters listed on the book page
            book_url = f"{_SITE_ROOT}/page/{book_id}?enter_from=search"
            try:
                book_response = requests.get(book_url, headers=headers, timeout=REQUEST_TIMEOUT)
            except requests.exceptions.RequestException as e:
                print(f"获取章节数量失败: {e}")
                continue  # Skip this book and carry on with the next one
            chapter_count = len(parsel.Selector(book_response.text).css('.chapter-item'))
            info.append({
                'title': book['title'],
                'author': book['author'],
                'book_id': book_id,
                'chapter_count': chapter_count,
            })

        if not data.get('has_more'):
            break
    return info


def search_and_download():
    """Run one interactive search-select-download cycle.

    Prompts for a keyword, lists the matching novels, asks which one to
    download, then fetches every chapter, de-obfuscates its text and appends
    it to ``<novel name>.txt`` in the current directory. Returns ``None`` in
    all cases; returning early sends the caller back to a fresh search.
    """
    # Search
    while True:
        key = input('请输入书名或者作者名进行搜索:')
        if key.strip():  # Reject empty input
            break
        print('\033[31m请输入有效的书名或作者名!\033[0m')

    print('\n\033[31m正在搜索中,请稍等....\033[0m')
    info = _search_books(key.strip())
    if info is None:
        return  # Back to search

    if not info:
        # No hits is a different situation from "hits but no chapters".
        print('\033[31m未搜索到相关书籍,请更换关键词重试!\033[0m')
        return

    # Show the search results
    print("\n搜索结果:")
    for i, book in enumerate(info):
        print(f"{i + 1}. {book['title']} - 作者:{book['author']} (共{book['chapter_count']}章) ID: {book['book_id']}")

    # Every book page coming back without chapters means the site is
    # throttling this client.
    if all(book['chapter_count'] == 0 for book in info):
        print("\033[31m所有搜索到的书籍都没有章节,网络请求受限,请更换IP!!!\033[0m")
        return  # Back to search

    selected_index = _prompt_for_index(len(info))
    if selected_index is None:
        return  # Back to search

    # Fetch the book page; on failure let the user pick another book
    while True:
        selected = info[selected_index]
        book_id = selected['book_id']
        url = f'{_SITE_ROOT}/page/{book_id}?enter_from=search'
        try:
            response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()  # Treat HTTP error statuses as failures
            break
        except requests.exceptions.RequestException as e:
            print(f'获取小说信息失败: {e}')
            print('\033[31m该小说没有章节,请重新选择序号下载\033[0m')
            selected_index = _prompt_for_index(len(info))
            if selected_index is None:
                return  # Back to search

    # Parse the book page
    selector = parsel.Selector(response.text)
    # Novel name; fall back to the search title if the page lacks one
    name = selector.css('.info-name h1::text').get() or selected['title']
    file_path = _safe_filename(name) + '.txt'

    # Collect (title, href) pairs in page order. A list is used rather than a
    # title-keyed dict so chapters that share a title are not dropped.
    chapters = []
    for item in selector.css('.chapter-item'):
        title = item.css('.chapter-item-title::text').get()
        href = item.css('.chapter-item-title::attr(href)').get()
        if href:
            chapters.append((title or '', href))

    # Use the author of the book the user selected, not the leftover loop
    # variable from the result listing.
    print(f"\n==========\033[31m {name} - 作者:{selected['author']},共 {len(chapters)} 章,ID: {book_id}\033[0m ==========\n")

    start_time = time.time()
    download_success = True
    for i, (title, href) in enumerate(chapters, 1):  # Count from 1
        print(f'正在下载 {i}/{len(chapters)}: {title}')
        link_url = _SITE_ROOT + href + '?enter_from=page'
        try:
            link_response = requests.get(url=link_url, headers=headers, timeout=REQUEST_TIMEOUT)
            # Without this an HTTP error page would be saved as an empty chapter.
            link_response.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f'获取章节内容失败: {e}')
            download_success = False  # Remember that at least one chapter failed
            continue

        # Extract the paragraphs, join them and decode the obfuscated glyphs
        content_list = parsel.Selector(link_response.text).css('.muye-reader-content-16 p::text').getall()
        novel_content = _decode_content('\n\n'.join(content_list))

        with open(file_path, mode='a', encoding='utf-8') as f:
            f.write(title)
            f.write('\n\n')
            f.write(novel_content)
            f.write('\n\n')

    # Report how long the download took
    download_duration = time.time() - start_time
    if download_duration < 60:
        print(f'下载耗时: {download_duration:.2f} 秒')
    else:
        minutes = int(download_duration // 60)
        seconds = int(download_duration % 60)
        print(f'下载耗时: {minutes} 分 {seconds} 秒')

    if download_success:
        print(f'=============== {name} 下载完成,存放在软件目录里 ==========\n')
    else:
        print(f'=============== {name} 下载失败 ==========\n')


def main():
    """Show the disclaimer, then loop over search/download cycles until interrupted."""
    _print_disclaimer()
    while True:
        try:
            search_and_download()
        except (KeyboardInterrupt, EOFError):
            # Ctrl+C or closed stdin: exit quietly instead of with a traceback.
            print()
            break


if __name__ == '__main__':
    main()