1000字范文,内容丰富有趣,学习的好帮手!
1000字范文 > Python爬虫学习(七)执行定时任务爬取12306余票信息发送邮件通知

Python爬虫学习(七)执行定时任务爬取12306余票信息发送邮件通知

时间:2022-09-04 12:46:47

相关推荐

Python爬虫学习(七)执行定时任务爬取12306余票信息发送邮件通知

分析

我们输入的上海,北京都变成了对应的编号,比如,上海(SHH)、北京(BJP),所以当我们程序进行输入的时候要进行一下处理,12306的一个地方存储着这些城市名与编码对应的文档:

GET请求: https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.8971

点击查询,通过F12找到接口:

GET请求 Request URL: https://kyfw.12306.cn/otn/leftTicket/query?leftTicketDTO.train_date=2020-12-31&leftTicketDTO.from_station=SHH&leftTicketDTO.to_station=BXP&purpose_codes=ADULT

- leftTicketDTO.train_date:查询的日期,例如 2020-12-31
- leftTicketDTO.from_station:查询的出发地编码,例如 SHH(上海)
- leftTicketDTO.to_station:查询的目的地编码,例如 BXP(北京西)、ZZF(郑州)
- purpose_codes:不太清楚这个字段是用来做什么的,就保持默认值 ADULT 吧

整体流程:

执行定时任务,查询12306余票信息,如果有合适的车次,就发送邮件通知。

1、查询余票信息

查询城市编码与城市名称对应信息。

# Station table published by 12306. The scheme and host are required:
# requests rejects a bare "/otn/..." path with MissingSchema.
STATION_NAME_URL = (
    "https://kyfw.12306.cn/otn/resources/js/framework/station_name.js"
    "?station_version=1.8971"
)


def parseStationCodes(text):
    """Extract the station-name -> station-code pairs from station_name.js.

    Args:
        text: Raw body of the station_name.js resource.

    Returns:
        dict mapping each Chinese station name to its upper-case code,
        e.g. {'北京北': 'VAP'}. Empty when nothing matches.
    """
    # Every match is a run of CJK characters followed by "|" and capitals.
    return dict(re.findall(r'([\u4e00-\u9fa5]+)\|([A-Z]+)', text))


def getCity():
    """Download the station table and return it as {name: code}.

    Raises:
        requests.RequestException: on network failure, timeout or a
            non-2xx response.
    """
    # verify=False keeps the original behaviour of skipping certificate
    # checks; the timeout stops a stalled connection from hanging forever.
    response = requests.get(STATION_NAME_URL, verify=False, timeout=10)
    response.raise_for_status()
    return parseStationCodes(response.text)


def getCityCode1():
    """Return the code -> name mapping, e.g. {'VAP': '北京北'}."""
    return {code: name for name, code in getCity().items()}


def getCityCode2():
    """Return the name -> code mapping, e.g. {'北京北': 'VAP'}."""
    return getCity()

解析余票信息数据

# Output column -> index of that value inside one "|"-separated result row.
# The key order is the column order used for the CSV export.
TICKET_FIELD_INDEX = {
    " 车次 ": 3,
    "出发车站": 6,
    "到达车站": 7,
    "出发时间": 8,
    "到达时间": 9,
    " 历时 ": 10,
    "商务座": 32,
    "一等座": 31,
    "二等座": 30,
    "高级软卧": 21,
    "软卧": 23,
    "动卧": 27,
    "硬卧": 28,
    "软座": 24,
    "硬座": 29,
    "无座": 26,
    "其他": 22,
    "备注": 1,
}

# "-" is the placeholder written below for an empty field; "无" is the value
# the original filter treated as "no tickets".
UNAVAILABLE_MARKS = ("-", "无")


def getdata(html, station_names=None):
    """Parse a left-ticket JSON response into a list of per-train dicts.

    Args:
        html: JSON text whose ['data']['result'] is a list of
            "|"-separated strings, one per train.
        station_names: Optional {code: name} mapping used to translate the
            station codes. When omitted it is fetched with getCityCode1(),
            which is what the original one-argument call did.

    Returns:
        One dict per train, keyed by the names in TICKET_FIELD_INDEX. Empty
        fields are replaced by "-". Trains with neither a hard sleeper
        ("硬卧") nor a hard seat ("硬座") available are left out.

    Raises:
        KeyError: if the payload has no ['data']['result'].
        IndexError: if a row has fewer fields than expected.
    """
    if station_names is None:
        station_names = getCityCode1()

    ticketList = []
    for row in json.loads(html)['data']['result']:
        item = row.split('|')
        data = {field: item[index] for field, index in TICKET_FIELD_INDEX.items()}
        # Business-class availability sits at index 32, or 25 when 32 is empty.
        data["商务座"] = item[32] or item[25]
        # Fall back to the raw code so a station missing from the table does
        # not abort the whole query.
        data["出发车站"] = station_names.get(item[6], item[6])
        data["到达车站"] = station_names.get(item[7], item[7])
        data = {field: value or "-" for field, value in data.items()}

        # Skip trains where both columns are unavailable. The original only
        # caught "-"/"-" and "无"/"无", so a mixed pair slipped through and
        # was reported as bookable.
        if (data["硬卧"] in UNAVAILABLE_MARKS
                and data["硬座"] in UNAVAILABLE_MARKS):
            continue
        ticketList.append(data)
    return ticketList

保存数据为csv文件

# Column order of the exported CSV file.
TICKET_CSV_FIELDS = [
    " 车次 ", "出发车站", "到达车站", "出发时间", "到达时间", " 历时 ",
    "商务座", "一等座", "二等座", "高级软卧", "软卧", "动卧",
    "硬卧", "软座", "硬座", "无座", "其他", "备注",
]


def writeData(ticketList, filename=None):
    """Write the ticket dicts to a UTF-8 CSV file with a header row.

    Args:
        ticketList: Iterable of dicts keyed by TICKET_CSV_FIELDS.
        filename: Path of the file to write. When omitted, the name is
            built from the module-level ``date``, ``from_station`` and
            ``to_station`` variables, as the original one-argument call did.

    Raises:
        NameError: if ``filename`` is omitted and those globals are not set.
        OSError: if the file cannot be opened for writing.
    """
    if filename is None:
        # NOTE(review): the default name contains ">", which Windows does not
        # allow in file names - pass ``filename`` explicitly there.
        filename = date + from_station + "->" + to_station + '.csv'
    with open(filename, 'w', encoding='utf-8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=TICKET_CSV_FIELDS)
        writer.writeheader()
        writer.writerows(ticketList)

if __name__ == '__main__':
    # Travel date. The published listing shows "-12-15" with the year lost;
    # presumably the endpoint expects YYYY-MM-DD - set the real travel date.
    date = "2020-12-15"
    # Departure station
    from_station = "秦皇岛"
    # Arrival station
    to_station = "郑州"
    print(from_station)
    print(to_station)

    # Download the name -> code table once instead of once per lookup.
    station_codes = getCityCode2()
    from_station1 = station_codes[from_station]
    to_station1 = station_codes[to_station]
    print("出发城市:", from_station)
    print("到达城市:", to_station)

    query_url = url2.format(date, from_station1, to_station1)
    print("url:", query_url)
    # verify=False matches every other request in this program.
    response = requests.get(url=query_url, headers=headers, verify=False)
    response.raise_for_status()  # raises on a 4xx/5xx answer
    response.encoding = response.apparent_encoding
    print("status_code:", response.status_code)

    # An error page carries its message in <li id="err_bot">; a normal
    # answer has no such node.
    selector = etree.HTML(response.text)
    error_text = selector.xpath('//li[@id="err_bot"]/text()')
    if not error_text:
        print('接口正常!')
    elif error_text[0].strip() == '网络可能存在问题,请您重试一下!':
        raise ValueError('网络可能存在问题,或者当前日期没有车次信息,请您重试一下')

    print("获取车次信息")
    ticketList = getdata(response.text)
    print("保存车次信息")
    writeData(ticketList)
    print("ticketList:", ticketList)

    # Keep only the columns that go into the e-mail body.
    summaries = [
        {
            '车次': item[' 车次 '],
            '出发车站': item['出发车站'],
            '到达车站': item['到达车站'],
            '出发时间': item['出发时间'],
            '到达时间': item['到达时间'],
            '硬卧': item['硬卧'],
            '硬座': item['硬座'],
        }
        for item in ticketList
    ]
    if summaries:
        # One train per line: every closing brace becomes a line break.
        list_str2 = str(summaries).replace('}', '\n')
        info = '日期:' + date + '\n' + list_str2
        print(info)
        SendeMail.main.sendTextEmail('从' + from_station + '前往' + to_station + '的', info)
    else:
        print('没有车次信息')

效果

2、发送邮件通知

# author: LiuShihao
# date: /12/7 2:57 PM
# youknow: (author's joke) someone supposedly once offered 300 million for this
#          code; it is shared here for free - a "like" is all that is asked.
# desc: send e-mail
import smtplib
from email.mime.text import MIMEText

# Third-party SMTP service settings (redacted in the published listing).
mail_host = ""  # SMTP server
mail_user = "*******@"  # user name
mail_pass = "******"  # password: the mailbox authorization code, not the login password
sender = '******@'  # sender address
receivers = ['******@', '******@', '******@']  # recipient addresses


def sendTextEmail(title, info):
    """Send a plain-text e-mail to every address in ``receivers``.

    Args:
        title: Subject prefix, e.g. '从上海前往郑州的'; '12306车票信息' is
            appended to it.
        info: Message body; a "seats available, book soon" line is appended.

    Returns:
        None. An smtplib.SMTPException is printed rather than raised.
    """
    content = info + '\n有座位!请尽快购票'
    title = title + '12306车票信息'  # mail subject
    message = MIMEText(content, 'plain', 'utf-8')  # body, subtype, charset
    message['From'] = "{}".format(sender)
    message['To'] = ",".join(receivers)
    message['Subject'] = title
    try:
        # QQ Mail requires SSL, hence SMTP_SSL instead of SMTP; the port is
        # usually 465. The with-block closes the connection even when login
        # or sending fails - the original never closed it.
        with smtplib.SMTP_SSL(mail_host, 465) as smtpObj:
            smtpObj.login(mail_user, mail_pass)
            smtpObj.sendmail(sender, receivers, message.as_string())
        print("mail has been send successfully.")
    except smtplib.SMTPException as e:
        print(e)


if __name__ == '__main__':
    sendTextEmail('从北京前往的上海', 'Text')

效果

3、定时任务

将查询余票信息的方法封装一下,导入schedule模块

import schedule

if __name__ == '__main__':
    # Travel date. The published listing shows "-12-15" with the year lost;
    # presumably the endpoint expects YYYY-MM-DD - set the real travel date.
    date = "2020-12-15"
    # Departure station
    from_station = "秦皇岛"
    # Arrival station
    to_station = "郑州"

    # Query once right away, then repeat on the schedule below.
    GET(date, from_station, to_station)
    # Run every minute. For an hourly check register the job with
    # schedule.every().hour.do(...) instead.
    schedule.every().minute.do(GET, date, from_station, to_station)
    while True:
        schedule.run_pending()
        time.sleep(2)

完整代码

# author: LiuShihao
# date: /12/7 9:20 AM
# youknow: (author's joke) someone supposedly once offered 300 million for this
#          code; it is shared here for free - a "like" is all that is asked.
# desc: query remaining-ticket information
"""Poll 12306 for remaining tickets and e-mail the trains that have seats.

Query-string parameters of the left-ticket endpoint:

* leftTicketDTO.train_date   - the date to query
* leftTicketDTO.from_station - departure station code, e.g. SHH (上海)
* leftTicketDTO.to_station   - destination station code, e.g. ZZF (郑州)
* purpose_codes              - the author was unsure what it is for and
                               left the default (ADULT)

Station names typed by the user must be converted to these codes first.
12306 publishes the name/code table at
/otn/resources/js/framework/station_name.js?station_version=1.8971
"""
import csv
import json
import re
import time
import traceback

import requests
import schedule
from colorama import init, Fore
from lxml import etree
from prettytable import PrettyTable
from requests.packages.urllib3.exceptions import InsecureRequestWarning

import SendeMail.main

# Requests below use verify=False; silence the warning that triggers.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
init(autoreset=False)


class Colored(object):
    """Wrap a string in a colorama foreground colour followed by a reset."""

    def red(self, s):
        return Fore.LIGHTRED_EX + s + Fore.RESET

    def green(self, s):
        return Fore.LIGHTGREEN_EX + s + Fore.RESET

    def yellow(self, s):
        return Fore.LIGHTYELLOW_EX + s + Fore.RESET

    def white(self, s):
        return Fore.LIGHTWHITE_EX + s + Fore.RESET

    def blue(self, s):
        return Fore.LIGHTBLUE_EX + s + Fore.RESET


driverfile_path = r'/Browser/Baidu/chromedriver'

# The scheme and host are required: requests rejects a bare "/otn/..." path
# with MissingSchema.
BASE_URL = "https://kyfw.12306.cn"
STATION_NAME_URL = BASE_URL + "/otn/resources/js/framework/station_name.js?station_version=1.8971"
url2 = BASE_URL + r'/otn/leftTicket/query?leftTicketDTO.train_date={0}&leftTicketDTO.from_station={1}&leftTicketDTO.to_station={2}&purpose_codes=ADULT'

# Seconds to wait for 12306 before giving up on a request.
REQUEST_TIMEOUT = 10

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.5702.400 QQBrowser/10.2.1893.400",
    # Without cookie information 12306 refuses the call and reports a
    # network error. NOTE(review): this looks like a captured browser
    # session - presumably it has to be replaced with a fresh one.
    "Cookie": (
        "JSESSIONID=E9290F20C37A2BFA56C8F0CF732C7D05; "
        "BIGipServerpool_passport=266600970.50215.0000; "
        "RAIL_EXPIRATION=1607578913111; "
        "RAIL_DEVICEID=nK0qSNOKRXI4lxn6fPsRqT-rZdcQUhNIENZwmddIyDzW_auv2mnP-rktmCKFBKvqYVPM3VCb4q0IyoQDn5BC1M0a7XMAbJtEvcOsFHv-Qx6okcTqkXTRYWoX6_t9T20ny_DGckI5sAx7jIrBs8WLEOF7KmxuH27v; "
        "route=9036359bb8a8a461c164a04f8f50b252; "
        "_jc_save_fromStation=%u4E0A%u6D77%2CSHH; "
        "_jc_save_fromDate=-12-31; "
        "_jc_save_toDate=-12-07; "
        "_jc_save_wfdc_flag=dc; "
        "_jc_save_toStation=%u90D1%u5DDE%2CZZF; "
        "BIGipServerotn=233832970.64545.0000"
    ),
}

# Output column -> index of that value inside one "|"-separated result row.
# The key order is also the column order of the CSV export.
TICKET_FIELD_INDEX = {
    " 车次 ": 3,
    "出发车站": 6,
    "到达车站": 7,
    "出发时间": 8,
    "到达时间": 9,
    " 历时 ": 10,
    "商务座": 32,
    "一等座": 31,
    "二等座": 30,
    "高级软卧": 21,
    "软卧": 23,
    "动卧": 27,
    "硬卧": 28,
    "软座": 24,
    "硬座": 29,
    "无座": 26,
    "其他": 22,
    "备注": 1,
}

# "-" is the placeholder written for an empty field; "无" is the value the
# original filter treated as "no tickets".
UNAVAILABLE_MARKS = ("-", "无")


def getCity():
    """Download the station table and return it as {name: code}.

    Raises:
        requests.RequestException: on network failure, timeout or a
            non-2xx response.
    """
    response = requests.get(STATION_NAME_URL, verify=False, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Every match is a run of CJK characters followed by "|" and capitals.
    return dict(re.findall(r'([\u4e00-\u9fa5]+)\|([A-Z]+)', response.text))


def getCityCode1():
    """Return the code -> name mapping, e.g. {'VAP': '北京北'}."""
    return {code: name for name, code in getCity().items()}


def getCityCode2():
    """Return the name -> code mapping, e.g. {'北京北': 'VAP'}."""
    return getCity()


def getdata(html, station_names=None):
    """Parse a left-ticket JSON response into a list of per-train dicts.

    Args:
        html: JSON text whose ['data']['result'] is a list of
            "|"-separated strings, one per train.
        station_names: Optional {code: name} mapping used to translate the
            station codes. When omitted it is fetched with getCityCode1().

    Returns:
        One dict per train, keyed by the names in TICKET_FIELD_INDEX. Empty
        fields are replaced by "-". Trains with neither a hard sleeper
        ("硬卧") nor a hard seat ("硬座") available are left out.
    """
    if station_names is None:
        station_names = getCityCode1()

    ticketList = []
    for row in json.loads(html)['data']['result']:
        item = row.split('|')
        data = {field: item[index] for field, index in TICKET_FIELD_INDEX.items()}
        # Business-class availability sits at index 32, or 25 when 32 is empty.
        data["商务座"] = item[32] or item[25]
        # Fall back to the raw code so a station missing from the table does
        # not abort the whole query.
        data["出发车站"] = station_names.get(item[6], item[6])
        data["到达车站"] = station_names.get(item[7], item[7])
        data = {field: value or "-" for field, value in data.items()}

        # Skip trains where both columns are unavailable. The original only
        # caught "-"/"-" and "无"/"无", so a mixed pair slipped through.
        if (data["硬卧"] in UNAVAILABLE_MARKS
                and data["硬座"] in UNAVAILABLE_MARKS):
            continue
        ticketList.append(data)
    return ticketList


def showTicket(html, station_names=None):
    """Print every train of a left-ticket JSON response as a coloured table.

    Args:
        html: JSON text in the same format getdata() accepts.
        station_names: Optional {code: name} mapping; fetched with
            getCityCode1() when omitted.
    """
    if station_names is None:
        station_names = getCityCode1()

    table = PrettyTable([" 车次 ", "出发车站", "到达车站", "出发时间", "到达时间", " 历时 ", "商务座", " 一等座", "二等座", "高级软卧", "软卧", "动卧", "硬卧", "软座", "硬座", "无座", "其他", "备注"])
    color = Colored()
    for row in json.loads(html)['data']['result']:
        item = row.split('|')
        # Cells are listed in header order. The original emitted 动卧,
        # 高级软卧, 软卧 under the headers 高级软卧, 软卧, 动卧.
        table.add_row([
            color.yellow(item[3] or "-"),                     # train number
            color.green(station_names.get(item[6], item[6])),  # departure station
            color.red(station_names.get(item[7], item[7])),    # arrival station
            color.green(item[8] or "-"),                      # departure time
            color.red(item[9] or "-"),                        # arrival time
            item[10] or "-",                                  # duration
            item[32] or item[25] or "-",                      # business class
            item[31] or "-",                                  # first class
            item[30] or "-",                                  # second class
            item[21] or "-",                                  # deluxe soft sleeper
            item[23] or "-",                                  # soft sleeper
            item[27] or "-",                                  # EMU sleeper
            item[28] or "-",                                  # hard sleeper
            item[24] or "-",                                  # soft seat
            item[29] or "-",                                  # hard seat
            item[26] or "-",                                  # standing
            item[22] or "-",                                  # other
            color.white(item[1]),                             # remark
        ])
    print(table)


def writeData(ticketList, filename=None):
    """Write the ticket dicts to a UTF-8 CSV file with a header row.

    Args:
        ticketList: Iterable of dicts keyed by TICKET_FIELD_INDEX.
        filename: Path of the file to write. When omitted, the name is
            built from the module-level ``date``, ``from_station`` and
            ``to_station`` variables, as the original one-argument call did.
    """
    if filename is None:
        filename = date + from_station + "->" + to_station + '.csv'
    with open(filename, 'w', encoding='utf-8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=list(TICKET_FIELD_INDEX))
        writer.writeheader()
        writer.writerows(ticketList)


def GET(date, from_station, to_station):
    """Query the remaining tickets once, save them as CSV and send an e-mail.

    Args:
        date: Travel date placed into the train_date query parameter.
        from_station: Chinese name of the departure station.
        to_station: Chinese name of the arrival station.

    Raises:
        KeyError: if a station name is not in the station table.
        requests.RequestException: on network failure or a non-2xx response.
        ValueError: if 12306 answers with its "network problem" error page.
    """
    print("现在是", time.strftime('%Y年%m月%d日 %H时%M分%S秒', time.localtime()), '查询余票信息',)

    # Download the station table once per poll; the original fetched it
    # three times (twice here, once more inside getdata).
    station_codes = getCity()
    from_station1 = station_codes[from_station]
    to_station1 = station_codes[to_station]

    response = requests.get(
        url=url2.format(date, from_station1, to_station1),
        headers=headers,
        verify=False,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()  # raises on a 4xx/5xx answer
    response.encoding = response.apparent_encoding
    print("status_code:", response.status_code)

    # An error page carries its message in <li id="err_bot">; a normal
    # answer has no such node.
    selector = etree.HTML(response.text)
    error_text = selector.xpath('//li[@id="err_bot"]/text()')
    if not error_text:
        print('接口正常!')
    elif error_text[0].strip() == '网络可能存在问题,请您重试一下!':
        raise ValueError('网络可能存在问题,或者当前日期没有车次信息,请您重试一下')

    print("获取车次信息")
    station_names = {code: name for name, code in station_codes.items()}
    ticketList = getdata(response.text, station_names=station_names)
    print("保存车次信息")
    # Name the file after this call's own arguments, not the module globals.
    writeData(ticketList, filename=date + from_station + "->" + to_station + '.csv')

    # Keep only the columns that go into the e-mail body.
    summaries = [
        {
            '车次': item[' 车次 '],
            '出发车站': item['出发车站'],
            '到达车站': item['到达车站'],
            '出发时间': item['出发时间'],
            '到达时间': item['到达时间'],
            '硬卧': item['硬卧'],
            '硬座': item['硬座'],
        }
        for item in ticketList
    ]
    if summaries:
        # One train per line: every closing brace becomes a line break.
        list_str2 = str(summaries).replace('}', '\n')
        info = '日期:' + date + '\n' + list_str2
        print(info)
        SendeMail.main.sendTextEmail('从' + from_station + '前往' + to_station + '的', info)
    else:
        print('没有车次信息')


if __name__ == '__main__':
    # Travel date. The published listing shows "-12-15" with the year lost;
    # presumably the endpoint expects YYYY-MM-DD - set the real travel date.
    date = "2020-12-15"
    # Departure station
    from_station = "秦皇岛"
    # Arrival station
    to_station = "郑州"

    # Query once right away, then repeat on the schedule below.
    GET(date, from_station, to_station)
    # Run every minute. For an hourly check register the job with
    # schedule.every().hour.do(...) instead.
    schedule.every().minute.do(GET, date, from_station, to_station)
    while True:
        schedule.run_pending()
        time.sleep(2)

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。