1000字范文,内容丰富有趣,学习的好帮手!
1000字范文 > python简单爬取人人车网站在售北京二手车车况信息并存入mysql数据库

python简单爬取人人车网站在售北京二手车车况信息并存入mysql数据库

时间:2023-10-28 19:13:33

相关推荐

python简单爬取人人车网站在售北京二手车车况信息并存入mysql数据库

直接上代码

"""Scrape Beijing used-car listings from Renrenche and store them in MySQL.

Fields collected for every car:
1. name, 2. price, 3. detail-page link, 4. picture link, 5. production date,
6. mileage, 7. down payment, 8. price-drop info, 9. licence-plate location,
10. emission standard, 11. ownership-transfer record, 12. owner's comment,
13. condition - exterior, 14. condition - interior, 15. condition - chassis,
16. inspection result, 17. annual-inspection expiry,
18. commercial-insurance expiry, 19. purchase invoice available,
20. serviced at a 4S dealer, 21. compulsory-insurance expiry.
"""
import pymysql
import requests
import time
from bs4 import BeautifulSoup
from pyquery import PyQuery as pq

# NOTE(review): the listing path and the detail-page hrefs are site-relative,
# so requests needs an origin to prepend.  The host below is assumed from the
# site this script targets (Renrenche) - confirm it before running.
BASE_HOST = 'https://www.renrenche.com'

# Module-level accumulators.  Every get_* function appends to one of these
# lists and returns it; getcar_main() empties them after each listing page.
# Used-car names
car_name = []
# Used-car prices
car_price = []
# Detail-page links
car_url = []
# Picture links
car_picture_url = []
# Production dates
car_date = []
# Mileage
car_km = []
# Down-payment prices
car_pay = []
# Price-drop info
car_pi = []
# Licence-plate location
car_location = []
# Emission standard
car_es = []
# Ownership-transfer record
car_tf = []
# Owner's comment
car_usertx = []
# Raw condition texts (exterior, interior, chassis interleaved)
car_condit = []
# Condition - exterior
car_condit_out = []
# Condition - interior
car_condit_in = []
# Condition - chassis
car_condit_chassis = []
# Inspection result
car_result = []
# Annual-inspection expiry
car_procedures_YearlyInspection = []
# Commercial-insurance expiry
car_Ciet = []
# Purchase invoice available or not
car_invoice = []
# Serviced at a 4S dealer or not
car_maintain = []
# Compulsory-insurance expiry
car_compulsory = []
# First listing page to scrape
page = 1


def _select(doc, selector):
    """Return an iterator over the matches of *selector* in *doc*.

    *doc* may be None (page could not be fetched); nothing is yielded then.
    """
    if doc is None:
        return iter(())
    return doc(selector).items()


def _part(parts, index, default=''):
    """Return ``parts[index]``, or *default* when *parts* is too short."""
    return parts[index] if index < len(parts) else default


def _collect_detail_text(car_url, selector, target):
    """Append the text of every *selector* match on each detail page to *target*.

    Pages that cannot be fetched are skipped.  Returns *target*.
    """
    for url in car_url:
        for node in _select(get_in_page(url), selector):
            target.append(node.text())
    return target


def db_mysql(car_name, car_date, car_km, car_price, car_pay, car_pi, car_url,
             car_picture_url, car_location, car_es, car_tf, car_usertx,
             car_condit_out, car_condit_in, car_condit_chassis, car_result,
             car_procedures_YearlyInspection, car_Ciet, car_invoice,
             car_maintain, car_compulsory):
    """Insert one row per car into ``rrc_table`` of the local ``rrc`` database.

    The 21 arguments are parallel lists.  Rows are built with ``zip``, so the
    shortest list bounds the number of rows written.  Every row is committed
    on its own; a row whose insert fails is reported, rolled back and skipped.
    """
    # Open the database.
    # NOTE(review): credentials are hard-coded; consider reading them from
    # configuration or the environment.
    db = pymysql.connect(host='localhost', user='root', password='1234',
                         port=3306, database='rrc')
    sql = ('insert into rrc_table(car_name,car_date,car_km,car_price,car_pay,'
           'car_pi,car_url,car_picture_url,car_location,car_es,car_tf,'
           'car_usertx,car_condit_out, car_condit_in, car_condit_chassis,'
           'car_result,car_procedures_YearlyInspection, car_Ciet, car_invoice,'
           ' car_maintain, car_compulsory) '
           'values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,'
           '%s,%s)')
    rows = zip(car_name, car_date, car_km, car_price, car_pay, car_pi,
               car_url, car_picture_url, car_location, car_es, car_tf,
               car_usertx, car_condit_out, car_condit_in, car_condit_chassis,
               car_result, car_procedures_YearlyInspection, car_Ciet,
               car_invoice, car_maintain, car_compulsory)
    try:
        # Create the cursor.
        cursor = db.cursor()
        for i, row in enumerate(rows, start=1):
            try:
                cursor.execute(sql, row)
                db.commit()
                print("爬取成功")
            except pymysql.MySQLError:
                print("第" + str(i) + "条数据出现数据插入异常")
                db.rollback()
    finally:
        # Release the connection even if something unexpected is raised.
        db.close()


def getcar_main():
    """Scrape 50 listing pages, starting at ``page``, and store each page's cars."""
    base_url = BASE_HOST + '/bj/ershouche/p'
    for x in range(0, 50):
        print('开始第'+str(x+1)+'页内容爬取')
        # Advance the page number with the loop counter so that each
        # iteration fetches a different listing page.
        url = base_url + str(page + x)
        car_name = get_carname(url)
        car_date, car_km = get_producedate(url)
        car_price = get_price(url)
        car_pay = get_dp(url)
        car_pi = get_pi(url)
        car_url = get_carurl(url)
        car_picture_url = get_picture(url)
        print("正在抓取车牌所在地...")
        car_location = get_carLicense(car_url)
        print("正在抓取二手车排放标准...")
        car_es = get_es(car_url)
        print("正在抓取二手车过户记录...")
        car_tf = get_transfer(car_url)
        print("正在抓取车主评价...")
        car_usertx = get_omt(car_url)
        print("正在抓取车况信息...")
        car_condit_out, car_condit_in, car_condit_chassis = get_condit(car_url)
        print("正在抓取机构检测结果...")
        car_result = get_result(car_url)
        print("正在抓取车辆手续信息...")
        (car_procedures_YearlyInspection, car_Ciet, car_invoice,
         car_maintain, car_compulsory) = get_procedures(car_url)
        print("正在存入数据库...")
        db_mysql(car_name, car_date, car_km, car_price, car_pay, car_pi,
                 car_url, car_picture_url, car_location, car_es, car_tf,
                 car_usertx, car_condit_out, car_condit_in,
                 car_condit_chassis, car_result,
                 car_procedures_YearlyInspection, car_Ciet, car_invoice,
                 car_maintain, car_compulsory)
        print('开始清空列表列表...')
        # Most names here alias the module-level lists, so clear() resets the
        # shared accumulators.  car_condit (the raw source of the three
        # condition slices) must be cleared too, otherwise the previous
        # pages' condition texts would be stored again with every page.
        for bucket in (car_name, car_date, car_km, car_price, car_pay,
                       car_pi, car_url, car_picture_url, car_location,
                       car_es, car_tf, car_usertx, car_condit,
                       car_condit_out, car_condit_in, car_condit_chassis,
                       car_result, car_procedures_YearlyInspection, car_Ciet,
                       car_invoice, car_maintain, car_compulsory):
            bucket.clear()
        print('所有列表已清空')


def get_page(url):
    """Fetch *url* and return it parsed as a PyQuery document.

    Returns None when the request or parsing fails (a message is printed)
    or when the HTTP status is not 200.
    """
    headers = {'user-agent': 'Mozilla/5.0'}
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        rs = requests.get(url=url, headers=headers, timeout=10)
        if rs.status_code != 200:
            return None
        return pq(rs.text)
    except Exception:
        # Best-effort fetch: report and carry on, but unlike a bare
        # ``except`` this still lets Ctrl-C / SystemExit through.
        print("url出错了!")
        return None


def get_in_page(url):
    """Fetch a car's detail page; same contract as ``get_page``."""
    return get_page(url)


# function1: collect detail-page links
def get_carurl(url):
    """Append the detail-page link of every car on listing *url* to ``car_url``."""
    doc = get_page(url)
    for link in _select(doc, ".thumbnail "):
        # hrefs are site-relative; a missing href yields the bare origin so
        # the list stays aligned with the other per-car lists.
        car_url.append(BASE_HOST + (link.attr.href or ''))
    return car_url


# function2: collect car names
def get_carname(url):
    """Append the ``data-title`` of every car on listing *url* to ``car_name``."""
    doc = get_page(url)
    for button in _select(doc, ".schedule.btn-base.btn-wireframe"):
        car_name.append(button.attr('data-title'))
    return car_name


# function3: collect production date and mileage
def get_producedate(url):
    """Split each ".mileage" text on '/' into ``car_date`` and ``car_km``."""
    doc = get_page(url)
    for item in _select(doc, ".mileage"):
        parts = item.text().split('/')
        car_date.append(parts[0])
        # Text without a '/' has no mileage part; store '' instead of raising.
        car_km.append(_part(parts, 1))
    return car_date, car_km


# function4: collect prices
def get_price(url):
    """Append every car's price text (down-payment node removed) to ``car_price``."""
    doc = get_page(url)
    if doc is not None:
        prices = doc(".tags-box").children('.price').remove('.down-payment')
        for price in prices.items():
            car_price.append(price.text())
    return car_price


# function5: collect picture links
def get_picture(url):
    """Append every car's picture link to ``car_picture_url``.

    ``data-src`` is preferred; ``src`` is used when ``data-src`` is absent.
    """
    doc = get_page(url)
    if doc is not None:
        images = doc(".thumbnail").children('.img-backgound').children('img')
        for img in images.items():
            src = img.attr('data-src')
            if src is None:
                src = img.attr('src')
            # An <img> with neither attribute gets '' to keep list alignment.
            car_picture_url.append('https:' + src if src is not None else '')
    return car_picture_url


# function6: collect down payments
def get_dp(url):
    """Append every car's down-payment text to ``car_pay``."""
    doc = get_page(url)
    for box in _select(doc, ".tags-box"):
        lines = box.text().split("\n")
        # The down payment is the third line; require it to exist before
        # indexing, otherwise record that no down payment is offered.
        car_pay.append(lines[2] if len(lines) > 2 else "不可首付")
    return car_pay


# function7: collect price-drop info
def get_pi(url):
    """Append every car's price-drop text to ``car_pi``."""
    doc = get_page(url)
    for thumb in _select(doc, ".thumbnail"):
        lines = thumb.text().split("\n")
        if "已降" in lines:
            car_pi.append("已降" + _part(lines, 1))
        else:
            car_pi.append("近期未降价")
    return car_pi


# function8: collect licence-plate location
def get_carLicense(car_url):
    """Append the "#car-licensed" text of each detail page to ``car_location``."""
    return _collect_detail_text(car_url, "#car-licensed", car_location)


# function9: collect emission standard
def get_es(car_url):
    """Append the emission-standard text of each detail page to ``car_es``."""
    return _collect_detail_text(
        car_url,
        ".span5.car-fluid-standard .detail-version3-right-icon .car-summary",
        car_es)


# function10: collect ownership-transfer record
def get_transfer(car_url):
    """Append the transfer-record text of each detail page to ``car_tf``."""
    return _collect_detail_text(
        car_url,
        "#zhimaicar-detail-header-right .row-fluid-wrapper .car-transfer .car-summary",
        car_tf)


# function11: collect owner's comment
def get_omt(car_url):
    """Append the owner's comment of each detail page to ``car_usertx``."""
    return _collect_detail_text(
        car_url, ".text-about-car-owner .owner-main-text", car_usertx)


# function12: collect condition info
def get_condit(car_url):
    """Collect condition subtitles and split them into three lists.

    The matched texts are appended to ``car_condit``; every first, second
    and third entry is returned as exterior, interior and chassis.
    """
    _collect_detail_text(
        car_url,
        "#gallery .detail-car-appearance-title .zhimai-subtitle",
        car_condit)
    car_condit_out = car_condit[0::3]
    car_condit_in = car_condit[1::3]
    car_condit_chassis = car_condit[2::3]
    return car_condit_out, car_condit_in, car_condit_chassis


# function13: collect inspection result
def get_result(car_url):
    """Append the inspection-result text of each detail page to ``car_result``."""
    return _collect_detail_text(
        car_url,
        ".report-inner-box .report-main .report-result-des",
        car_result)


# function14: collect paperwork info
def get_procedures(car_url):
    """Collect the five paperwork fields from each detail page.

    Lines 1, 3, 5, 7 and 9 of the ".interval-title-content" text go to the
    annual-inspection, commercial-insurance, invoice, maintenance and
    compulsory-insurance lists; a missing line is stored as ''.
    """
    for url in car_url:
        for block in _select(get_in_page(url), ".interval-title-content"):
            lines = block.text().split("\n")
            car_procedures_YearlyInspection.append(_part(lines, 1))
            car_Ciet.append(_part(lines, 3))
            # Exactly one invoice entry per car keeps this list aligned with
            # the others when db_mysql() zips them together.
            car_invoice.append(_part(lines, 5))
            car_maintain.append(_part(lines, 7))
            car_compulsory.append(_part(lines, 9))
    return (car_procedures_YearlyInspection, car_Ciet, car_invoice,
            car_maintain, car_compulsory)


# Run the scraper
if __name__ == "__main__":
    getcar_main()

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。