import requests
from lxml import etree
from mysql import MYSQL
# 1. Declare the lianjia function
def lianjia(url, headers, pages=range(1, 3), timeout=10):
    """Crawl listing pages and hand every listing link found to ``xiangqing``.

    Args:
        url: Listing-page URL, used as a ``str.format`` template. A ``{}``
            placeholder, if present, is filled with each page number.
        headers: HTTP headers sent with every listing-page request.
        pages: Iterable of page numbers substituted into ``url``.
        timeout: Seconds passed to ``requests.get`` as its timeout.

    The previous version looped over ``range(1, 3, 30)``, which yields only
    the value 1 because 30 is the step, and it rebound ``url`` to the list of
    extracted links, so the template was lost after the first page.
    """
    # dict.fromkeys drops duplicates while keeping order: a template without
    # a "{}" placeholder formats to the same URL for every page number, and
    # that page should be fetched (and its listings stored) only once.
    page_urls = list(dict.fromkeys(url.format(page) for page in pages))

    for page_url in page_urls:
        resp = requests.get(page_url, headers=headers, timeout=timeout)
        tree = etree.HTML(resp.text)
        detail_urls = tree.xpath(
            '//ul[@class="sellListContent"]//div[@class="title"]/a/@href'
        )
        for detail_url in detail_urls:
            xiangqing(detail_url)
def xiangqing(url, headers=None, timeout=10):
    """Fetch one listing detail page and insert a row into ``lianjia``.

    Three text values are pulled from the page with XPath and passed, with a
    parameterized INSERT statement, to ``lanj.execute_modify_mysql``.
    ``lanj`` is a module-level name that must be bound before this function
    is called.

    Args:
        url: URL of the listing detail page.
        headers: Optional HTTP headers for the request.
        timeout: Seconds passed to ``requests.get`` as its timeout.

    Returns:
        None. Nothing is inserted when the page cannot be parsed or any of
        the three expected values is missing, so one unexpected page does
        not abort the whole crawl with an ``IndexError``.
    """
    # The page is fetched once; the earlier second request was never used.
    resp = requests.get(url, headers=headers, timeout=timeout)
    tree = etree.HTML(resp.text)
    if tree is None:
        return

    xiaoqu_nodes = tree.xpath(
        '//div[@class="aroundInfo"]//a[@target="_blank"]/text()'
    )
    pingmi_nodes = tree.xpath(
        '//div[@class="houseInfo"]//div[@class="mainInfo"]/text()'
    )
    jiage_nodes = tree.xpath(
        '//div[@class="overview"]//span[@class="total"]/text()'
    )

    # The third mainInfo text node is the one stored as ``pingmi``.
    if not xiaoqu_nodes or len(pingmi_nodes) < 3 or not jiage_nodes:
        return

    sql = 'insert into lianjia(xiaoqu,pingmi,jiage) values (%s,%s,%s)'
    # Bind the first price text node, not the whole XPath result list, so
    # the %s placeholder receives a scalar value.
    deta = (xiaoqu_nodes[0], pingmi_nodes[2], jiage_nodes[0])
    lanj.execute_modify_mysql(sql, deta)
if __name__ == "__main__":
    # ``lanj`` has to be a module-level name: xiangqing() reads it as a global.
    lanj = MYSQL()

    # NOTE(review): this URL has no "{}" placeholder, so the str.format call
    # in lianjia() leaves it unchanged -- confirm whether "pg{}/" was meant.
    url = "https://bj.lianjia.com/ershoufang/huilongguan2/pg2/"

    # Browser-like User-Agent sent with the listing-page request.
    headers = {}
    headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'

    lianjia(url=url, headers=headers)