from selenium import webdriver
from lxml import etree
import re

# Route traffic through a proxy so Lagou's anti-crawling checks don't see your real IP.
# Plenty of free proxy IPs can be found online; 66代理 is a decent source.
chromeOptions = webdriver.ChromeOptions()
chromeOptions.add_argument("--proxy-server=http://proxy_ip:port")

driver_path = r'D:\chromedriver\chromedriver.exe'
driver = webdriver.Chrome(executable_path=driver_path, chrome_options=chromeOptions)
driver.get('https://www.lagou.com/jobs/list_java?labelWords=&fromSearch=true&suginput=')

# Parse the rendered page; each <li> carries the job info in data-* attributes.
html = etree.HTML(driver.page_source)
items = html.xpath("//ul[@class='item_con_list']/li")
nodes = html.xpath("//div[@class='li_b_l']/text()")

# Keep only the non-empty experience/education texts, stripped of spaces and newlines.
asks = []
for node in nodes:
    text = re.sub(r'[ \n]', '', node)
    if text:
        asks.append(text)

for index, item in enumerate(items):
    print('Company: %s\t\tPosition: %s' % (item.get('data-company'), item.get('data-positionname')))
    print('Experience/Education: %s\t\tSalary: %s' % (asks[index], item.get('data-salary')))
    print('##' * 50)
I read a few blog posts on beating the anti-crawling measures, and they also threw in data cleaning and storage.
The part actually worth borrowing is the dozen or so anti-crawling lines, yet those posts run to a few hundred lines of code. If the reader is on Elasticsearch and the post writes to MySQL/CSV, isn't that a waste of the reader's time? If it were me, I'd just close the tab.
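Since the proxy setup is the part worth borrowing, here is a minimal sketch of isolating it in a small helper so a blocked proxy can be swapped out quickly. PROXY_POOL, make_driver and the addresses in the list are hypothetical placeholders, not part of the original post; fill in free proxies you have verified yourself.

import random

from selenium import webdriver

# Hypothetical pool of free proxies -- replace with addresses you have checked yourself.
PROXY_POOL = [
    'http://10.0.0.1:8080',
    'http://10.0.0.2:3128',
]

def make_driver(driver_path):
    """Launch Chrome behind a proxy picked at random from PROXY_POOL."""
    chromeOptions = webdriver.ChromeOptions()
    chromeOptions.add_argument('--proxy-server=%s' % random.choice(PROXY_POOL))
    return webdriver.Chrome(executable_path=driver_path, chrome_options=chromeOptions)

# Usage: if requests start getting blocked, quit the driver and call make_driver()
# again to come back on a different IP.
driver = make_driver(r'D:\chromedriver\chromedriver.exe')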