1200字范文,内容丰富有趣,写作的好帮手!
1200字范文 > python selenium 爬取去哪儿网的数据

python selenium 爬取去哪儿网的数据

时间:2023-08-09 16:45:23

相关推荐

python selenium 爬取去哪儿网的数据

python selenium 爬取去哪儿网的数据

完整代码下载:https://github.com/tanjunchen/SpiderProject/tree/master/selenium+qunaerwang

#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Scrape Qunar hotel search results with Selenium and save them as text."""
import codecs
import datetime
import time

from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


class QunaManager(object):
    """Drive a browser through the hotel search form and dump each result page."""

    def get_hotel(self, driver, to_city, fromdate, todate):
        """Run a hotel search and append every result page's text to a file.

        Args:
            driver: Selenium WebDriver that already displays the search page.
                This method takes ownership of it and always shuts it down
                before returning, whether or not scraping succeeded.
            to_city: Destination typed into the ``toCity`` field. It must also
                appear in the result page title and is used in the file name.
            fromdate: Check-in date string typed into the ``fromDate`` field
                (``crawl`` passes ``YYYY-MM-DD``).
            todate: Check-out date string typed into the ``toDate`` field.

        Output is appended to ``to_city + fromdate + '.html'`` in the current
        working directory. Paging stops when the result title does not show up
        within 10 seconds or when no visible "next" control can be clicked.
        """
        try:
            self._submit_search(driver, to_city, fromdate, todate)
            out_path = to_city + fromdate + '.html'
            page_num = 0
            while True:
                try:
                    WebDriverWait(driver, 10).until(EC.title_contains(to_city))
                except WebDriverException as e:
                    print(e)
                    break
                time.sleep(3)
                # Scroll to the bottom, then pause, before reading the page
                # source (presumably to let lazily loaded entries render).
                driver.execute_script(
                    "window.scrollTo(0, document.body.scrollHeight);")
                time.sleep(5)
                self._save_page(driver.page_source, out_path, page_num)
                try:
                    # Pass a locator rather than an element so the lookup is
                    # retried by the wait instead of failing once up front.
                    next_page = WebDriverWait(driver, 10).until(
                        EC.visibility_of_element_located(
                            (By.CSS_SELECTOR, ".item.next")))
                    next_page.click()
                except WebDriverException as e:
                    print(e)
                    break
                page_num += 1
                time.sleep(5)
        finally:
            # quit() ends the whole session (browser and driver process);
            # close() would only close the current window.
            driver.quit()

    @staticmethod
    def _submit_search(driver, to_city, fromdate, todate):
        """Fill in the city and date fields, then click the search button."""
        # find_element(By.…) works on both Selenium 3 and 4; the older
        # find_element_by_* helpers were removed in Selenium 4.3.
        ele_to_city = driver.find_element(By.NAME, 'toCity')
        ele_from_date = driver.find_element(By.ID, 'fromDate')
        ele_to_date = driver.find_element(By.ID, 'toDate')
        ele_search = driver.find_element(By.CLASS_NAME, 'search-btn')
        ele_to_city.clear()
        ele_to_city.send_keys(to_city)
        ele_to_city.click()
        ele_from_date.clear()
        ele_from_date.send_keys(fromdate)
        ele_to_date.clear()
        ele_to_date.send_keys(todate)
        ele_search.click()

    @staticmethod
    def _save_page(html_content, out_path, page_num):
        """Append the text of every ``item_hotel_info`` element to *out_path*."""
        soup = bs(html_content, 'lxml')
        infos = soup.find_all(class_="item_hotel_info")
        # The context manager closes the file even if a write fails.
        with codecs.open(out_path, 'a', 'utf-8') as f:
            for info in infos:
                # Page-number separator; written without a trailing newline,
                # so the first text line of the entry follows it directly.
                f.write(str(page_num) + '--' * 50)
                content = info.get_text().replace(" ", "").replace("\t", "").strip()
                for line in content.splitlines():
                    if line.strip():
                        f.write(line)
                        f.write('\r\n')

    def crawl(self, root_url, to_city):
        """Open *root_url* in Chrome and scrape hotels for a one-night stay.

        Args:
            root_url: URL of the page that contains the hotel search form.
            to_city: Destination city passed on to ``get_hotel``.

        The stay runs from today to tomorrow, both formatted ``YYYY-MM-DD``.
        """
        # Read the clock once so both dates stay consistent around midnight.
        today = datetime.date.today()
        tomorrow = today + datetime.timedelta(days=1)
        driver = webdriver.Chrome()
        try:
            driver.set_page_load_timeout(10)
            driver.get(root_url)
            driver.maximize_window()
            driver.implicitly_wait(10)
        except Exception:
            # get_hotel has not taken ownership yet, so clean up here.
            driver.quit()
            raise
        self.get_hotel(driver, to_city,
                       today.strftime('%Y-%m-%d'),
                       tomorrow.strftime('%Y-%m-%d'))


if __name__ == '__main__':
    spider = QunaManager()
    # NOTE(review): "/" looks like a URL whose scheme and host were stripped
    # when this listing was published; replace it with the full address of the
    # hotel search page before running.
    spider.crawl("/", '上海')

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。