1200字范文,内容丰富有趣,写作的好帮手!
1200字范文 > python结合selenium.webdriver+PhantomJS登陆qq邮箱抓取数据

python结合selenium.webdriver+PhantomJS登陆qq邮箱抓取数据

时间:2019-08-27 06:29:58

相关推荐

python结合selenium.webdriver+PhantomJS登陆qq邮箱抓取数据

所有监控邮件发送到qq邮箱,需要分析一些服务器监控数据

qq邮箱登陆验证过程较为复杂,用urllib或requests比较困难,可以用selenium.webdriver+PhantomJS无窗口化登陆qq邮箱,只是速度稍慢,但也能接受

事先安装好selenium

pip install selenium

下载phantomjs.exe

https://phantomjs.org/download.html

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Log in to QQ Mail with Selenium + PhantomJS and count alert mails per IP.

The script opens the mail site in a headless PhantomJS browser, submits the
account/password form, reads the session id (``sid``) from a folder link, then
walks the folder's mail-list pages and prints how many alert rows mention each
IPv4 address.

NOTE(review): ``webdriver.PhantomJS`` and ``find_element_by_id`` are Selenium
3.x APIs; this script presumably needs ``selenium<4`` -- confirm against the
installed version.
"""
import os
import re
import time
from collections import Counter
from urllib import parse

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys

# phajs_path below is relative, so the working directory must contain
# phantomjs.exe.
os.chdir('E:\\python3\\爬虫\\')

# Placeholders: replace with the real mailbox account and password.
u = "邮箱帐号"
p = "邮箱密码"
phajs_path = 'phantomjs.exe'

# NOTE(review): the host was stripped from the published listing (the original
# had an empty string here, which makes every request URL unusable).
# https://mail.qq.com is the presumed value -- confirm.
BASE_URL = "https://mail.qq.com"

# Short paths, resolved against BASE_URL by QQ_mail.__makeUrl.
CORP_API_TYPE = {
    'base': '/',
    'neicun': '/cgi-bin/mail_list',
}

# Compiled once; used for every mail row that is parsed.
_IPV4_RE = re.compile(r'\d+\.\d+\.\d+\.\d+')


class QQ_mail():
    """A logged-in PhantomJS session plus helpers to scrape a mail folder."""

    # NOTE(review): the header value is empty in the published listing and was
    # presumably stripped as well -- confirm the intended value.
    headers = {':authority': ''}

    # Capabilities shared by all instances: image loading is disabled and
    # every entry of `headers` is registered as a PhantomJS custom header.
    cap = DesiredCapabilities.PHANTOMJS.copy()
    cap["phantomjs.page.settings.loadImages"] = False
    for key, value in headers.items():
        cap['phantomjs.page.customHeaders.{}'.format(key)] = value

    def __init__(self):
        """Store the module-level credentials and log in immediately."""
        self.uname = u
        self.pwd = p
        self.driver = self.test_mail_login(CORP_API_TYPE['base'])
        print("正在访问QQ邮箱")

    @staticmethod
    def __makeUrl(shortUrl):
        """Return the absolute URL for *shortUrl* (a path under BASE_URL).

        A leading slash on *shortUrl* is dropped first so the result never
        contains a doubled ``//`` between host and path.
        """
        return BASE_URL + '/' + shortUrl.lstrip('/')

    @staticmethod
    def __appendArgs(url, args):
        """Return *url* with the mapping *args* appended as a query string.

        ``None`` or an empty mapping leaves *url* unchanged. Keys and values
        are URL-encoded; ``&`` is used as the joiner when *url* already has a
        query part, ``?`` otherwise.
        """
        if not args:
            return url
        joiner = '&' if '?' in url else '?'
        return url + joiner + parse.urlencode(args)

    def test_mail_login(self, shortUrl, args=None):
        """Open the login page and submit the account/password form.

        Args:
            shortUrl: path of the login page, resolved with ``__makeUrl``.
            args: unused; kept so existing callers keep working.

        Returns:
            The PhantomJS driver after the form has been submitted.

        Raises:
            Any exception raised by Selenium; the browser process is shut
            down first so a failed login does not leave PhantomJS running.
        """
        driver = webdriver.PhantomJS(
            phajs_path, desired_capabilities=QQ_mail.cap)
        try:
            # NOTE(review): the constructor already receives the capabilities;
            # this second start_session looks redundant -- confirm before
            # removing.
            driver.start_session(QQ_mail.cap)
            driver.implicitly_wait(30)
            driver.set_window_size(800, 600)
            driver.get(self.__makeUrl(shortUrl))

            # The form lives inside the "login_frame" frame; "switcher_plogin"
            # is clicked before the "u" / "p" inputs are filled in.
            driver.switch_to.frame("login_frame")
            driver.find_element_by_id("switcher_plogin").click()

            account_box = driver.find_element_by_id("u")
            account_box.clear()
            account_box.send_keys(self.uname)

            password_box = driver.find_element_by_id("p")
            password_box.clear()
            password_box.send_keys(self.pwd)
            password_box.send_keys(Keys.ENTER)
        except Exception:
            driver.quit()
            raise

        # Printed once the form is submitted; the result page is not checked.
        print("登录QQ邮箱成功")
        return driver

    def get_floder_url(self, args):
        """Return the ``sid`` query parameter of a folder link.

        Args:
            args: DOM id of the link element, e.g. ``"folder_137"``.

        Returns:
            The first ``sid`` value found in the element's ``href``.

        Raises:
            KeyError: if the ``href`` carries no ``sid`` parameter.
        """
        time.sleep(3)  # fixed pause before looking the link up
        link = self.driver.find_element_by_id(args).get_attribute('href') or ''
        sid_values = parse.parse_qs(parse.urlsplit(link).query).get('sid')
        if not sid_values:
            raise KeyError(
                'no sid parameter in href of element {!r}: {!r}'.format(
                    args, link))
        return sid_values[0]

    def parse_mail_list(self, shortUrl, args=None):
        """Load one mail-list page and return the IPs of its alert rows.

        Args:
            shortUrl: path of the list page, resolved with ``__makeUrl``.
            args: optional mapping of query parameters
                (the caller passes folderid, page, sid, nocheckframe).

        Returns:
            A list with one IPv4 string per matching row, duplicates kept.
            Rows whose title contains "恢复" or "0个" are skipped, as are rows
            that lack the expected elements or contain no IPv4 address.
        """
        url = self.__appendArgs(self.__makeUrl(shortUrl), args)
        print('开始抓取:', url)

        self.driver.get(url)
        time.sleep(3)  # fixed pause before reading the page source
        soup = BeautifulSoup(self.driver.page_source, 'html.parser')

        ip_add = []
        for row in soup.find_all('div', {'class': 'tf no'}):
            # presumably the mail subject -- verify against the page markup
            title_tag = row.find('u', {'class': 'black tt '})
            if title_tag is None:
                continue
            title = title_tag.get_text()
            if '恢复' in title or "0个" in title:
                continue

            detail_tag = row.find('b', {'class': 'no '})
            if detail_tag is None:
                continue
            found = _IPV4_RE.search(detail_tag.get_text())
            if found:
                ip_add.append(found.group(0))
        return ip_add

    def tearDown(self):
        """Quit the browser session."""
        self.driver.quit()


def main(pages=12, folder_id='137'):
    """Scrape the alert folder and print ``ip:count`` for every IP found.

    Args:
        pages: number of list pages to fetch (page indexes 0..pages-1).
        folder_id: folder number, used both for the ``folder_<id>`` link
            element and for the ``folderid`` query parameter.
    """
    mail = QQ_mail()
    counts = Counter()
    try:
        sid = mail.get_floder_url("folder_" + folder_id)
        for page in range(pages):
            counts.update(mail.parse_mail_list(
                CORP_API_TYPE['neicun'],
                {
                    'folderid': folder_id,
                    'page': str(page),
                    'sid': sid,
                    'nocheckframe': 'true',
                },
            ))
    finally:
        # Always release the PhantomJS process, even when a page fails.
        mail.tearDown()

    print("ip地址 内存不足报警次数")
    for ip, hits in counts.items():
        print('{}:{}'.format(ip, hits))


if __name__ == "__main__":
    main()

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。