1200字范文,内容丰富有趣,写作的好帮手!
1200字范文 > python通过selenium绕过反爬系统跨网页批量获取股票财务信息

python通过selenium绕过反爬系统跨网页批量获取股票财务信息

时间:2021-03-07 20:12:48

相关推荐

python通过selenium绕过反爬系统跨网页批量获取股票财务信息

完整代码:

"""Scrape key financial figures for a list of stocks from Xueqiu into Excel.

Stock codes are read from column C of sheet ``worksheet`` in
``fundWebd.xlsx`` (the first non-empty cell is treated as the header).  For
every code five pages are opened in Chrome through Selenium, the rendered
HTML is parsed with BeautifulSoup, and the extracted values are appended to
``fundWebd.csv``, dumped to ``fundWebd.json`` and written back into the
workbook, one sheet row per stock.

All cell/span positions used below are tied to the page layout at the time
the script was written and have to be re-checked when the site changes.
"""
import json
import time

import pandas as pd

# NOTE(review): the published listing lost the scheme and host of every URL
# (only '/S/...' and '/snowman/S/...' survived).  The original comments name
# Xueqiu (雪球网), so its host is assumed here -- confirm before running.
XUEQIU_HOST = 'https://xueqiu.com'
WORKBOOK_PATH = 'fundWebd.xlsx'
SHEET_NAME = 'worksheet'
CSV_PATH = 'fundWebd.csv'
JSON_PATH = 'fundWebd.json'

# Second button in the detail page's button list; it is clicked before the
# indicator, balance-sheet and income tables are read.
# NOTE(review): presumably switches the reporting period -- confirm.
TAB_XPATH = ".//div[contains(@class,'stock-info-btn-list')]/span[2]"

# (column name, position in the flattened list of table values, unit to strip)
# NOTE(review): the original assigned '利润(亿)' twice (position 13, then 10);
# only the second assignment ever took effect, so only that one is kept.  The
# two were probably meant to be differently named columns.
INDICATOR_CELLS = (
    ('营业额', 0, '亿'),
    ('EPS每股收益', 30, ''),
    ('负债率', 85, ''),
    ('经营现金流', 50, ''),
    ('利润(亿)', 10, '亿'),
    ('未分配利润', 45, ''),
    ('公积金', 40, ''),
    ('毛利率', 75, ''),
    ('净利率', 80, ''),
    ('ROA总报酬率', 65, ''),
    ('ROE净收益率', 60, ''),
    ('账款周期', 120, ''),
    ('存货周转', 150, ''),
    ('总资产周转率', 145, ''),
)

# Fixed column order for the CSV/JSON output (modules a, b, c, d, e).
OUTPUT_COLUMNS = (
    ['序号', '股价', '总市值(亿)', '总股本(亿)', '营市比', 'PE市盈率', 'PB市净率']
    + [name for name, _, _ in INDICATOR_CELLS]
    + ['股票', '代码', '分红率', '分红', '货币资金', '存货',
       '研发费用', '利息费用', '利息收入']
)

# Zero-based sheet column -> record key.  Columns 9, 10 and 20-24 are left
# untouched, exactly as in the original (their writes were commented out).
EXCEL_COLUMNS = {
    1: '股票', 2: '代码', 3: '股价', 4: '总市值(亿)', 5: '总股本(亿)',
    6: '营业额', 7: 'EPS每股收益', 8: '分红', 11: 'PE市盈率', 12: 'PB市净率',
    13: '负债率', 14: '经营现金流', 15: '货币资金', 16: '存货',
    18: '利润(亿)', 19: '利润(亿)', 25: '未分配利润', 26: '公积金',
    27: '毛利率', 28: '净利率', 29: 'ROA总报酬率', 30: 'ROE净收益率',
    31: '账款周期', 32: '存货周转', 33: '总资产周转率',
}


def text_before(text, marker):
    """Return the part of *text* before *marker* (all of it when absent)."""
    return text.partition(marker)[0].strip()


def split_name_code(label):
    """Split a label such as ``'名称(代码)'`` into ``(name, code)``.

    The code is ``''`` when the label contains no opening parenthesis.
    """
    name, paren, rest = label.strip().partition('(')
    code = rest.strip().strip(')') if paren else ''
    return name.strip(), code


def dividend_amount(plan):
    """Return the text between '派' and the following '元', else ``''``."""
    _, found, tail = plan.partition('派')
    if not found:
        return ''
    amount, found, _ = tail.partition('元')
    return amount.strip() if found else ''


def excel_cells(entry):
    """Map one JSON record to ``{zero-based column: value}`` for the sheet.

    Keys that are missing or ``None`` are skipped so that existing sheet
    content (in particular the stock code in column C) is never blanked.
    """
    cells = {
        col: entry[key]
        for col, key in EXCEL_COLUMNS.items()
        if entry.get(key) is not None
    }
    cost, income = entry.get('利息费用'), entry.get('利息收入')
    if cost is not None and income is not None:
        cells[17] = f"{cost} /{income}"  # interest expense / interest income
    research, revenue = entry.get('研发费用'), entry.get('营业额')
    if research is not None and revenue is not None:
        cells[34] = f"{research}/{revenue}"  # R&D expense / revenue
    return cells


def _one_row(record):
    """Return *record* as a one-row DataFrame (empty frame for no data)."""
    return pd.DataFrame([record]) if record else pd.DataFrame()


def _pick(values, index, unit=''):
    """Return ``values[index]`` without *unit*; ``''`` if out of range."""
    try:
        return values[index].strip().strip(unit)
    except IndexError:
        return ''


def data_a(html):
    """Extract price and quote figures from the stock overview page.

    Args:
        html: iterable of parsed ``stock__main`` container tags.

    Returns:
        One-row DataFrame; empty when nothing could be read.
    """
    record = {}
    for pos, item in enumerate(html):
        try:
            price = item.find_all('div', attrs={'class': 'stock-current'})[0]
            record['序号'] = ''
            record['股价'] = price.text.strip().strip('¥')
            for table in item.find_all('table', attrs={'class': 'quote-info'}):
                spans = table.find_all('span')
                record['总市值(亿)'] = spans[19].text.strip().strip('亿')
                record['总股本(亿)'] = ''
                record['营市比'] = ''
                record['PE市盈率'] = spans[10].text.strip()
                # NOTE(review): the original reads the same span (10) for PB
                # as for PE, which looks like a copy/paste slip -- confirm
                # the correct position against the live page.
                record['PB市净率'] = spans[10].text.strip()
            print(str(pos), "第一模块写入正常")
        except (AttributeError, IndexError) as exc:
            print(str(pos), "第一模块写入异常", exc)
    return _one_row(record)


def data_b(html):
    """Extract the main financial indicators.

    The first text node of every ``<p>`` inside the table cells is collected
    into one flat list; the indicators are then read at the fixed positions
    listed in ``INDICATOR_CELLS``.  Positions beyond the end of the list
    yield ``''`` instead of aborting the whole stock.

    Args:
        html: iterable of parsed ``<tbody>`` tags.

    Returns:
        One-row DataFrame with one column per indicator.
    """
    values = []
    for tbody in html:
        for row in tbody.find_all('tr'):
            for cell in row.find_all('td'):
                try:
                    values.append(str(cell.find_all('p')[0].contents[0]))
                except IndexError:
                    continue  # cell without a (non-empty) <p>
    record = {
        name: _pick(values, index, unit)
        for name, index, unit in INDICATOR_CELLS
    }
    missing = [name for name, index, _ in INDICATOR_CELLS
               if index >= len(values)]
    if missing:
        print(str(len(values)), "第二模块写入异常", missing)
    else:
        print(str(len(values)), "第二模块写入正常")
    return _one_row(record)


def data_c(html, html1):
    """Extract stock name, stock code and the dividend per plan.

    Args:
        html: iterable of parsed ``<tbody>`` tags of the dividend page; the
            second cell of each body is expected to hold the plan text.
        html1: iterable of tags whose text looks like ``名称(代码)``.

    Returns:
        One-row DataFrame; empty when nothing could be read.
    """
    record = {}
    for label in html1:
        record['股票'], record['代码'] = split_name_code(label.text)
    for pos, item in enumerate(html):
        try:
            plan = item.find_all('td')[1].text.strip()
            record['分红率'] = ''
            record['分红'] = dividend_amount(plan)
            print(str(pos), "第三模块写入正常")
        except (AttributeError, IndexError) as exc:
            print(str(pos), "第三模块写入异常", exc)
    return _one_row(record)


def data_d(html):
    """Extract cash and inventory from the balance-sheet page.

    Each value is cut at its own '亿'; the original reused the cut position
    of the cash cell for the inventory cell.

    Args:
        html: iterable of parsed ``<tbody>`` tags.

    Returns:
        One-row DataFrame; empty when nothing could be read.
    """
    record = {}
    for pos, item in enumerate(html):
        try:
            cells = item.find_all('td')
            record['货币资金'] = text_before(cells[7].text, '亿')
            record['存货'] = text_before(cells[61].text, '亿')
            print(str(pos), "第四模块写入正常")
        except (AttributeError, IndexError) as exc:
            print(str(pos), "第四模块写入异常", exc)
    return _one_row(record)


def data_e(html):
    """Extract R&D expense, interest expense and interest income.

    Args:
        html: iterable of parsed ``<tbody>`` tags of the income statement.

    Returns:
        One-row DataFrame; empty when nothing could be read.
    """
    record = {}
    for pos, item in enumerate(html):
        try:
            cells = item.find_all('td')
            record['研发费用'] = cells[43].text.strip()
            record['利息费用'] = cells[55].text.strip()
            record['利息收入'] = cells[61].text.strip()
            print(str(pos), "第五模块写入正常")
        except (AttributeError, IndexError) as exc:
            print(str(pos), "第五模块写入异常", exc)
    return _one_row(record)


def _grab(driver, url, load_wait, click_wait=None):
    """Open *url*, optionally click the tab button, and parse the page."""
    from bs4 import BeautifulSoup
    from selenium.webdriver.common.by import By

    driver.get(url)
    time.sleep(load_wait)
    if click_wait is not None:
        driver.find_element(By.XPATH, TAB_XPATH).click()
        time.sleep(click_wait)
    return BeautifulSoup(driver.page_source, 'html.parser')


def _scrape_stock(driver, bandcode):
    """Visit the five pages of one stock and return the merged record.

    A page that fails is reported and skipped; the other pages are still
    collected, and nothing is carried over from a previously scraped stock.
    The navigation order and the pauses follow the original script.
    """
    home = XUEQIU_HOST + '/S/' + bandcode
    detail = XUEQIU_HOST + '/snowman/S/' + bandcode + '/detail#/'
    main_class = {'class': 'container-sm float-left stock__main'}
    # label, lead-in, pause, url, load wait, tab-click wait, cool-down, parser
    pages = (
        ('基础数据1', None, 0, home, 1, None, 2,
         lambda s: data_a(s.find_all('div', attrs=main_class))),
        ('主要指标', 'back', 2, detail + 'ZYCWZB', 4, 3, 4,
         lambda s: data_b(s.find_all('tbody'))),
        ('分红', 'back', 0, detail + 'FHPS', 3, None, 2,
         lambda s: data_c(s.find_all('tbody'),
                          s.find_all('div', class_='stock-info-name'))),
        ('资产负债表', 'home', 2, detail + 'ZCFZB', 4, 4, 3,
         lambda s: data_d(s.find_all('tbody'))),
        ('利润表', 'back', 2, detail + 'GSLRB', 4, 2, 4,
         lambda s: data_e(s.find_all('tbody'))),
    )
    record = {}
    for label, lead_in, pause, url, load_wait, click_wait, cool_down, parse in pages:
        try:
            if lead_in == 'back':
                driver.back()
            elif lead_in == 'home':
                driver.get(home)
            time.sleep(pause)
            frame = parse(_grab(driver, url, load_wait, click_wait))
            if not frame.empty:
                record.update(frame.iloc[0].to_dict())
        except Exception as exc:  # best effort per page: report and go on
            print(label, "当页数据操作失败", exc)
        time.sleep(cool_down)
    return record


def main():
    """Run the scrape for every code in the workbook and store the results."""
    import xlwings as xw
    from selenium.webdriver import Chrome, ChromeOptions

    app = xw.App(visible=False, add_book=False)
    driver = None
    try:
        wb = app.books.open(WORKBOOK_PATH)
        sh = wb.sheets[SHEET_NAME]
        app.display_alerts = False
        app.screen_updating = False
        # Column C: header first, then one stock code per row.
        codes = [v for v in sh.range("c:c").value if v is not None][1:]

        driver = Chrome(options=ChromeOptions())  # visible browser window
        records = []
        for pos, bandcode in enumerate(codes):
            print(str(pos), bandcode, '第' + str(pos + 1) + '只股票开始写入')
            records.append(_scrape_stock(driver, str(bandcode)))

        table = pd.DataFrame(records).reindex(columns=OUTPUT_COLUMNS)
        print(table)
        table.to_csv(CSV_PATH, mode="a", header=False, index=False,
                     encoding="utf-8-sig", sep=',')
        table.to_json(JSON_PATH, orient='records', force_ascii=False)
        with open(JSON_PATH, 'r', encoding='utf-8') as f:
            data = json.load(f)

        for pos, entry in enumerate(data):
            row = pos + 1  # sheet row 0 holds the header
            try:
                sh.cells[row, 0].value = pos + 1
                for col, value in excel_cells(entry).items():
                    sh.cells[row, col].value = value
            except Exception as exc:  # keep writing the remaining rows
                print(str(pos), 'excel写入错误', exc)
        wb.save()
    finally:
        # Always release Chrome and the hidden Excel instance.
        if driver is not None:
            driver.quit()
        app.quit()


if __name__ == "__main__":
    main()

完整代码:重新调整股票代码和股票名称

"""Scrape key financial figures for a list of stocks from Xueqiu into Excel.

Variant of the script in which the stock name and code are read from the
overview page.  Stock codes are taken from column C of sheet ``worksheet``
in ``fundWebd.xlsx`` (the first non-empty cell is treated as the header).
For every code five pages are opened in Chrome through Selenium, parsed with
BeautifulSoup, and the extracted values are appended to ``fundWebd.csv``,
dumped to ``fundWebd.json`` and written back into the workbook, one sheet
row per stock.

All cell/span positions used below are tied to the page layout at the time
the script was written and have to be re-checked when the site changes.
"""
import json
import time

import pandas as pd

# NOTE(review): the published listing lost the scheme and host of every URL
# (only '/S/...' and '/snowman/S/...' survived).  The original comments name
# Xueqiu (雪球网), so its host is assumed here -- confirm before running.
XUEQIU_HOST = 'https://xueqiu.com'
WORKBOOK_PATH = 'fundWebd.xlsx'
SHEET_NAME = 'worksheet'
CSV_PATH = 'fundWebd.csv'
JSON_PATH = 'fundWebd.json'

# Second button in the detail page's button list; it is clicked before the
# indicator, balance-sheet and income tables are read.
# NOTE(review): presumably switches the reporting period -- confirm.
TAB_XPATH = ".//div[contains(@class,'stock-info-btn-list')]/span[2]"

# (column name, position in the flattened list of table values, unit to strip)
# NOTE(review): the original assigned '利润(亿)' twice (position 13, then 10);
# only the second assignment ever took effect, so only that one is kept.  The
# two were probably meant to be differently named columns.
INDICATOR_CELLS = (
    ('利润(亿)', 10, '亿'),
    ('营业额', 0, '亿'),
    ('负债率', 85, ''),
    ('经营现金流', 50, ''),
    ('未分配利润', 45, ''),
    ('公积金', 40, ''),
    ('毛利率', 75, ''),
    ('净利率', 80, ''),
    ('ROA总报酬率', 65, ''),
    ('ROE净收益率', 60, ''),
    ('账款周期', 120, ''),
    ('存货周转', 150, ''),
    ('总资产周转率', 145, ''),
)

# Fixed column order for the CSV/JSON output (modules a, b, c, d, e).
OUTPUT_COLUMNS = (
    ['序号', '股票', '代码', '股价', '总市值(亿)', '总股本(亿)', 'EPS每股收益',
     '分红率', '营市比', 'PE市盈率', 'PB市净率']
    + [name for name, _, _ in INDICATOR_CELLS]
    + ['分红', '货币资金', '存货', '研发费用', '利息费用', '利息收入']
)

# Zero-based sheet column -> record key.  Columns 9, 10 and 20-24 are left
# untouched, exactly as in the original (their writes were commented out).
EXCEL_COLUMNS = {
    1: '股票', 2: '代码', 3: '股价', 4: '总市值(亿)', 5: '总股本(亿)',
    6: '营业额', 7: 'EPS每股收益', 8: '分红', 11: 'PE市盈率', 12: 'PB市净率',
    13: '负债率', 14: '经营现金流', 15: '货币资金', 16: '存货',
    18: '利润(亿)', 19: '利润(亿)', 25: '未分配利润', 26: '公积金',
    27: '毛利率', 28: '净利率', 29: 'ROA总报酬率', 30: 'ROE净收益率',
    31: '账款周期', 32: '存货周转', 33: '总资产周转率',
}


def text_before(text, marker):
    """Return the part of *text* before *marker* (all of it when absent)."""
    return text.partition(marker)[0].strip()


def split_name_code(label):
    """Split a label such as ``'名称(SH:600282)'`` into ``(name, code)``.

    The name may have any length (the original assumed four characters).
    Colons, ASCII or full-width, are removed from the code; the code is
    ``''`` when the label contains no opening parenthesis.
    """
    name, paren, rest = label.strip().partition('(')
    code = rest.strip().strip(')') if paren else ''
    return name.strip(), code.replace(':', '').replace(':', '')


def dividend_amount(plan):
    """Return the text between '派' and the following '元', else ``''``."""
    _, found, tail = plan.partition('派')
    if not found:
        return ''
    amount, found, _ = tail.partition('元')
    return amount.strip() if found else ''


def price_to_book(price, book_value):
    """Return price / book value rounded to two decimals, as text.

    ``''`` is returned when either input is not a number or the book value
    is zero, so one unreadable figure does not discard the whole record.
    """
    try:
        return str(round(float(price) / float(book_value), 2))
    except (ValueError, ZeroDivisionError):
        return ''


def excel_cells(entry):
    """Map one JSON record to ``{zero-based column: value}`` for the sheet.

    Keys that are missing or ``None`` are skipped so that existing sheet
    content (in particular the stock code in column C) is never blanked.
    """
    cells = {
        col: entry[key]
        for col, key in EXCEL_COLUMNS.items()
        if entry.get(key) is not None
    }
    cost, income = entry.get('利息费用'), entry.get('利息收入')
    if cost is not None and income is not None:
        # The original first computed a rounded numeric ratio here and then
        # overwrote it with this text; only the text is kept.
        cells[17] = f"{cost} /{income}"
    research, revenue = entry.get('研发费用'), entry.get('营业额')
    if research is not None and revenue is not None:
        cells[34] = f"{research}/{revenue}"  # R&D expense / revenue
    return cells


def _one_row(record):
    """Return *record* as a one-row DataFrame (empty frame for no data)."""
    return pd.DataFrame([record]) if record else pd.DataFrame()


def _pick(values, index, unit=''):
    """Return ``values[index]`` without *unit*; ``''`` if out of range."""
    try:
        return values[index].strip().strip(unit)
    except IndexError:
        return ''


def data_a(html):
    """Extract name, code, price and quote figures from the overview page.

    Args:
        html: iterable of parsed ``stock__main`` container tags.

    Returns:
        One-row DataFrame; empty when nothing could be read.
    """
    record = {}
    for pos, item in enumerate(html):
        try:
            name_div = item.find_all('div', attrs={'class': 'stock-name'})[0]
            price = item.find_all('div', attrs={'class': 'stock-current'})[0]
            record['序号'] = ''
            record['股票'], record['代码'] = split_name_code(name_div.text)
            record['股价'] = price.text.strip().strip('¥')
            for table in item.find_all('table', attrs={'class': 'quote-info'}):
                spans = table.find_all('span')
                record['总市值(亿)'] = spans[19].text.strip().strip('亿')
                record['总股本(亿)'] = ''
                record['EPS每股收益'] = spans[16].text.strip()
                record['分红率'] = ''
                record['营市比'] = ''
                record['PE市盈率'] = spans[10].text.strip()
                # Span 20 is divided into the price to obtain PB.
                # NOTE(review): presumably net assets per share -- confirm.
                record['PB市净率'] = price_to_book(
                    record['股价'], spans[20].text.strip())
            print(str(pos), "第一模块写入正常")
        except (AttributeError, IndexError) as exc:
            print(str(pos), "第一模块写入异常", exc)
    return _one_row(record)


def data_b(html):
    """Extract the main financial indicators.

    The first text node of every ``<p>`` inside the table cells is collected
    into one flat list; the indicators are then read at the fixed positions
    listed in ``INDICATOR_CELLS``.  Positions beyond the end of the list
    yield ``''`` instead of aborting the whole stock.

    Args:
        html: iterable of parsed ``<tbody>`` tags.

    Returns:
        One-row DataFrame with one column per indicator.
    """
    values = []
    for tbody in html:
        for row in tbody.find_all('tr'):
            for cell in row.find_all('td'):
                try:
                    values.append(str(cell.find_all('p')[0].contents[0]))
                except IndexError:
                    continue  # cell without a (non-empty) <p>
    record = {
        name: _pick(values, index, unit)
        for name, index, unit in INDICATOR_CELLS
    }
    missing = [name for name, index, _ in INDICATOR_CELLS
               if index >= len(values)]
    if missing:
        print(str(len(values)), "第二模块写入异常", missing)
    else:
        print(str(len(values)), "第二模块写入正常")
    return _one_row(record)


def data_c(html):
    """Extract the dividend per plan from the dividend page.

    Args:
        html: iterable of parsed ``<tbody>`` tags; the second cell of each
            body is expected to hold the plan text.

    Returns:
        One-row DataFrame; empty when nothing could be read.
    """
    record = {}
    for pos, item in enumerate(html):
        try:
            plan = item.find_all('td')[1].text.strip()
            record['分红'] = dividend_amount(plan)
            print(str(pos), "第三模块写入正常")
        except (AttributeError, IndexError) as exc:
            print(str(pos), "第三模块写入异常", exc)
    return _one_row(record)


def data_d(html):
    """Extract cash and inventory from the balance-sheet page.

    Each value is cut at its own '亿'.  The original sliced both cells with
    the cash cell's position minus one, which dropped a character and
    mis-cut the inventory value.

    Args:
        html: iterable of parsed ``<tbody>`` tags.

    Returns:
        One-row DataFrame; empty when nothing could be read.
    """
    record = {}
    for pos, item in enumerate(html):
        try:
            cells = item.find_all('td')
            record['货币资金'] = text_before(cells[7].text, '亿')
            record['存货'] = text_before(cells[61].text, '亿')
            print(str(pos), "第四模块写入正常")
        except (AttributeError, IndexError) as exc:
            print(str(pos), "第四模块写入异常", exc)
    return _one_row(record)


def data_e(html):
    """Extract R&D expense, interest expense and interest income.

    Each value is cut at '亿'; a value without that unit is kept whole
    rather than losing its last character.

    Args:
        html: iterable of parsed ``<tbody>`` tags of the income statement.

    Returns:
        One-row DataFrame; empty when nothing could be read.
    """
    record = {}
    for pos, item in enumerate(html):
        try:
            cells = item.find_all('td')
            record['研发费用'] = text_before(cells[43].text, '亿')
            record['利息费用'] = text_before(cells[55].text, '亿')
            record['利息收入'] = text_before(cells[61].text, '亿')
            print(str(pos), "第五模块写入正常")
        except (AttributeError, IndexError) as exc:
            print(str(pos), "第五模块写入异常", exc)
    return _one_row(record)


def _grab(driver, url, load_wait, click_wait=None):
    """Open *url*, optionally click the tab button, and parse the page."""
    from bs4 import BeautifulSoup
    from selenium.webdriver.common.by import By

    driver.get(url)
    time.sleep(load_wait)
    if click_wait is not None:
        driver.find_element(By.XPATH, TAB_XPATH).click()
        time.sleep(click_wait)
    return BeautifulSoup(driver.page_source, 'html.parser')


def _scrape_stock(driver, bandcode):
    """Visit the five pages of one stock and return the merged record.

    A page that fails is reported and skipped; the other pages are still
    collected, and nothing is carried over from a previously scraped stock.
    The navigation order and the pauses follow the original script.
    """
    home = XUEQIU_HOST + '/S/' + bandcode
    detail = XUEQIU_HOST + '/snowman/S/' + bandcode + '/detail#/'
    main_class = {'class': 'container-sm float-left stock__main'}
    # label, lead-in, pause, url, load wait, tab-click wait, cool-down, parser
    pages = (
        ('基础数据1', None, 0, home, 3, None, 5,
         lambda s: data_a(s.find_all('div', attrs=main_class))),
        ('主要指标', 'back', 2, detail + 'ZYCWZB', 4, 5, 4,
         lambda s: data_b(s.find_all('tbody'))),
        ('分红', 'back', 4, detail + 'FHPS', 5, None, 6,
         lambda s: data_c(s.find_all('tbody'))),
        ('资产负债表', 'home', 2, detail + 'ZCFZB', 4, 5, 2,
         lambda s: data_d(s.find_all('tbody'))),
        ('利润表', 'back', 2, detail + 'GSLRB', 4, 2, 2,
         lambda s: data_e(s.find_all('tbody'))),
    )
    record = {}
    for label, lead_in, pause, url, load_wait, click_wait, cool_down, parse in pages:
        try:
            if lead_in == 'back':
                driver.back()
            elif lead_in == 'home':
                driver.get(home)
            time.sleep(pause)
            frame = parse(_grab(driver, url, load_wait, click_wait))
            if not frame.empty:
                record.update(frame.iloc[0].to_dict())
        except Exception as exc:  # best effort per page: report and go on
            print(label, "当页数据操作失败", exc)
        time.sleep(cool_down)
    return record


def main():
    """Run the scrape for every code in the workbook and store the results."""
    import xlwings as xw
    from selenium.webdriver import Chrome, ChromeOptions

    app = xw.App(visible=False, add_book=False)
    driver = None
    try:
        wb = app.books.open(WORKBOOK_PATH)
        sh = wb.sheets[SHEET_NAME]
        app.display_alerts = False
        app.screen_updating = False
        # Column C: header first, then one stock code per row.
        codes = [v for v in sh.range("c:c").value if v is not None][1:]

        driver = Chrome(options=ChromeOptions())  # visible browser window
        records = []
        for pos, bandcode in enumerate(codes):
            print(str(pos), bandcode, '第' + str(pos + 1) + '只股票开始写入')
            records.append(_scrape_stock(driver, str(bandcode)))

        table = pd.DataFrame(records).reindex(columns=OUTPUT_COLUMNS)
        print(table)
        table.to_csv(CSV_PATH, mode="a", header=False, index=False,
                     encoding="utf-8-sig", sep=',')
        table.to_json(JSON_PATH, orient='records', force_ascii=False)
        with open(JSON_PATH, 'r', encoding='utf-8') as f:
            data = json.load(f)

        for pos, entry in enumerate(data):
            row = pos + 1  # sheet row 0 holds the header
            try:
                sh.cells[row, 0].value = pos + 1
                for col, value in excel_cells(entry).items():
                    sh.cells[row, col].value = value
            except Exception as exc:  # keep writing the remaining rows
                print(str(pos), 'excel写入错误', exc)
        wb.save()
    finally:
        # Always release Chrome and the hidden Excel instance.
        if driver is not None:
            driver.quit()
        app.quit()


if __name__ == "__main__":
    main()

完整代码:3,重新调整市盈率、市净率、股东人数、兼容浏览器

"""Collect per-stock financial figures with Selenium and write them to Excel.

Stock codes are read from column C of ``fundWebd.xlsx`` (sheet ``worksheet``).
For every code several quote / financial-statement pages are opened in Chrome,
parsed with BeautifulSoup, merged into one table row, and the accumulated
table is written to ``fundWebd.csv``, ``fundWebd.json`` and back into the
workbook.
"""
import csv
import json
import re
import time
from datetime import datetime

import pandas as pd
import requests
import xlwings as xw
import xlwt
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver import ActionChains, Chrome, ChromeOptions
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# NOTE(review): the published listing contained host-less URLs ('/S/...',
# '/PC_HSF10/...'), which a browser cannot load.  The hosts below are
# reconstructed from the variable names / comments of the listing (Xueqiu and
# East Money) -- confirm them before relying on the script.
XUEQIU_BASE = 'https://xueqiu.com'
EASTMONEY_BASE = 'http://emweb.securities.eastmoney.com'

# Button clicked on the statement pages before parsing them.
# NOTE(review): presumably switches the table to the annual-report view.
_REPORT_TOGGLE_XPATH = ".//div[contains(@class,'stock-info-btn-list')]/span[2]"

# NOTE(review): the listing stored two different cells under the SAME column
# name ('营业额', '利润(亿)', '季度'), so the second assignment overwrote the
# first and the growth formulas degenerated to LN(x/x).  The comparison value
# now gets its own column; the names below are new.
REVENUE_PRIOR_KEY = '营业额(对比期)'
PROFIT_PRIOR_KEY = '利润(亿)(对比期)'
QUARTER_PRIOR_KEY = '季度(对比期)'

# First digit of the numeric stock code -> (board label, span index of the
# market cap, span index of PE, span index of PB) inside the quote table.
_QUOTE_SPAN_INDEX = {
    '6': ('上海主板', 19, 10, 15),
    '3': ('深圳创业板', 11, 16, 21),
    '0': ('深圳主板', 19, 10, 15),
}

# (column name, index into the flat list of value cells, characters stripped
# from both ends) for the main-indicator table, in output column order.
_MAIN_INDICATOR_CELLS = (
    ('营业额', 0, '亿'),
    (REVENUE_PRIOR_KEY, 3, '亿'),
    ('EPS每股收益', 30, ''),
    ('负债率', 85, ''),
    ('经营现金流', 50, ''),
    (PROFIT_PRIOR_KEY, 13, '亿'),
    ('利润(亿)', 10, '亿'),
    ('未分配利润', 45, ''),
    ('公积金', 40, ''),
    ('毛利率', 75, ''),
    ('净利率', 80, ''),
    ('ROA总报酬率', 65, ''),
    ('ROE净收益率', 55, ''),
    ('账款周期', 125, ''),
    ('存货周转', 120, ''),
    ('总资产周转率', 145, ''),
)


def _row_frame(values):
    """Return a one-row DataFrame for the dict *values* (empty frame if none)."""
    return pd.DataFrame([values]) if values else pd.DataFrame()


def _text_before(text, marker):
    """Return *text* up to the first *marker*.

    Mirrors the original slicing: when *marker* is absent ``find`` yields -1
    and the last character is dropped.
    """
    return text[0:text.find(marker)]


def data_a(html, numcode):
    """Parse price, market cap, PE and PB from the quote page.

    Args:
        html: iterable of the ``stock__main`` container tags.
        numcode: first digit of the numeric stock code; selects which
            ``span`` positions of the quote table are read.

    Returns:
        One-row DataFrame; columns that could not be read are missing.
    """
    values = {}
    for i, item in enumerate(html):
        try:
            values['序号'] = ''
            values['股价'] = item.find_all('div', attrs={'class': 'stock-current'})[0].text.strip('¥')
            for quote_table in item.find_all('table', attrs={'class': 'quote-info'}):
                spans = quote_table.find_all('span')
                values['股东持股'] = ''
                values['营市比'] = ''
                if numcode in _QUOTE_SPAN_INDEX:
                    board, cap_at, pe_at, pb_at = _QUOTE_SPAN_INDEX[numcode]
                    print(numcode, board)
                    values['总市值(亿)'] = spans[cap_at].text.strip('亿')
                    values['PE市盈率'] = spans[pe_at].text
                    values['PB市净率'] = spans[pb_at].text
                else:
                    print('错误')
                print(_row_frame(values))
            print(str(i), "第一模块写入正常")
        except (AttributeError, IndexError) as exc:
            print(str(i), "第一模块写入异常", exc)
    return _row_frame(values)


def data_b(html):
    """Parse the main financial indicators.

    The first child of the first ``<p>`` of every table cell is collected
    into one flat list; ``_MAIN_INDICATOR_CELLS`` maps positions of that list
    to output columns.

    Args:
        html: iterable of ``tbody`` tags of the indicator page.

    Returns:
        One-row DataFrame.

    Raises:
        IndexError: if the page yielded fewer value cells than expected.
    """
    cell_values = []
    for body in html:
        for table_row in body.find_all('tr'):
            for cell in table_row.find_all('td'):
                paragraph = cell.find('p')
                # Cells without a <p> carry no value and are skipped.
                if paragraph is None or not paragraph.contents:
                    continue
                cell_values.append(paragraph.contents[0])
    values = {
        column: cell_values[position].strip(unit)
        for column, position, unit in _MAIN_INDICATOR_CELLS
    }
    print("第二模块写入正常")
    frame = _row_frame(values)
    print(frame)
    return frame


def data_b_a(html):
    """Parse the quarterly revenue and its comparison value.

    Args:
        html: iterable of ``tbody`` tags of the indicator page as loaded,
            before the report-type button is clicked.

    Returns:
        One-row DataFrame with '季度' (cell 7) and the comparison column
        (cell 11), each cut at the first '亿'.

    Raises:
        IndexError: if a ``tbody`` has fewer than 12 cells.
    """
    values = {}
    for body in html:
        cells = body.find_all('td')
        values['季度'] = _text_before(cells[7].text, '亿')
        values[QUARTER_PRIOR_KEY] = _text_before(cells[11].text, '亿')
        print(_row_frame(values))
    return _row_frame(values)


def data_c(html, html1):
    """Parse stock name, stock code and the dividend figure.

    Args:
        html: iterable of ``tbody`` tags of the dividend page.
        html1: iterable of tags whose text is ``name(code)``.

    Returns:
        One-row DataFrame with '股票', '代码', '分红率' (blank) and '分红'.
    """
    values = {}
    for title in html1:
        text = title.text
        paren_at = text.find('(')
        values['股票'] = text[0:paren_at].strip(')')
        values['代码'] = text[paren_at + 1:].strip(')')
    for i, body in enumerate(html):
        try:
            plan = body.find_all('td')[1].text.strip()
            # The figure sits between '派' and '元' in the plan text.
            payout = plan[plan.find('派') + 1:plan.find('元')]
            values['分红率'] = ''
            values['分红'] = payout
            print(_row_frame(values))
            print(str(i), "第三模块写入正常")
        except (AttributeError, IndexError) as exc:
            print(str(i), "第三模块写入异常", exc)
    return _row_frame(values)


def data_d(html):
    """Parse cash ('货币资金', cell 7) and inventory ('存货', cell 61).

    Args:
        html: iterable of ``tbody`` tags of the balance-sheet page.

    Returns:
        One-row DataFrame; both values are cut at the first '亿'.

    Raises:
        IndexError: if a ``tbody`` has fewer than 62 cells.
    """
    values = {}
    for i, body in enumerate(html):
        cells = body.find_all('td')
        values['货币资金'] = _text_before(cells[7].text, '亿')
        values['存货'] = _text_before(cells[61].text, '亿')
        print(_row_frame(values))
        print(str(i), "第四模块写入正常")
    return _row_frame(values)


def data_e(html):
    """Parse R&D expense, interest expense and interest income.

    Args:
        html: iterable of ``tbody`` tags of the income-statement page.

    Returns:
        One-row DataFrame with the raw text of cells 43, 55 and 61.
    """
    values = {}
    for i, body in enumerate(html):
        try:
            cells = body.find_all('td')
            values['研发费用'] = cells[43].text
            values['利息费用'] = cells[55].text
            values['利息收入'] = cells[61].text
            print(_row_frame(values))
            print(str(i), "第五模块写入正常")
        except (AttributeError, IndexError) as exc:
            print(str(i), "第五模块写入异常", exc)
    return _row_frame(values)


def data_f(html):
    """Parse shareholder count and the top-ten holders' share.

    Args:
        html: iterable of the shareholder ``table`` tags.

    Returns:
        One-row DataFrame with '股东人数' (row 2) and
        '十大流通股东持股占比' (row 10, with '%' appended).
    """
    values = {}
    for i, table in enumerate(html):
        try:
            rows = table.find_all('tr')
            values['股东人数'] = rows[2].contents[3].text.strip('万').strip(',')
            values['十大流通股东持股占比'] = rows[10].contents[3].text.strip('万').strip(',') + '%'
            print(str(i), '股东数据写入正常')
        except (AttributeError, IndexError) as exc:
            print(str(i), '股东数据写入异常', exc)
    frame = _row_frame(values)
    print(frame, '股东')
    return frame


def _page_soup(driver):
    """Parse the page currently shown by *driver*."""
    return BeautifulSoup(driver.page_source, 'html.parser')


def _scrape_stock(driver, bandcode, pause=0.6):
    """Visit every page for *bandcode* and return its merged one-row frame.

    The navigation order and the waits follow the original script; *pause*
    is its base delay in seconds.
    NOTE(review): the extra visits to the quote / shareholder page before the
    statement pages are kept as found; presumably they make the traffic look
    like a normal browsing session -- confirm they are still needed.
    """
    quote_url = XUEQIU_BASE + '/S/' + bandcode
    detail_url = XUEQIU_BASE + '/snowman/S/' + bandcode + '/detail#/'
    holders_url = EASTMONEY_BASE + '/PC_HSF10/ShareholderResearch/Index?type=web&code=' + bandcode

    # 1. quote page: price, market cap, PE, PB
    driver.get(quote_url)
    containers = _page_soup(driver).find_all(
        'div', attrs={'class': 'container-sm float-left stock__main'})
    df_a = data_a(containers, bandcode[2:3])
    time.sleep(pause)
    driver.back()
    time.sleep(pause)

    # 2. main indicators: the page as loaded, then after the toggle
    driver.get(detail_url + 'ZYCWZB')
    time.sleep(pause * 4 + 0.5)
    df_b_a = data_b_a(_page_soup(driver).find_all('tbody'))
    driver.find_element(By.XPATH, _REPORT_TOGGLE_XPATH).click()
    time.sleep(pause + 0.7)
    df_b = data_b(_page_soup(driver).find_all('tbody'))

    # 3. dividends
    driver.get(detail_url + 'FHPS')
    time.sleep(pause * 2.5)
    soup = _page_soup(driver)
    df_c = data_c(soup.find_all('tbody'),
                  soup.find_all('div', attrs={'class': 'stock-info-name'}))

    # 4. balance sheet
    driver.get(quote_url)
    time.sleep(pause - 0.2)
    driver.get(detail_url + 'ZCFZB')
    time.sleep(pause * 2.5 + 0.1)
    driver.find_element(By.XPATH, _REPORT_TOGGLE_XPATH).click()
    time.sleep(pause + 0.3)
    df_d = data_d(_page_soup(driver).find_all('tbody'))

    # 5. income statement
    driver.get(holders_url)
    time.sleep(pause - 0.2)
    driver.get(detail_url + 'GSLRB')
    time.sleep(pause * 2.5 + 0.2 + 0.1)
    driver.find_element(By.XPATH, _REPORT_TOGGLE_XPATH).click()
    time.sleep(pause + 0.3)
    df_e = data_e(_page_soup(driver).find_all('tbody'))

    # 6. shareholders
    driver.get(holders_url)
    time.sleep(pause - 0.2)
    df_f = data_f(_page_soup(driver).find_all('table', attrs={'id': 'Table0'}))
    time.sleep(pause + 0.5)

    # Column-wise merge of the seven partial rows.
    return pd.concat([df_a, df_b_a, df_b, df_c, df_d, df_e, df_f], axis=1)


def _write_sheet(sh, data):
    """Write the records in *data* to the sheet, one record per row.

    Row 0 is never written (the stock list is read from row 1 on, so row 0 is
    presumably the header).  Columns 9, 10, 20, 23 and 24 are left untouched,
    as in the original.  A record with missing values is reported and
    skipped; cells written before the failure keep their new value.
    """
    for i, rec in enumerate(data):
        row = i + 1

        def put(col, value, row=row):
            sh.cells[row, col].value = value

        try:
            put(0, row)
            put(1, rec['股票'])
            put(2, rec['代码'])
            put(3, rec['股价'])
            put(4, rec['总市值(亿)'])
            if rec['十大流通股东持股占比'][0:1] != '-':
                # NOTE(review): presumably the average holding value per
                # shareholder outside the top ten holders -- confirm.
                put(5, '=(' + rec['总市值(亿)'] + '*100000000-(' + rec['总市值(亿)'] + '/'
                    + rec['股价'] + ')*' + rec['十大流通股东持股占比'] + '*' + rec['股价']
                    + '*100000000)/(' + rec['股东人数'] + '*10000)/10000')
            else:
                put(5, '')
            put(6, rec['营业额'])
            put(7, rec['EPS每股收益'])
            put(8, rec['分红'])
            put(11, rec['PE市盈率'])
            put(12, rec['PB市净率'])
            put(13, rec['负债率'] + '%')
            put(14, rec['经营现金流'])
            put(15, rec['货币资金'])
            put(16, rec['存货'])
            put(17, rec['利息费用'] + ' /' + rec['利息收入'])
            put(18, rec[PROFIT_PRIOR_KEY])
            put(19, rec['利润(亿)'])
            # Compound growth over three periods: (current / prior)^(1/3) - 1.
            put(21, '=EXP(LN(' + rec['营业额'] + '/' + rec[REVENUE_PRIOR_KEY] + ')/3)-1')
            put(22, '=EXP(LN(' + rec['季度'] + ' /' + rec[QUARTER_PRIOR_KEY] + ')/3)-1')
            put(25, rec['未分配利润'])
            put(26, rec['公积金'])
            put(27, rec['毛利率'] + '%')
            put(28, rec['净利率'] + '%')
            put(29, rec['ROA总报酬率'] + '%')
            put(30, rec['ROE净收益率'] + '%')
            put(31, rec['账款周期'])
            put(32, rec['存货周转'])
            put(33, rec['总资产周转率'] + '%')
            put(34, rec['研发费用'] + '/' + rec['营业额'])
        except (KeyError, TypeError) as exc:
            # KeyError: column never scraped; TypeError: value is null.
            print(str(i), 'excel写入错误', exc)


def main():
    """Scrape every stock listed in the workbook and store the results."""
    app = xw.App(visible=False, add_book=False)
    driver = None
    try:
        wb = app.books.open('fundWebd.xlsx')
        sh = wb.sheets['worksheet']
        codes = [cell for cell in sh.range("c:c").value if cell is not None]
        app.display_alerts = False
        app.screen_updating = False

        driver = Chrome(options=ChromeOptions())  # visible browser window

        rows = []  # newest stock first, as in the original row-wise merge
        # codes[0] is skipped: the original started reading at the second cell.
        for i, bandcode in enumerate(codes[1:]):
            print(str(i), bandcode, '第' + str(i + 1) + '只股票开始写入')
            try:
                row = _scrape_stock(driver, bandcode)
            except Exception as exc:  # per-stock boundary: report and move on
                print(str(i), "当页数据操作失败", exc)
                continue
            print(row)
            rows.insert(0, row)
            # Append only the new row; re-writing the whole table in append
            # mode duplicated every earlier row.
            row.to_csv("fundWebd.csv", mode="a", header=False, index=False,
                       encoding="utf-8-sig", sep=',')
            # Checkpoint of everything collected so far.
            pd.concat(rows, axis=0).to_json(
                'fundWebd.json', orient='records', indent=1, force_ascii=False)

        if rows:
            with open('fundWebd.json', 'r', encoding='utf-8') as f:
                data = json.load(f)
            _write_sheet(sh, data)

        try:
            wb.save('fundWebd.xlsx')
            wb.close()
        except Exception as exc:  # keep the original best-effort save
            print('有错误代码', exc)
    finally:
        # Always release Excel and the browser; quit() closes every window.
        app.quit()
        if driver is not None:
            driver.quit()


if __name__ == "__main__":
    main()

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。