1200字范文,内容丰富有趣,写作的好帮手!
1200字范文 > pandas数据分析京东评论者衣服购买情况pyecharts生成可视化图表

pandas数据分析京东评论者衣服购买情况pyecharts生成可视化图表

时间:2022-07-18 08:25:29

相关推荐

pandas数据分析京东评论者衣服购买情况pyecharts生成可视化图表

pyecharts官网(组合图表文档):https://pyecharts.org/#/zh-cn/composite_charts

"""Scrape JD product comments into an Excel sheet and chart them with pyecharts.

Workflow (see ``main``):
1. If the workbook does not exist yet, download the comment pages and store
   them in ``jd_comment.xlsx``.
2. Render stand-alone HTML charts (colour, buyer, size, size range, word
   cloud, liquid gauge).
3. Build a draggable composite page and, once a saved layout is available,
   re-render it with that layout.

Original author reference: /weixin_45081575 (host missing in the source text).
"""
import os
import json

import requests
import pandas as pd
import jieba.analyse
from pyecharts import options as opts
from pyecharts.globals import ThemeType
from pyecharts.globals import SymbolType
from pyecharts.charts import Pie, Bar, Map, WordCloud, Liquid, Page

# NOTE(review): the text this script was recovered from had the scheme/host
# stripped from every URL. "club.jd.com" / "item.jd.com" are the presumed
# original hosts -- confirm before relying on them.
url = (
    "https://club.jd.com/comment/productPageComments.action"
    "?callback=fetchJSON_comment98vv59&productId=100001068301&score=0"
    "&sortType=5&page={}&pageSize=10&isShadowSku=0&rid=0&fold=1"
)
headers = {
    'Referer': 'https://item.jd.com/100001068301.html',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36',
}

# Optional stop-word list used by the word cloud.
stop_words_txt = "stop_words.txt"

# Column order of the Excel sheet.
_COLUMNS = ["num", "nickname", "bra_size", "color", "comment", "date"]
_SHEET_NAME = "jd_comment"
_ALL_HTML = "all.html"
_CHART_CONFIG = "chart_config.json"


def _strip_jsonp(text):
    """Return the JSON object wrapped in a ``callback(...);`` JSONP response.

    Raises:
        ValueError: if *text* contains no parenthesised payload.
    """
    start = text.find("(")
    end = text.rfind(")")
    if start == -1 or end <= start:
        raise ValueError("response is not a JSONP payload")
    return json.loads(text[start + 1:end])


def get_comment(file_name, pages=50):
    """Download *pages* comment pages and append them to *file_name*.

    Each page is written as soon as it is parsed, so an interrupted run keeps
    the pages fetched so far.

    Args:
        file_name: path of the Excel workbook to create or extend.
        pages: number of result pages to request, starting at page 0.

    Raises:
        requests.RequestException: on network errors, timeouts or HTTP errors.
        ValueError: if a response body is not the expected JSONP wrapper.
    """
    num = 1
    for page in range(pages):
        print(f"处理第{page+1}页")
        resp = requests.get(url.format(page), headers=headers, timeout=10)
        resp.raise_for_status()
        payload = _strip_jsonp(resp.text)

        # Fields used: content, productColor, productSize, referenceTime
        # and nickname.
        comment_list = []
        for comment in payload.get("comments") or []:
            print(comment["nickname"])
            comment_list.append({
                "num": num,
                "nickname": comment["nickname"],
                "bra_size": comment['productSize'],
                "color": comment['productColor'],
                "comment": (comment['content']).replace("\n", " "),
                "date": comment['referenceTime'],
            })
            num += 1

        # An empty page would only rewrite the workbook unchanged.
        if comment_list:
            save_to_excel(file_name, comment_list)
    print("表格保存完毕")


def save_to_excel(file_name, comment_list):
    """Write *comment_list* to *file_name*, appending if the file exists.

    Args:
        file_name: path of the Excel workbook.
        comment_list: list of dicts keyed by the sheet's column names.
    """
    new_rows = pd.DataFrame(comment_list, columns=_COLUMNS)
    if os.path.exists(file_name):
        # DataFrame.append was removed in pandas 2.0; concat is the
        # supported way to add rows.
        df = pd.concat([pd.read_excel(file_name), new_rows], ignore_index=True)
    else:
        df = new_rows
    df.to_excel(file_name, sheet_name=_SHEET_NAME, columns=_COLUMNS, index=False)


def _value_counts(series):
    """Return ``[(label, count), ...]`` for *series*, most frequent first."""
    return [(label, int(count)) for label, count in series.value_counts().items()]


def _bar_chart(pairs, series_name, title, x_name):
    """Build a single-series bar chart from ``(label, count)`` pairs."""
    return (
        Bar()
        .add_xaxis([label for label, _ in pairs])
        .add_yaxis(series_name, [count for _, count in pairs])
        .set_global_opts(
            title_opts=opts.TitleOpts(title=title),
            xaxis_opts=opts.AxisOpts(name=x_name),
            yaxis_opts=opts.AxisOpts(name="数量"),
            toolbox_opts=opts.ToolboxOpts(),
        )
    )


# Colour distribution bar chart; based on
# /weixin_45081575/article/details/103449805
def color_chart(df):
    """Render the ten most frequent colours to ``颜色柱状图.html``."""
    print("准备生成:颜色分布柱状图")
    colors = _value_counts(df.color)[:10]
    bar = _bar_chart(colors, "颜色购买统计", "颜色分布柱状图", "颜色")
    bar.render(path="颜色柱状图.html")


def nick_name(df):
    """Render the ten most frequent nicknames to ``购买者分布柱状图.html``."""
    print("准备生成:购买者分布柱状图")
    nick_names = _value_counts(df.nickname)[:10]
    bar = _bar_chart(nick_names, "购买者数量", "购买者分布柱状图", "购买者")
    bar.render("购买者分布柱状图.html")


def size_chart(df):
    """Render the per-size counts, sorted by size, to ``尺码柱状图.html``."""
    print("准备生成:尺码分布柱状图")
    sizes = sorted(_value_counts(df.bra_size))
    bar = _bar_chart(sizes, "尺码购买统计", "尺码分布柱状图", "尺码")
    bar.render("尺码柱状图.html")


def avg_cup(df):
    """Render bar and pie charts of the counts grouped by the letters A-D.

    A size label is counted once for every letter of "ABCD" it contains.

    Returns:
        tuple: ``(bar, pie)`` chart objects.
    """
    print("准备生成:区间饼图和柱状图")
    cup_dic = {letter: 0 for letter in "ABCD"}
    for label, count in _value_counts(df.bra_size):
        for letter in cup_dic:
            if letter in str(label):
                cup_dic[letter] += count

    bar = _bar_chart(list(cup_dic.items()), "尺码数量", "尺码区间柱状图", "尺码")
    bar.render("区间柱状图.html")

    pie = (
        Pie()
        .add("数量", list(cup_dic.items()))
        .set_global_opts(title_opts=opts.TitleOpts(title="尺码区间饼图"))
        # {b} is the name, {c} the count, {d} the percentage.
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}:{c}(占比:{d}%)"))
    )
    pie.render("区间饼图.html")
    return (bar, pie)


def word_cloud(df):
    """Render a word cloud of the top 65 TextRank keywords in the comments.

    Returns:
        WordCloud: the rendered chart object.
    """
    print("准备生成:评论词云")
    if os.path.exists(stop_words_txt):
        jieba.analyse.set_stop_words(stop_words_txt)
    # Empty cells are read back from Excel as NaN; drop them before joining.
    text = ''.join(df.comment.dropna().astype(str))
    kw_list = jieba.analyse.textrank(text, topK=65, withWeight=True)
    chart = (
        WordCloud(init_opts=opts.InitOpts(bg_color='#c7edcc'))
        # word_size_range sets the font size range; shape picks the outline
        # (RECT, ROUND_RECT, TRIANGLE, DIAMOND, ARROW). A custom outline can
        # be supplied with mask_image="<image file>" instead of shape.
        .add("", kw_list, word_size_range=[15, 100], shape=SymbolType.DIAMOND)
        .set_global_opts(
            title_opts=opts.TitleOpts(title="评论标题词云Top65"),
            toolbox_opts=opts.ToolboxOpts(),
        )
    )
    chart.render("词云.html")
    return chart


def water():
    """Render a demo liquid-fill gauge to ``今日湿度水滴图.html``.

    Returns:
        Liquid: the rendered chart object.
    """
    print("准备生成:今日湿度水滴图")
    liquid = (
        Liquid()
        # Per the original author's note: the first value is the displayed
        # percentage, the second one the amount of water.
        .add("lq", [0.45, 0.5, 0.6], is_outline_show=False, shape=SymbolType.DIAMOND)
        .set_global_opts(
            title_opts=opts.TitleOpts(title="今日湿度水滴图"),
            toolbox_opts=opts.ToolboxOpts(),
        )
    )
    liquid.render("今日湿度水滴图.html")
    return liquid


def main():
    """Fetch the data if needed, render every chart and the composite page."""
    file_name = "jd_comment.xlsx"
    if not os.path.exists(file_name):
        print("表格不存在")
        get_comment(file_name)
    df = pd.read_excel(file_name)

    color_chart(df)
    # Use a distinct local name so the word_cloud function is not shadowed.
    cloud = word_cloud(df)
    nick_name(df)
    size_chart(df)
    bar, pie = avg_cup(df)
    liquid = water()

    # Composite chart; see the pyecharts "composite_charts" documentation.
    page = Page(layout=Page.DraggablePageLayout)
    page.add(liquid, bar, pie, cloud)

    # all.html is rendered only once: the saved layout refers to the chart
    # ids inside that file, so regenerating it would invalidate the layout.
    if not os.path.exists(_ALL_HTML):
        page.render(_ALL_HTML)

    if os.path.exists(_CHART_CONFIG):
        # Re-render all.html with the saved layout (writes resize_render.html).
        Page.save_resize_html(_ALL_HTML, cfg_file=_CHART_CONFIG)
    else:
        print(f"请在浏览器中打开 {_ALL_HTML},调整图表位置后点击左上角 save config 生成 {_CHART_CONFIG},然后重新运行本脚本")


if __name__ == '__main__':
    main()

参考:https://blog.csdn.net/weixin_45081575/article/details/103449805

其中过滤词文件 stop_words.txt 的第一行要空出来,从第二行开始写,一行一个词,并保存为 UTF-8 编码格式。例如:京东

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。