利用百度地图开发者中心提供的 Web API 获取城市 POI 数据。后续会写出涵盖多种参数的完整数据获取教程,本次只给出根据关键词和目的地(城市)获取数据的程序。
程序会将爬取下来的数据保存到本地 CSV 文件。
百度 API 返回的是标准的 JSON 格式数据。具体实现程序如下:
import time
import json
import sys
import requests
from tqdm import tqdm #进度条显示包
# Endpoint of the Baidu Maps place-search web service queried by get_info.
_PLACE_SEARCH_URL = "http://api.map.baidu.com/place/v2/search"


def _csv_field(value):
    """Render one value as a CSV cell, replacing characters that would break the row."""
    return str(value).replace(',', ';').replace('\r', ' ').replace('\n', ' ')


def _poi_row(item, url):
    """Return the ordered CSV cells for one POI record of the API response."""
    location = item.get('location')
    if not isinstance(location, dict):
        # No usable coordinates: both columns fall back to the placeholder.
        location = {}
    return [
        item.get('name', 'default'),
        location.get('lat', 'default'),
        location.get('lng', 'default'),
        item.get('address', 'default'),
        item.get('province', 'default'),
        item.get('city', 'default'),
        item.get('area', 'default'),
        item.get('street_id', 'default'),
        item.get('detail', 'default'),
        item.get('uid', 'default'),
        url,
    ]


def get_info(keyname, place, ak, path):
    """Query the Baidu place-search API per keyword and append the hits to a CSV file.

    One request is sent for every keyword.  Each POI found in a successful
    response becomes one line in ``path`` with the columns
    name, lat, lon, address, province, city, area, street_id, detail, uid, url.
    Missing fields are written as ``default``; commas inside a value are
    replaced by ``;`` so the column layout stays intact.

    Args:
        keyname: iterable of search keywords.
        place: region (e.g. a city name) the search is restricted to.
        ak: access key issued by the Baidu Maps developer center.
        path: CSV file the rows are appended to (UTF-8).

    Returns:
        None.  Failed requests and responses whose ``status`` is not 0 are
        reported on stdout and skipped.
    """
    ignored = 0
    # Materialise the keywords so tqdm knows the total and can show progress.
    keywords = list(keyname)
    for keyword in tqdm(keywords):
        params = {'query': keyword, 'region': place, 'output': 'json', 'ak': ak}
        # Human-readable form of the request, stored in the CSV "url" column.
        url = (_PLACE_SEARCH_URL + "?query=" + str(keyword) + "&region=" + str(place)
               + "&output=json&ak=" + str(ak))
        try:
            time.sleep(0.1)  # short pause between requests to stay under the API rate limit
            # Passing params lets requests percent-encode the keyword and region.
            response = requests.get(_PLACE_SEARCH_URL, params=params, timeout=10)
            data = response.json()
        except (requests.RequestException, ValueError) as err:
            # Network failure or a body that is not JSON: back off, then move on.
            print('request failed for %s: %s' % (keyword, err))
            time.sleep(1)
            continue

        if not isinstance(data, dict) or data.get('status') != 0:
            ignored += 1
            print("已经忽略%d条url" % ignored)
            continue
        if 'results' not in data:
            print('no results')
            continue

        lines = [
            ','.join(_csv_field(cell) for cell in _poi_row(item, url)) + '\n'
            for item in data['results']
        ]
        if lines:
            # One open per response instead of one per POI.
            with open(path, 'a', encoding='utf-8') as f:
                f.writelines(lines)
if __name__ == '__main__':
    # Example run: look up a few building names in Wuhan and store the hits as CSV.
    keyname = ["中奇大厦", "他她公寓", "阳光大厦", "凯风大厦", "鑫成大厦"]
    place = "武汉"
    ak = "你在百度申请的AK"  # placeholder: put your own Baidu Maps AK here
    savepath = r"E:\dosn2\save\data_download2.csv"
    header = ["name", "lat", "lon", "add", "province", "city",
              "area", "street", "detail", "uid", "url"]
    with open(savepath, 'a', encoding='utf-8') as f:
        # In append mode the position starts at the end of the file, so 0 means
        # the file is new or empty; only then is the header row needed.
        if f.tell() == 0:
            f.write(','.join(header) + '\n')
    get_info(keyname, place, ak, savepath)