1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
| import requests import json import time import random import pandas as pd import argparse
def fetch_baidu_pois(city, keyword, ak, *, timeout=10):
    """Collect the POIs Baidu Place Search returns for one keyword in one city.

    Pages through the ``place/v2/search`` endpoint 20 results at a time until
    a short page comes back or a request fails. Failures are reported on
    stdout and end the paging loop, so the POIs gathered so far are still
    returned (best-effort behaviour).

    Args:
        city: Region name sent as the ``region`` parameter.
        keyword: Search term sent as the ``query`` parameter.
        ak: Baidu API key.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of dicts with the keys ``name``, ``address``, ``phone``,
        ``type``, ``source`` (always ``'Baidu'``), ``lng`` and ``lat``.
    """
    # HTTPS keeps the API key out of plain-text traffic.
    base_url = "https://api.map.baidu.com/place/v2/search"
    page_size = 20
    page_num = 0
    all_pois = []

    while True:
        params = {
            "query": keyword,
            "region": city,
            "output": "json",
            "ak": ak,
            "page_size": page_size,
            "page_num": page_num,
            "scope": 2,
        }
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(base_url, params=params, timeout=timeout)
            data = response.json()
        except (requests.RequestException, ValueError) as exc:
            # ValueError covers a non-JSON body on older requests versions.
            print(f"百度请求失败: {exc}")
            break

        if data.get('status') != 0:
            print(f"百度API错误: {data.get('message', '未知错误')}")
            break

        results = data.get('results') or []
        for poi in results:
            detail = poi.get('detail_info') or {}
            location = poi.get('location') or {}
            all_pois.append({
                'name': poi.get('name'),
                'address': poi.get('address'),
                'phone': poi.get('telephone'),
                'type': detail.get('tag', '未知'),
                'source': 'Baidu',
                'lng': location.get('lng'),
                'lat': location.get('lat'),
            })

        # A short page means there is nothing left to fetch.
        if len(results) < page_size:
            break
        page_num += 1
        # Random pause between pages to stay polite to the API.
        time.sleep(random.uniform(1, 3))

    return all_pois
def _amap_text(value, default=None):
    """Return *value*, or *default* when AMap sent an empty placeholder.

    AMap responses encode an absent field as an empty list (``[]``) instead
    of omitting it; lists are unhashable and would break later
    de-duplication, so every empty value is collapsed to *default*.
    """
    return value if value else default


def _amap_lng_lat(raw):
    """Split an AMap ``"lng,lat"`` string into two floats, or (None, None)."""
    if not isinstance(raw, str):
        return None, None
    lng_text, sep, lat_text = raw.partition(',')
    if not sep:
        return None, None
    try:
        return float(lng_text), float(lat_text)
    except ValueError:
        return None, None


def fetch_amap_pois(city, keyword, key, *, timeout=10):
    """Collect the POIs AMap text search returns for one keyword in one city.

    Pages through the ``v3/place/text`` endpoint 20 results at a time until a
    short page comes back or a request fails. Failures are reported on stdout
    and end the paging loop, so the POIs gathered so far are still returned
    (best-effort behaviour).

    Args:
        city: City name sent as the ``city`` parameter.
        keyword: Search term sent as the ``keywords`` parameter.
        key: AMap API key.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of dicts with the keys ``name``, ``address``, ``phone``,
        ``type``, ``source`` (always ``'AMap'``), ``lng`` and ``lat``.
        Coordinates are floats (matching the Baidu fetcher) or ``None`` when
        the location is missing or malformed; empty text fields are ``None``.
    """
    base_url = "https://restapi.amap.com/v3/place/text"
    offset = 20
    page = 1
    all_pois = []

    while True:
        params = {
            "keywords": keyword,
            "city": city,
            "key": key,
            "offset": offset,
            "page": page,
            "extensions": "all",
        }
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(base_url, params=params, timeout=timeout)
            data = response.json()
        except (requests.RequestException, ValueError) as exc:
            # ValueError covers a non-JSON body on older requests versions.
            print(f"高德请求失败: {exc}")
            break

        # AMap reports success as the string '1', not an integer.
        if data.get('status') != '1':
            print(f"高德API错误: {data.get('info', '未知错误')}")
            break

        pois = data.get('pois') or []
        for poi in pois:
            lng, lat = _amap_lng_lat(poi.get('location'))
            all_pois.append({
                'name': _amap_text(poi.get('name')),
                'address': _amap_text(poi.get('address')),
                'phone': _amap_text(poi.get('tel')),
                'type': _amap_text(poi.get('type'), '未知'),
                'source': 'AMap',
                'lng': lng,
                'lat': lat,
            })

        # A short page means there is nothing left to fetch.
        if len(pois) < offset:
            break
        page += 1
        # Random pause between pages to stay polite to the API.
        time.sleep(random.uniform(1, 3))

    return all_pois
def _split_csv(raw):
    """Split a comma-separated option value into non-empty, stripped items.

    The full-width comma is accepted as well, since it is what a Chinese
    input method produces by default.
    """
    parts = raw.replace(',', ',').split(',')
    return [part.strip() for part in parts if part.strip()]


def main(argv=None):
    """Command-line entry point: collect POIs and export one Excel file per city.

    For every city, each keyword is searched through the selected map
    provider, the results are merged, de-duplicated on ``name`` + ``address``
    and written to ``<city>_pois.xlsx`` in the current directory.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Raises:
        SystemExit: Raised by argparse on invalid arguments, including a
            ``--cities`` or ``--keywords`` value that contains no entries.
    """
    parser = argparse.ArgumentParser(description="百度/高德地图POI采集工具")
    parser.add_argument("--map", required=True, choices=['baidu', 'amap'],
                        help="地图来源: baidu 或 amap")
    parser.add_argument("--key", required=True,
                        help="API Key: 百度AK 或 高德Key")
    parser.add_argument("--keywords", default="汽修厂,改装厂",
                        help="关键词,逗号分隔,默认: 汽修厂,改装厂")
    parser.add_argument("--cities", required=True,
                        help="城市列表,逗号分隔,如: 上海,北京")
    args = parser.parse_args(argv)

    # Blank entries (e.g. from a trailing comma) would otherwise be sent to
    # the API as empty queries.
    keywords = _split_csv(args.keywords)
    cities = _split_csv(args.cities)
    if not cities:
        parser.error("--cities 不能为空")
    if not keywords:
        parser.error("--keywords 不能为空")

    fetch = fetch_baidu_pois if args.map == 'baidu' else fetch_amap_pois

    for city in cities:
        all_data = []
        for keyword in keywords:
            pois = fetch(city, keyword, args.key)
            all_data.extend(pois)
            print(f"{city} - {keyword} 采集 {len(pois)} 条")

        if not all_data:
            # Say so explicitly instead of silently producing no file.
            print(f"{city} 未采集到数据,跳过导出")
            continue

        df = pd.DataFrame(all_data).drop_duplicates(subset=['name', 'address'])
        output_file = f"{city}_pois.xlsx"
        df.to_excel(output_file, index=False, engine='openpyxl')
        print(f"{city} 数据导出到 {output_file},总 {len(df)} 条(去重后)")
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|