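# Scraped page header (not code), kept for provenance:
# Location: 代码迷 >> General >> spider-gaode-3
# Section: detailed solution
#
# Title: spider-gaode-3
#
# Popularity: 44   Published: 2023-12-22 10:36:02.0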
import requests
import json
import time
import pandas as pd
import hashlib
import random
from django.shortcuts import render,HttpResponse
from . import models

# Module-level crawler state shared by the functions below.
sl = set()  # MD5 digests of the request URLs that have already been fetched
count = 0  # key counter: toggled by getKeys() to pick the next API key
req_count = 0  # request counter, reported in getjson()'s log line
# SECURITY NOTE(review): API keys are hard-coded in source control. Consider
# loading them from Django settings or the environment instead.
keys = [
    '227b360433411dd173a81ab636a89543',
    '6d286676e227411e91e045d97994bfa3',
    '8325164e247e15eea68b59e89200988b',
]

# Default location of the POI type-code workbook read by getTypes().
_POI_CODE_XLSX = r'D:\source\gaode\gaodeMap\gaodeSpider\amap_poicode.xlsx'
_POI_CODE_SHEET = 'POI分类与编码(中英文)'

# Seconds to wait for the API before giving up, so a stalled connection
# cannot block the view forever.
_REQUEST_TIMEOUT = 10

# Pool of User-Agent strings; one is picked at random for every request.
_USER_AGENTS = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 LBBROWSER",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; 360SE)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
    "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b13pre) Gecko/20110307 Firefox/4.0b13pre",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
    "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
)

# POI attributes that are bundled into the model's ``data`` field.
_DATA_FIELDS = (
    'distance', 'pcode', 'importance', 'recommend', 'type', 'photos',
    'discount_num', 'gridcode', 'typecode', 'shopinfo', 'poiweight',
    'groupbuy_num', 'navi_poiid', 'shopid', 'location', 'exit_location',
    'childtype', 'indoor_data', 'business_area', 'match', 'postcode',
    'cityname', 'biz_type', 'pname', 'biz_ext', 'website', 'timestamp',
    'email', 'indoor_map', 'entr_location', 'event', 'tag', 'alias',
    'children', 'citycode',
)


def getTypes(path=_POI_CODE_XLSX, sheet_name=_POI_CODE_SHEET):
    """Return the POI type codes listed in the type-code workbook.

    Args:
        path: Excel workbook to read; defaults to the original fixed path.
        sheet_name: Worksheet holding the codes.

    Returns:
        A list of strings taken from the sheet's second column, each
        left-padded with ``'0'`` to at least six characters.
    """
    df = pd.read_excel(path, sheet_name=sheet_name)
    return [str(code).rjust(6, '0') for code in df.iloc[:, 1]]


def getHeaders():
    """Return request headers carrying a randomly chosen User-Agent."""
    return {"User-Agent": random.choice(_USER_AGENTS)}


def getKeys():
    """Return the next API key, alternating between the first two keys.

    The module-level ``count`` is flipped between 0 and 1 on every call.

    NOTE(review): ``keys[2]`` is never handed out; confirm whether the third
    key was meant to take part in the rotation.
    """
    global count
    if count == 0:
        count += 1
        return keys[0]
    count = 0
    return keys[1]


def getjson(page, types):
    """Fetch one page of place-search results for a POI type code.

    Args:
        page: Page number to request.
        types: POI type code sent as the ``types`` query parameter.

    Returns:
        The decoded JSON payload, or ``False`` when the exact request URL
        has been fetched before.

    Raises:
        requests.RequestException: On network failure or timeout.
    """
    global req_count
    pa = {
        'key': getKeys(),
        'keywords': '',
        'types': types,
        'city': 610113,
        'citylimit': 'true',
        'offset': 20,
        'extensions': 'all',
        'children': 1,
        'page': page,
    }
    # Count the request before logging it; the counter used to stay at 0.
    req_count += 1
    r = requests.get(
        'https://restapi.amap.com/v3/place/text?',
        params=pa,
        headers=getHeaders(),
        timeout=_REQUEST_TIMEOUT,
    )
    print('第{}次请求,请求url是{}'.format(req_count, r.url))

    # De-duplicate on the MD5 digest of the final URL.
    # NOTE(review): the URL contains the rotating API key and the check runs
    # after the request was sent, so it neither saves quota nor catches the
    # same page fetched with a different key.
    encryp_url = hashlib.md5(r.url.encode(encoding='utf-8')).hexdigest()
    if encryp_url in sl:
        return False
    sl.add(encryp_url)
    return json.loads(r.text)


def _poi_to_kwargs(poi):
    """Map one POI dict onto the keyword arguments of ``models.gaodeSpider``.

    Attributes missing from ``poi`` become ``None``.
    """
    return {
        '_id': poi.get('id'),
        'parentId': poi.get('parent'),
        'location': poi.get('location'),
        'name': poi.get('name'),
        'address': poi.get('address'),
        'tel': poi.get('tel'),
        'typecode': poi.get('typecode'),
        # Previously read from 'typecode' by mistake.
        'adcode': poi.get('adcode'),
        'data': {field: poi.get(field) for field in _DATA_FIELDS},
        # Current value of the module-level key counter, as in the original.
        'count': count,
    }


def gaodeSpider(request):
    """Django view: crawl every POI type page by page and save each POI.

    Args:
        request: The incoming HTTP request (not inspected).

    Returns:
        An empty ``HttpResponse`` once all type codes have been processed.
    """
    for types in getTypes():
        page = 1
        while True:
            decodejson = getjson(page, types)
            # A falsy result means a duplicate URL or an empty payload.
            if not decodejson:
                break
            # A payload without 'count' is treated as "no results" instead of
            # raising KeyError (presumably what an error response looks like;
            # verify against the API documentation).
            if decodejson.get('count', '0') == '0':
                break
            for eachone in decodejson.get('pois') or []:
                models.gaodeSpider(**_poi_to_kwargs(eachone)).save()
            # Pause between page requests.
            # NOTE(review): the source formatting was lost; confirm the sleep
            # was per page rather than per saved POI.
            time.sleep(0.2)
            page += 1
    return HttpResponse()