def extract_from_address(xpath, spider, hxs, response, start=0, end=-1):
    """Extract an address via ``xpath`` and resolve it to coordinates.

    The text returned by ``data_extractor.extractXpath`` is sliced with
    ``[start:end]`` (``end == -1`` is a sentinel meaning "through the end
    of the text"). The slice is first looked up with
    ``_retrieve_address_from_history``; on a miss it is passed to
    ``try_geocode_address`` and, when that yields both coordinates and a
    determined source, the result is stored with
    ``_add_address_to_history``.

    Geocoding failures are reported through ``ItemError_NoExcept`` using
    ``spider.name`` and ``response.url``; they are not re-raised here.

    Returns a 4-tuple ``(latlng, original_source, determined_source,
    address)``. ``address`` equals ``original_source`` when coordinates
    were obtained and is ``''`` otherwise.
    """
    # NOTE(review): the original was collapsed onto one line; the early
    # return below is assumed to belong to the history-hit branch -- confirm.
    _source = data_extractor.extractXpath(hxs, xpath, None, ',')
    if _source is None:
        # Presumably extractXpath can yield None when nothing matches
        # (TODO confirm); len()/slicing would raise TypeError on it.
        return (None, None, None, '')

    # An open-ended slice is equivalent to slicing up to len(_source).
    stop = None if end == -1 else end
    original_source = _source[start:stop]
    if not original_source:
        return (None, original_source, None, '')

    latlng, determined_source = _retrieve_address_from_history(original_source)
    if latlng:
        # History hit: nothing to geocode and nothing new to record.
        return (latlng, original_source, determined_source, original_source)

    try:
        latlng, determined_source = try_geocode_address(original_source)
    except Exception:
        # Narrower than a bare except so KeyboardInterrupt/SystemExit
        # still propagate; the failure is reported and the item continues.
        ItemError_NoExcept(
            'Unable to geocode address(' + repr(original_source) + ')',
            spider.name,
            response.url,
        )

    address = ''
    if latlng and determined_source:
        _add_address_to_history({
            'id': original_source,
            'latlng': latlng,
            'determined_source': determined_source,
        })
        address = original_source
    return (latlng, original_source, determined_source, address)
def extract_from_url(xpath, queryparam, spider, hxs, response):
    """Extract a URL via ``xpath`` and derive coordinates and address from it.

    The URL returned by ``data_extractor.extractXpath`` is passed with
    ``queryparam`` to ``_geocode_url`` to obtain ``latlng``, which is then
    fed to ``_reverse_geocode_address`` to obtain ``address``. If the
    xpath yields nothing, the placeholder ``'Url xpath parse failed'`` is
    returned in place of the URL.

    Failures in either lookup are reported through ``ItemError_NoExcept``
    using ``spider.name`` and ``response.url`` and are not re-raised here;
    whatever was resolved before the failure is still returned.

    Returns a 4-tuple ``(latlng, url, url, address)``; ``latlng`` defaults
    to ``None`` and ``address`` to ``''``.
    """
    latlng = None
    address = ''
    url = data_extractor.extractXpath(hxs, xpath)
    if not url:
        return (latlng, 'Url xpath parse failed', 'Url xpath parse failed', address)

    try:
        latlng = _geocode_url(url, queryparam)
        address = _reverse_geocode_address(latlng)
    except Exception:
        # Still best-effort, but no longer silent: report the failure the
        # same way extract_from_address does, and let
        # KeyboardInterrupt/SystemExit propagate.
        ItemError_NoExcept(
            'Unable to geocode url(' + repr(url) + ')',
            spider.name,
            response.url,
        )
    return (latlng, url, url, address)