def get_cities(data):
    """Collect the city options listed for one country.

    Posts the country code to the module-level ``url`` and reads the option
    values of the ``<select id="city" name="city">`` element in the reply.

    :param data: dict with ``country_code`` and ``country_e``.
    :return: list of dicts with ``city_e``, ``country_e`` and
        ``country_code``; empty when the request fails or the select
        element is absent.
    """
    try:
        d = {"country": data["country_code"], "city": "", "service": -1}
        html = common.post_data(url, d)
    except Exception:
        print("Error occured in getting the list of countries: %s" % url)
        dump_data = {"level": 1, "time": common.format_time(), "data": {"data": url}, "brand_id": brand_id}
        common.dump(dump_data)
        return []

    start = html.find(u'<select id="city" name="city">')
    if start == -1:
        return []
    end = html.find(u"</select>", start)
    html = html[start:end]

    city_list = []
    for m in re.findall(r'<option value="(.+?)">', html):
        value = m.lower()
        # These specific option values are excluded from the city list.
        if data["country_code"] == "GB" and "2 davies street" in value:
            continue
        if data["country_code"] == "RO" and "13 september street" in value:
            continue
        if "b1603daq" in value:
            continue
        city_list.append({"city_e": m, "country_e": data["country_e"], "country_code": data["country_code"]})
    # Hand the result back: the function used to fall off the end and yield
    # None on success while returning [] on failure.
    return city_list
def fetch_stores(data):
    """Request the store page at ``data['url']`` via ``cm.post_data``.

    Only the fetch and its error path are visible here; on success the
    response is bound to ``body`` and nothing further happens.

    :param data: dict with ``url``.
    :return: ``[]`` when the request raises.
    """
    url = data['url']
    try:
        body = cm.post_data(url)
    except Exception:
        cm.dump('Error in fetching stores: %s' % url, log_name)
        return []
def fetch_cities(data):
    """Get the list of cities for one country.

    Posts ``pid``/``lang``/``action`` to ``data['post_url']`` and parses the
    ``<option>`` elements of the reply. The option with value ``0`` is
    skipped.

    :param data: dict with ``post_url``, ``country_id`` and ``brand_id``.
    :return: list of copies of ``data`` extended with ``city_id`` (int) and
        ``city_e`` (stripped, upper-cased); empty when the request fails.
    """
    url = data['post_url']
    try:
        html = cm.post_data(
            url, {
                'pid': data['country_id'],
                'lang': 'en',
                'action': 'popola_select_city'
            })
    except Exception:
        print('Error occured in getting city list: %s' % url)
        dump_data = {
            'level': 2,
            'time': cm.format_time(),
            'data': {'url': url},
            'brand_id': data['brand_id']
        }
        cm.dump(dump_data)
        return []

    city_list = []
    for city_id, city_name in re.findall(r'<option value="(\d+)".*?>(.+?)</option>', html):
        if city_id == '0':
            continue
        d = data.copy()
        d['city_id'] = int(city_id)
        d['city_e'] = city_name.strip().upper()
        city_list.append(d)
    # Previously the list was built and then dropped (implicit None).
    return city_list
def fetch_stores(data):
    """Fetch the stores of one country from a JSON endpoint and store them.

    Posts ``rsp=json`` and the country code to ``data['url']``, turns every
    item of the ``stores`` array into a store entry, inserts it into the
    ``stores`` table and prints a summary line.

    :param data: dict with ``url``, ``country_code``, ``brand_id``,
        ``brandname_e``, ``brandname_c`` and the ``cm.country_e`` key.
    :return: list of the entries created; empty when the request fails.
    """
    url = data['url']
    try:
        body = cm.post_data(url, {'rsp': 'json', 'country': data['country_code']})
    except Exception:
        print('Error occured: %s' % url)
        dump_data = {'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']}
        cm.dump(dump_data)
        return []

    raw = json.loads(body)
    # Index -> label table for the opening-hours blob (loop invariant).
    days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    store_list = []
    for s in raw['stores']:
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        entry[cm.name_e] = cm.html2plain(s['name']).strip()

        addr_list = []
        for key in ['address1', 'address2']:
            if s[key].strip() != '':
                addr_list.append(cm.reformat_addr(s[key]))
        entry[cm.addr_e] = ' '.join(addr_list)

        # Disabled in the original: derive the province from s['region']
        # (for the United States, two-letter codes resolved via gs.look_up).
        # r = s['region'].strip().upper()
        # m = re.search(ur'\b([A-Z]{2})\b', r)
        # if data[cm.country_e] == 'UNITED STATES' and m is not None:
        #     ret = gs.look_up(m.group(1), 2)
        #     if ret is not None:
        #         r = ret['name_e']
        # entry[cm.province_e] = r

        entry[cm.city_e] = cm.extract_city(s['city'])[0]
        entry[cm.zip_code] = s['zip'].strip()
        entry[cm.country_e] = data[cm.country_e]
        entry[cm.lat] = float(s['lat'])
        entry[cm.lng] = float(s['lng'])
        entry[cm.tel] = s['phone'].strip()
        entry[cm.fax] = s['fax'].strip()
        entry[cm.email] = s['emailaddress'].strip()
        entry[cm.url] = s['website'].strip()

        # openingHours holds "i:<index>;s:<len>:"<text>"" pairs; the index
        # selects the weekday label.
        opening = []
        if 'openingHours' in s and s['openingHours'] is not None:
            for m in re.finditer(r'i:(\d);s:\d+:\\?"([^\\"]+?)\\?"', s['openingHours']):
                opening.append('%s: %s' % (days[int(m.group(1))], m.group(2).strip()))
        entry[cm.hours] = ', '.join(opening)

        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        # Only fill province/city from the address when still empty.
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
        if ret[2] is not None and entry[cm.city_e] == '':
            entry[cm.city_e] = ret[2]
        gs.field_sense(entry)

        print('(%s / %d) Found store: %s, %s (%s, %s, %s)' % (
            data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e],
            entry[cm.city_e], entry[cm.country_e], entry[cm.continent_e]))
        store_list.append(entry)
        db.insert_record(entry, 'stores')
    # Previously the collected entries were never returned.
    return store_list
def fetch_cities(data):
    """Fetch the cities of one country from the module-level ``url``.

    Parses the options inside ``<select name="city_id" id="city_id">``.

    :param data: dict with ``country_id`` and ``country``.
    :return: list of dicts with ``city`` (stripped, upper-cased),
        ``city_id`` (int), ``country`` and ``country_id``; empty when the
        request fails or the select element is absent.
    """
    country_id = data['country_id']
    try:
        html = cm.post_data(url, {'country_id': country_id})
    except Exception:
        print('Error occured: %s' % url)
        dump_data = {
            'level': 2,
            'time': cm.format_time(),
            'data': data,
            'brand_id': brand_id
        }
        cm.dump(dump_data)
        return []

    opening_tag = '<select name="city_id" id="city_id">'
    start = html.find(opening_tag)
    if start == -1:
        return []
    start += len(opening_tag)
    end = html.find('</select>', start)
    html = html[start:end]

    city_list = []
    for m in re.findall(r'<option\s.*?value="(\d+).*?">(.*?)</option>', html):
        entry = {'city': m[1].strip().upper(), 'city_id': int(m[0])}
        entry['country'] = data['country']
        entry['country_id'] = data['country_id']
        city_list.append(entry)
    # Previously the list was built and then dropped (implicit None).
    return city_list
def get_cities(data):
    """Collect the city options listed for one country.

    Posts the country code to the module-level ``url`` and reads the option
    values of the ``<select id="city" name="city">`` element in the reply.

    :param data: dict with ``country_code`` and ``country_e``.
    :return: list of dicts with ``city_e``, ``country_e`` and
        ``country_code``; empty when the request fails or the select
        element is absent.
    """
    try:
        d = {'country': data['country_code'], 'city': '', 'service': -1}
        html = common.post_data(url, d)
    except Exception:
        print('Error occured in getting the list of countries: %s' % url)
        dump_data = {'level': 1, 'time': common.format_time(), 'data': {'data': url}, 'brand_id': brand_id}
        common.dump(dump_data)
        return []

    start = html.find(u'<select id="city" name="city">')
    if start == -1:
        return []
    end = html.find(u'</select>', start)
    html = html[start:end]

    city_list = []
    for m in re.findall(r'<option value="(.+?)">', html):
        value = m.lower()
        # These specific option values are excluded from the city list.
        if data['country_code'] == 'GB' and '2 davies street' in value:
            continue
        if data['country_code'] == 'RO' and '13 september street' in value:
            continue
        if 'b1603daq' in value:
            continue
        city_list.append({'city_e': m, 'country_e': data['country_e'], 'country_code': data['country_code']})
    # Hand the result back: the function used to fall off the end and yield
    # None on success while returning [] on failure.
    return city_list
def fetch_cities(data):
    """Fetch the city names available for a continent/country pair.

    Posts the selection to ``data['sel_url']`` and reads the ``city`` array
    of the JSON reply.

    :param data: dict with ``sel_url``, ``continent``, ``country`` and
        ``brand_id``.
    :return: one copy of ``data`` per city, each with ``city`` set; empty
        when the request fails.
    """
    url = data['sel_url']
    try:
        body = cm.post_data(
            url, {
                'continent': data['continent'],
                'country': data['country'],
                'city': '',
                'page': 0
            })
    except Exception:
        print('Error occured: %s' % url)
        cm.dump({
            'level': 0,
            'time': cm.format_time(),
            'data': {'url': url},
            'brand_id': data['brand_id']
        })
        return []

    def with_city(name):
        item = data.copy()
        item['city'] = name
        return item

    return [with_city(name) for name in json.loads(body)['city']]
def fetch_store_list(data):
    """Collect the store ids returned by the store-locator endpoint.

    Every ``"id": "<a>,<b>,..."`` value in the reply is split on commas and
    each part becomes one result.

    :param data: dict with ``data_url``.
    :return: list of copies of ``data`` with ``store_id`` (int) set; empty
        when the request fails.
    """
    url = data['data_url']
    # Equivalent query string:
    # storelocator=1&dofilter=1&L=0&map_sw=-90.0%2C-180.0&map_ne=90.0%2C180.0&slst=c&storetype=1
    param = {
        'storelocator': 1,
        'dofilter': 1,
        'L': 0,
        'map_sw': '-90.0%2C-180.0',
        'map_ne': '90.0%2C180.0',
        'slst': 'c',
        'storetype': 1
    }
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return []

    results = []
    for id_group in re.findall(r'"id"\s*:\s*"([^"]+)"', body):
        for val in id_group.split(','):
            d = data.copy()
            d['store_id'] = int(val)
            results.append(d)
    # Previously the collected ids were never returned.
    return results
def fetch_store_list(data):
    """Collect the detail-page URLs of the stores in one city.

    Each table row starting at a ``<td class="ftd"`` cell is searched for
    the link in its ``<td class="ltd">`` cell.

    :param data: dict with ``url``, ``country_code``, ``city``, ``host``
        and ``brand_id``.
    :return: list of copies of ``data`` whose ``url`` is ``host`` plus the
        link found; empty when the request fails.
    """
    url = data['url']
    try:
        body = cm.post_data(url, {
            'cCode': data['country_code'],
            'city': data['city'],
            'postsearch': 1
        })
    except Exception:
        print('Error occured: %s' % url)
        dump_data = {
            'level': 0,
            'time': cm.format_time(),
            'data': {'url': url},
            'brand_id': data['brand_id']
        }
        cm.dump(dump_data)
        return []

    results = []
    for m in re.finditer(r'<td class\s*=\s*"ftd"', body):
        # Restrict the search to the current table row.
        end = body.find('</tr>', m.start())
        sub = body[m.start():end]
        m1 = re.search(r'<td class="ltd"><a href="(.+?)">', sub)
        if m1 is None:
            print('Cannot find details: %s / %s' % (data['country_code'], data['city']))
            continue
        d = data.copy()
        d['url'] = data['host'] + m1.group(1)
        results.append(d)
    # Previously the collected links were never returned.
    return results
def get_countries(data):
    """Return the list of countries.

    :param data: dict with ``url``.
    :rtype: [{'country_id': **, 'country': **, 'url': **}, ...]
    :return: one dict per country link found in the reply; empty when the
        request fails.
    """
    url = data['url']
    try:
        # NOTE(review): the sibling requests in this file send 'recordid';
        # 'recordit' here may be a typo -- confirm against the server
        # before changing it.
        html = cm.post_data(url, {'country': -1, 'city': -1, 'recordit': -1})
    except Exception:
        print('Error occured: %s' % url)
        dump_data = {
            'level': 1,
            'time': cm.format_time(),
            'data': {'url': url},
            'brand_id': brand_id
        }
        cm.dump(dump_data)
        return []

    ret = []
    # The pattern expects escaped closing tags (``<\/a><\/li>``).
    for m in re.findall(
            r'<li>\s*?<a href=.+?country-(\d+).+?">(.+?)<\\/a><\\/li>',
            html, re.S):
        country_id = int(m[0].strip())
        # Drop the literal two-character sequences "\r" and "\n".
        country = m[1].replace(r'\r', '').replace(r'\n', '').strip().upper()
        ret.append({'country_id': country_id, 'country': country, 'url': url})
    # Previously the list promised by the docstring was never returned.
    return ret
def get_store_list(data):
    """Get the list of stores in a city.

    :param data: dict with ``url``, ``country_id`` and ``city_id``.
    :return: list of copies of ``data`` with ``store_id`` (int) set, one per
        ``store-<id>`` link in the reply; empty when the request fails.
    """
    url = data['url']
    try:
        html = cm.post_data(url, {
            'country': data['country_id'],
            'city': data['city_id'],
            'recordid': -1
        })
    except Exception:
        print('Error occured: %s' % url)
        dump_data = {
            'level': 1,
            'time': cm.format_time(),
            'data': {'url': url},
            'brand_id': brand_id
        }
        cm.dump(dump_data)
        return []

    ret = []
    for m in re.findall(r'<a href=.+?store-(\d+).+?">', html, re.S):
        entry = dict(data)
        entry['store_id'] = int(m.strip())
        ret.append(entry)
    # Previously the collected entries were never returned.
    return ret
def fetch_cities(data, logger):
    """Get the list of cities for one country.

    Posts ``pid``/``lang``/``action`` to ``data['post_url']``, parses the
    reply with ``pq`` and reads every ``<option>`` whose value is not "0".

    :param data: dict with ``post_url``, ``country_id`` and ``brand_id``.
    :param logger: accepted but not used by this function.
    :return: list of copies of ``data`` extended with ``city_id`` (int) and
        ``city_e``; empty when fetching or parsing raises.
    """
    url = data['post_url']
    try:
        html = cm.post_data(
            url, {
                'pid': data['country_id'],
                'lang': 'en',
                'action': 'popola_select_city'
            })
        body = pq(html)
    except Exception:
        print('Error occured in getting city list: %s' % url)
        cm.dump({
            'level': 2,
            'time': cm.format_time(),
            'data': {'url': url},
            'brand_id': data['brand_id']
        })
        return []

    def to_city(option):
        d = data.copy()
        d['city_id'] = int(option.attrib['value'])
        d['city_e'] = option.text.upper().strip()
        return d

    return [to_city(option) for option in body('option[value!="0"]')]
def fetch_cities(data):
    """Collect the city option values offered for one country.

    Looks for the placeholder ``<option value="0">city</option>`` and reads
    the option values between it and the next ``</select>``.

    :param data: dict with ``url``, ``country_code`` and ``brand_id``.
    :return: list of copies of ``data`` with ``city`` set to the option
        value; empty when the request fails or the placeholder is absent.
    """
    url = data['url']
    try:
        body = cm.post_data(
            url, {
                'searchtype': 'normal',
                'reiter_selected': 'reiter1',
                'country_id': data['country_code'],
                'city_id': 0
            })
    except Exception:
        print('Error occured: %s' % url)
        dump_data = {
            'level': 0,
            'time': cm.format_time(),
            'data': {'url': url},
            'brand_id': data['brand_id']
        }
        cm.dump(dump_data)
        return []

    m = re.search(r'<option value="0"[^>]*>city</option>', body)
    if m is None:
        return []
    end = body.find(u'</select>', m.end())

    city_list = []
    for c in re.findall(r'<option value="(.+?)"[^>]*>.+?</option>', body[m.end():end]):
        d = data.copy()
        d['city'] = c
        city_list.append(d)
    # Previously the list was built and then dropped (implicit None).
    return city_list
def fetch_stores(data):
    """Fetch, parse and store the shops of one country/region.

    The reply is scanned for ``shop-type-container`` blocks; every
    ``<div class="shop"`` inside one becomes a store entry that is logged,
    inserted into the ``stores`` table and collected.

    NOTE: ``data['store_id']`` is overwritten for each shop that exposes a
    ``loadStore(...)`` call, because ``get_detail`` reads the id from
    ``data``. This mutation of the caller's dict is kept as in the original.

    :param data: dict with ``url``, ``country_code``, ``city``,
        ``brand_id``, ``brandname_e`` and ``brandname_c``.
    :return: list of the entries created; empty when the request fails.
    """
    param = {'action': 'getStoresFromAjax', 'country': data['country_code'], 'region': data['city'],
             'collection': ''}
    url = data['url']
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return []

    store_list = []
    for container in re.finditer(r'<div class="shop-type-container">', body):
        sub = cm.extract_closure(body[container.start():], r'<div\b', r'</div>')[0]

        store_class = ''
        title = re.search(r'<div class="shop-type-title">(.+?)</div>', sub, re.S)
        if title is not None:
            store_class = cm.reformat_addr(title.group(1))

        for shop in re.finditer(r'<div class="shop"', sub):
            store_sub = cm.extract_closure(sub[shop.start():], r'<div\b', r'</div>')[0]
            entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
            entry[cm.store_class] = store_class
            entry[cm.country_e] = data['country_code']
            entry[cm.city_e] = cm.extract_city(data['city'])[0]

            # loadStore(<id>, <lat>, <lng>) carries the id and coordinates.
            load = re.search(r'loadStore\((\d+)\s*,\s*(-?\d+\.\d+)\s*,\s*(-?\d+\.\d+)\)', store_sub)
            if load is not None:
                data['store_id'] = int(load.group(1))
                entry[cm.lat] = float(load.group(2))
                entry[cm.lng] = float(load.group(3))
                entry[cm.store_type] = ', '.join(get_detail(data))

            name = re.search(r'<div class="shop-name shop-details shop-main-name">([^<>]+)</div>', store_sub)
            if name is not None:
                entry[cm.name_e] = name.group(1).strip()

            addr_list = []
            street = re.search(r'<div class="shop-street shop-details">([^<>]+)</div>', store_sub)
            if street is not None:
                addr_list.append(cm.reformat_addr(street.group(1)))
            city = re.search(r'<div class="shop-city shop-details">([^<>]+)</div>', store_sub)
            if city is not None:
                tmp = cm.reformat_addr(city.group(1))
                # A run of four or more digits is taken as the zip code.
                zip_match = re.search(r'(\d{4,})', tmp)
                if zip_match is not None:
                    entry[cm.zip_code] = zip_match.group(1).strip()
                addr_list.append(tmp)
            entry[cm.addr_e] = ', '.join(addr_list)

            ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
            if ret[1] is not None and entry[cm.province_e] == '':
                entry[cm.province_e] = ret[1]
            gs.field_sense(entry)

            cm.dump('(%s / %d) Found store: %s, %s (%s, %s, %s)' % (
                data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e],
                entry[cm.city_e], entry[cm.country_e], entry[cm.continent_e]), log_name)
            db.insert_record(entry, 'stores')
            store_list.append(entry)
    # Previously the collected entries were never returned.
    return store_list
def fetch_stores_eu(data):
    """Request the stores for ``data['city_code']`` from ``data['data_url_eu']``.

    Only the fetch and its error path are visible here; on success the
    response is bound to ``body`` and nothing further happens.

    :return: ``()`` when the request raises.
    """
    url = data['data_url_eu']
    param = {'query': data['city_code']}
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return ()
def fetch_store_list(data):
    """Request the store list for ``data['zone_id']`` from ``data['data_url']``.

    Only the fetch and its error path are visible here; on success the
    response is bound to ``body`` and nothing further happens.

    :return: ``[]`` when the request raises.
    """
    url = data['data_url']
    param = {'zone': data['zone_id']}
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching store list: %s, %s' % (url, param), log_name)
        return []
def fetch_cities(data):
    """Request the cities of ``data['country_id']`` from ``data['url']``.

    Only the fetch and its error path are visible here; on success the
    response is bound to ``body`` and nothing further happens.

    :return: ``()`` when the request raises.
    """
    url = data['url']
    param = {
        'ref': 'com',
        'fld_Coun_Id': data['country_id'],
        'search': data['search_type'],
    }
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching cities: %s, %s' % (url, param), log_name)
        return ()
def fetch_countries(data):
    """Request the countries under ``data['continent_id']`` from ``data['data_url']``.

    Only the fetch and its error path are visible here; on success the
    response is bound to ``body`` and nothing further happens.

    :return: ``()`` when the request raises.
    """
    url = data['data_url']
    param = {'node_id': data['continent_id'], 'location_id': 0}
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching countries: %s, %s' % (url, param), log_name)
        return ()
def fetch_stores(data):
    """Request the stores of ``data['country_id']`` from ``data['data_url']``.

    Only the fetch and its error path are visible here; on success the
    response is bound to ``body`` and nothing further happens.

    :return: ``[]`` when the request raises.
    """
    url = data['data_url']
    param = {'countryId': data['country_id']}
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return []
def fetch_states(data):
    """Request the states under ``data['country_id']`` from ``data['data_url']``.

    Only the fetch and its error path are visible here; on success the
    response is bound to ``body`` and nothing further happens.

    :return: ``()`` when the request raises.
    """
    url = data['data_url']
    param = {'node_id': data['country_id'], 'location_id': 1}
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching states: %s, %s' % (url, param), log_name)
        return ()
def fetch_stores(data):
    """Search ``data['url']`` for stores using ``data['country']`` as input.

    Only the fetch and its error path are visible here; on success the
    response is bound to ``body`` and nothing further happens.

    :return: ``[]`` when the request raises.
    """
    url = data['url']
    param = {'searchinput': data['country']}
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return []
def fetch_stores(data):
    """Request the stores of ``data['city_id']`` from ``data['data_url']``.

    Only the fetch and its error path are visible here; on success the
    response is bound to ``body`` and nothing further happens.

    :return: ``[]`` when the request raises.
    """
    url = data['data_url']
    param = {
        'action': 'yoox_storelocator_change_city',
        'city_id': data['city_id'],
        'dataType': 'JSON',
    }
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return []
def fetch_stores(db, data, logger):
    """Get store information for one city.

    Posts ``pid``/``lang``/``action`` to ``data['post_url']``, parses the
    reply with ``pq`` and builds one entry per ``<a href>`` element. Each
    entry is logged and inserted into ``spider_stores.stores``.

    :param db: handle passed through to ``cm.insert_record``.
    :param data: dict with ``post_url``, ``city_id``, ``brand_id``,
        ``brandname_e``, ``brandname_c``, ``country_e`` and ``city_e``.
    :param logger: receives an error for an empty reply and an info line
        per store.
    :return: list of the entries created; empty when the reply is blank or
        fetching/parsing raises.
    """
    url = data['post_url']
    try:
        html = cm.post_data(url, {
            'pid': data['city_id'],
            'lang': 'en',
            'action': 'popola_box_DX'
        })
        if html.strip() == u'':
            logger.error(u'Failed to fetch stores for city {0}'.format(data['city_id']))
            return []
        body = pq(html)
    except Exception:
        print('Error occured in getting city list: %s' % url)
        cm.dump({
            'level': 2,
            'time': cm.format_time(),
            'data': {'url': url},
            'brand_id': data['brand_id']
        })
        return []

    store_list = []
    for anchor in body('a[href]'):
        item = pq(anchor)
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        entry[cm.url] = item[0].attrib['href']
        entry[cm.name_e] = item('h3.titleShop')[0].text.strip()

        # The address paragraph is serialised and split on commas; when the
        # last term yields a phone number it is moved into the tel field.
        address_html = unicode(item('div.txtBoxSingleStore p.lineHeight14'))
        terms = cm.reformat_addr(address_html).split(',')
        tel = cm.extract_tel(terms[-1])
        if tel != '':
            terms = terms[:-1]
            entry[cm.tel] = tel
        entry[cm.addr_e] = u', '.join([v.strip() for v in terms])

        entry['country_e'] = data['country_e']
        entry['city_e'] = data['city_e']
        gs.field_sense(entry)

        logger.info('(%s / %d) Found store: %s, %s (%s, %s)' % (
            data['brandname_e'], data['brand_id'], entry[cm.name_e],
            entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]))
        store_list.append(entry)
        cm.insert_record(db, entry, 'spider_stores.stores')
    return store_list
def fetch_stores(data):
    """Post an empty JSON request with no-cache headers to ``data['url']``.

    Only the fetch and its error path are visible here; on success the
    response is bound to ``body`` and nothing further happens.

    :return: ``[]`` when the request raises.
    """
    url = data['url']
    headers = {
        'Content-Type': 'application/json; charset=utf-8',
        'Content-Length': 0,
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
    }
    try:
        body = cm.post_data(url, {}, headers)
    except Exception as e:
        cm.dump('Error in fetching stores: %s, %s' % (url, e), log_name)
        return []
def fetch_stores_beauty(data):
    """Request the beauty store list for one country/city from ``data['lst_url']``.

    Only the fetch and its error path are visible here; on success the
    response is bound to ``body`` and nothing further happens.

    :return: ``()`` when the request raises.
    """
    url = data['lst_url']
    param = {
        'chkCat[0]': 'FRG',
        'chkCat[1]': 'MKP',
        'chkCat[2]': 'PRE',
        'chkCat[3]': 'EXC',
        'div': 'fnb',
        'lstCountry': data['country_code'],
        'lstCity': data['city'],
    }
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching countries: %s' % url, log_name)
        return ()
def fetch_stores(data):
    """Get store information for one country and retail type.

    The reply from ``data['post_url']`` is decoded with ``unicode_escape``
    and parsed as a JSON list; each item becomes a store entry that is
    printed and inserted into the ``stores`` table.

    Example payloads:
        country_id=108&retail_city=&retail_type=retail
        country_id=99&retail_city=&retail_type=service

    :param data: dict with ``post_url``, ``country_id``, ``retail_type``,
        ``brand_id``, ``brandname_e`` and ``brandname_c``.
    :return: list of the entries created; empty when fetching or decoding
        raises.
    """
    url = data["post_url"]
    try:
        raw = cm.post_data(
            url, {"country_id": data["country_id"], "retail_city": "", "retail_type": data["retail_type"]}
        )
        js = json.loads(raw.decode("unicode_escape"))
    except Exception:
        print("Error occured in getting country list: %s" % url)
        cm.dump({"level": 1, "time": cm.format_time(), "data": {"url": url}, "brand_id": data["brand_id"]})
        return []

    store_list = []
    for s in js:
        entry = cm.init_store_entry(data["brand_id"], data["brandname_e"], data["brandname_c"])

        # The name goes to the Chinese or the English field.
        name = s["retail_name"].strip()
        name_key = cm.name_c if cm.is_chinese(name) else cm.name_e
        entry[name_key] = name

        entry[cm.addr_e] = s["retail_gmap"].strip()
        entry[cm.zip_code] = s["retail_zipcode"].strip()
        entry[cm.city_e] = s["retail_city"].strip().upper()
        if s["retail_email"] is not None:
            entry[cm.email] = s["retail_email"].strip()
        if s["retail_website"] is not None:
            entry[cm.url] = s["retail_website"].strip()
        entry[cm.store_class] = "Retail" if data["retail_type"] == "retail" else "Service Center"
        entry[cm.country_e] = s["country_name"].strip().upper()
        entry[cm.continent_e] = s["continent_name"].strip().upper()
        gs.field_sense(entry)

        print("(%s / %d) Found store: %s, %s (%s, %s)" % (
            data["brandname_e"],
            data["brand_id"],
            entry[cm.name_e],
            entry[cm.addr_e],
            entry[cm.country_e],
            entry[cm.continent_e],
        ))
        store_list.append(entry)
        db.insert_record(entry, "stores")
    return store_list
def fetch_stores(data):
    """Request the stores inside a +/-0.25 degree box around a city.

    The parameters are URL-encoded into the query string of ``data['url']``.
    Only the fetch and its error path are visible here; on success the
    response is bound to ``body`` and nothing further happens.

    :param data: dict with ``url``, ``city_lat`` and ``city_lng``.
    :return: ``()`` when the request raises.
    """
    url = data['url']
    lat = data['city_lat']
    lng = data['city_lng']
    param = {
        'json': 1,
        'lat': lat,
        'lng': lng,
        'latLow': lat - 0.25,
        'latHigh': lat + 0.25,
        'lngLow': lng - 0.25,
        'lngHigh': lng + 0.25,
        'includeResellers': 'true',
    }
    try:
        body = cm.post_data('%s?%s' % (url, urllib.urlencode(param)))
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return ()
def fetch_stores(data):
    """Get store information for one city.

    Every ``<a href="...">`` element of the reply is treated as one store:
    the name comes from its ``<h3 class="titleShop">`` and the address from
    its first ``<p>``. Each entry is printed and inserted into ``stores``.

    :param data: dict with ``post_url``, ``city_id``, ``brand_id``,
        ``brandname_e``, ``brandname_c``, ``country_e`` and ``city_e``.
    :return: list of the entries created; empty when the request fails.
    """
    url = data['post_url']
    try:
        html = cm.post_data(url, {
            'pid': data['city_id'],
            'lang': 'en',
            'action': 'popola_box_DX'
        })
    except Exception:
        print('Error occured in getting city list: %s' % url)
        dump_data = {
            'level': 2,
            'time': cm.format_time(),
            'data': {'url': url},
            'brand_id': data['brand_id']
        }
        cm.dump(dump_data)
        return []

    store_list = []
    for m in re.finditer(r'<a href="(.+?)".*?>', html):
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        entry[cm.url] = m.group(1)

        store_html, _, end = cm.extract_closure(html[m.start():], r'<a href', r'</a>')
        if end == 0:
            # No closure reported for this anchor; skip it.
            continue

        names = re.findall(r'<h3 class="titleShop">(.+?)</h3>', store_html, re.S)
        if len(names) > 0:
            entry[cm.name_e] = names[0].strip()

        paragraphs = re.findall(r'<p\b.*?>(.+?)(?:</p>|</div>)', store_html, re.S)
        if len(paragraphs) > 0:
            terms = cm.reformat_addr(paragraphs[0]).split(',')
            # When the last term yields a phone number, move it to tel.
            tel = cm.extract_tel(terms[-1])
            if tel != '':
                terms = terms[:-1]
                entry[cm.tel] = tel
            entry[cm.addr_e] = ', '.join([v.strip() for v in terms])

        entry['country_e'] = data['country_e']
        entry['city_e'] = data['city_e']
        gs.field_sense(entry)

        print('(%s / %d) Found store: %s, %s (%s, %s)' % (
            data['brandname_e'], data['brand_id'], entry[cm.name_e],
            entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]))
        store_list.append(entry)
        db.insert_record(entry, 'stores')
    # Previously the collected entries were never returned.
    return store_list
def fetch_stores(data):
    """Fetch, parse and store the ``<marker .../>`` stores of one city.

    Each marker's attributes are read with regular expressions, turned into
    a store entry, logged and inserted into the ``stores`` table.

    :param data: dict with ``store_url``, ``country_code``, ``city_code``,
        ``city``, ``brand_id``, ``brandname_e`` and ``brandname_c``.
    :return: list of the entries created; empty when the request fails.
    """

    def attr(marker, name):
        # Value of attribute ``name`` in the marker text, or None.
        found = re.search(r'%s="([^"]+)"' % name, marker)
        return found.group(1) if found is not None else None

    url = data['store_url']
    param = {'store_country': data['country_code'], 'store_city': data['city_code']}
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return []

    store_list = []
    for s in re.findall(r'<marker\b([^<>]+)/\s*>', body):
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])

        value = attr(s, 'store_name')
        if value is not None:
            entry[cm.name_e] = cm.reformat_addr(value)
        entry[cm.country_e] = data['country_code']
        entry[cm.city_e] = data['city']

        addr_list = []
        for key in ['store_mall_name', 'store_address', 'store_zip_code']:
            value = attr(s, key)
            if value is not None:
                tmp = cm.reformat_addr(value)
                if tmp != '':
                    addr_list.append(tmp)
        entry[cm.addr_e] = ', '.join(addr_list)

        # Plain string attributes are stored stripped.
        for field, key in ((cm.zip_code, 'store_zip_code'),
                           (cm.tel, 'store_telephone'),
                           (cm.fax, 'store_fax'),
                           (cm.email, 'store_email')):
            value = attr(s, key)
            if value is not None:
                entry[field] = value.strip()

        # Coordinates are stored as floats.
        for field, key in ((cm.lat, 'store_latitude'), (cm.lng, 'store_longitude')):
            value = attr(s, key)
            if value is not None:
                entry[field] = float(value.strip())

        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
        gs.field_sense(entry)

        cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (
            data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e],
            entry[cm.country_e], entry[cm.continent_e]), log_name)
        db.insert_record(entry, 'stores')
        store_list.append(entry)
    # Previously the collected entries were never returned.
    return store_list
def fetch_stores(data):
    """Request the stores of one country/state/city from ``data['data_url']``.

    A ``state_id`` of ``None`` is sent as ``0``. Only the fetch and its
    error path are visible here; on success the response is bound to
    ``body`` and nothing further happens.

    :return: ``[]`` when the request raises.
    """
    url = data['data_url']
    state_id = data['state_id']
    param = {
        'request': 'setStores',
        'c': data['country_id'],
        's': 0 if state_id is None else state_id,
        'l': data['city_id'],
        'ca': '',
    }
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s' % url, log_name)
        return []
def fetch_cities(data):
    """Request the cities of a country, or of a state when one is given.

    Sends ``countryChange`` when ``data['state_id']`` is ``None`` and
    ``stateChange`` otherwise. Only the fetch and its error path are
    visible here; on success the response is bound to ``body`` and nothing
    further happens.

    :return: ``[]`` when the request raises.
    """
    url = data['data_url']
    state_id = data['state_id']
    if state_id is not None:
        param = {'request': 'stateChange', 'c': data['country_id'], 's': state_id}
    else:
        param = {'request': 'countryChange', 'c': data['country_id']}
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return []
def fetch_cities(data):
    """Request the cities of ``data['country_id']`` from ``data['url']``.

    Only the fetch and its error path are visible here; on success the
    response is bound to ``body`` and nothing further happens.

    :return: ``()`` when the request raises.
    """
    url = data['url']
    param = dict([
        ('ref', 'com'),
        ('fld_Coun_Id', data['country_id']),
        ('search', data['search_type']),
    ])
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching cities: %s, %s' % (url, param), log_name)
        return ()
def fetch_stores(data):
    """Submit the IoStorefinder search form for ``data['country_code']``.

    The zip and city fields are sent as the literal strings ``'POSTCODE'``
    and ``'Town'``. Only the fetch and its error path are visible here; on
    success the response is bound to ``body`` and nothing further happens.

    :return: ``()`` when the request raises.
    """
    url = data['data_url']
    param = {
        'tx_iostorefinder_pi1[__referrer][extensionName]': 'IoStorefinder',
        'tx_iostorefinder_pi1[__referrer][controllerName]': 'Store',
        'tx_iostorefinder_pi1[__referrer][actionName]': 'search',
        'tx_iostorefinder_pi1[countryid]': data['country_code'],
        'tx_iostorefinder_pi1[zip]': 'POSTCODE',
        'tx_iostorefinder_pi1[city]': 'Town',
    }
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return ()
def get_stores(data):
    """Request the store list for ``data['country_code']`` from ``data['url']``.

    Example field: ``data[StoreLocator][pays]=BO``. Only the fetch and its
    error path are visible here; on success the response is bound to
    ``html`` and nothing further happens.

    :return: ``[]`` when the request raises.
    """
    url = data['url']
    form = {
        'data[StoreLocator][pays]': data['country_code'],
        'data[StoreLocator][ville]': '',
        'data[StoreLocator][etat]': 0,
    }
    try:
        html = cm.post_data(url, form)
    except Exception as e:
        print('Error occured: %s, %s' % (url, str(e)))
        cm.dump({'level': 1, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': brand_id})
        return []
def fetch_cities(data):
    """Request the cities below a country (no regions) or below a state.

    When ``data['no_region']`` is truthy the country id is sent with
    ``no_region=1``; otherwise the state id is sent. Only the fetch and its
    error path are visible here; on success the response is bound to
    ``body`` and nothing further happens.

    :return: ``()`` when the request raises.
    """
    url = data['data_url']
    if data['no_region']:
        param = {'node_id': data['country_id'], 'location_id': 2, 'no_region': 1}
    else:
        param = {'node_id': data['state_id'], 'location_id': 2}
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching cities: %s, %s' % (url, param), log_name)
        return ()
def fetch_cities(data):
    """Request the retailer page for ``data['country_code']``.

    Only the fetch and its error path are visible here; on success the
    response is bound to ``body`` and nothing further happens.

    :return: ``()`` when the request raises.
    """
    url = data['data_url']
    param = {'DestinationURL': 'Worldwide-retailers', '(country)': data['country_code']}
    try:
        body = cm.post_data(url, param)
        # Disabled in the original: follow the META "Location" redirect.
        # m = re.search(ur'META HTTP-EQUIV="Location"\s+Content="([^"]+)"', body)
        # if not m:
        #     raise IOError()
        # body = cm.get_data(m.group(1))
    except Exception:
        cm.dump('Error in fetching cities: %s, %s' % (url, param), log_name)
        return ()
def fetch_stores(data):
    """Post an empty JSON request with no-cache headers to ``data['url']``.

    Only the fetch and its error path are visible here; on success the
    response is bound to ``body`` and nothing further happens.

    :return: ``[]`` when the request raises.
    """
    url = data['url']
    headers = {
        'Content-Type': 'application/json; charset=utf-8',
        'Content-Length': 0,
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
    }
    try:
        body = cm.post_data(url, {}, headers)
    except Exception as e:
        cm.dump('Error in fetching stores: %s, %s' % (url, e), log_name)
        return []
def get_detail(data):
    """Return the ``<li>`` items listed for the store ``data['store_id']``.

    The items are read from the first ``<div class="lines">`` block of the
    reply.

    :param data: dict with ``url`` and ``store_id``.
    :return: tuple of stripped item strings; ``()`` when the request fails
        or the block is absent.
    """
    param = {'action': 'loadStoreFromAjax', 'id': data['store_id']}
    url = data['url']
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return ()

    block = re.search(r'<div class="lines">(.+?)</div>', body, re.S)
    if block is None:
        return ()
    items = re.findall(r'<li>(.+?)</li>', block.group(1), re.S)
    return tuple(item.strip() for item in items)
def fetch_stores(data):
    """Fetch, parse and store the shops of one city.

    Every ``<ul>`` of the reply is one store: ``li.first`` holds the name
    and the remaining ``<li>`` items the address, optionally ending with a
    phone number. Entries are printed and inserted into ``stores``.

    A store that raises ``IndexError``/``TypeError`` while being parsed is
    logged and skipped; any other exception ends the scan, and the entries
    gathered so far are still returned.

    :param data: dict with ``post_shops``, ``city_e``, ``brand_id``,
        ``brandname_e``, ``brandname_c`` and the ``cm.country_e`` /
        ``cm.city_e`` keys.
    :return: list of the entries created; empty when the request fails.
    """
    url = data['post_shops']
    param = {'city': data['city_e'], 'paulandjoe_women': 0, 'paulandjoe_man': 0,
             'paulandjoe_sister': 0, 'paulandjoe_little': 0, 'paulandjoe_beauty': 0}
    try:
        html = cm.post_data(url, param)
    except Exception:
        print('Error occured: %s' % url)
        dump_data = {'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']}
        cm.dump(dump_data)
        return []

    store_list = []
    try:
        for store in (pq(tmp) for tmp in pq(html)('ul')):
            try:
                entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
                entry[cm.name_e] = cm.html2plain(store('li.first')[0].text).strip()
                entry[cm.country_e] = data[cm.country_e]
                entry[cm.city_e] = data[cm.city_e]

                addr_list = []
                for term in (cm.reformat_addr(unicode(pq(tmp))) for tmp in store('li[class!="first"]')):
                    if term != '':
                        addr_list.append(term)
                # When the last line yields a phone number, move it to tel.
                tel = cm.extract_tel(addr_list[-1])
                if tel != '':
                    entry[cm.tel] = tel
                    del addr_list[-1]
                entry[cm.addr_e] = ', '.join(addr_list)

                gs.field_sense(entry)
                ret = gs.addr_sense(entry[cm.addr_e])
                # Only fill geo fields from the address when still empty.
                if ret[0] is not None and entry[cm.country_e] == '':
                    entry[cm.country_e] = ret[0]
                if ret[1] is not None and entry[cm.province_e] == '':
                    entry[cm.province_e] = ret[1]
                if ret[2] is not None and entry[cm.city_e] == '':
                    entry[cm.city_e] = ret[2]
                gs.field_sense(entry)

                print('(%s/%d) Found store: %s, %s (%s, %s)' % (
                    data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e],
                    entry[cm.country_e], entry[cm.continent_e]))
                store_list.append(entry)
                db.insert_record(entry, 'stores')
            except (IndexError, TypeError):
                cm.dump(u'Error in parsing %s, %s' % (url, param), log_name)
                print(traceback.format_exc())
                continue
    except Exception:
        print(traceback.format_exc())
    # Previously the collected entries were never returned.
    return store_list
def parse_store(data, body=None):
    """Build an address -> coordinates map from the page's Drupal settings.

    When ``body`` is not supplied it is fetched from ``data['url']``. The
    object following ``jQuery.extend(Drupal.settings,`` is parsed as JSON
    and its ``getlocations.key_1.latlons`` items populate ``latlng_map``.
    The visible part of the function ends after building that map.

    :param data: dict with ``url`` (read only when ``body`` is None).
    :param body: optional page source.
    :return: ``[]`` when the fetch raises.
    """
    if body is None:
        url = data['url']
        try:
            body = cm.post_data(url)
        except Exception:
            cm.dump('Error in fetching stores: %s' % url, log_name)
            return []

    latlng_map = {}
    start = body.find(u'jQuery.extend(Drupal.settings,')
    if start != -1:
        settings_text = cm.extract_closure(body[start:], r'\{', r'\}')[0]
        settings = json.loads(settings_text)
        for item in settings['getlocations']['key_1']['latlons']:
            # item[0] and item[1] are the coordinates, item[3] the address.
            coords = {'lat': float(item[0]), 'lng': float(item[1])}
            latlng_map[cm.reformat_addr(item[3])] = coords
def fetch_stores(data):
    """Submit the store search for one continent/country/city.

    ``region`` is only sent when ``data['no_region']`` is falsy. Only the
    fetch and its error path are visible here; on success the response is
    bound to ``body`` and nothing further happens.

    :return: ``()`` when the request raises.
    """
    url = data['store_url']
    param = {
        'univers[mode_femme]': 'pla_women',
        'univers[mode_homme]': 'pla_dior_men',
        'univers[baby_dior]': 'pla_baby_dior',
        'univers[horlogerie]': 'pla_watches',
        'univers[joaillerie]': 'pla_fine_jewelry',
        'univers[dior_phone]': 'pla_dior_phone',
        'continent': data['continent_id'],
        'pays': data['country_id'],
        'ville': data['city_id'],
        'node_id': '581',
        'search': 'SEARCH',
    }
    if not data['no_region']:
        param['region'] = data['state_id']
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return ()
def fetch_stores(data):
    """Get store information for one country and retail type.

    The reply from ``data['post_url']`` is decoded with ``unicode_escape``
    and parsed as a JSON list; each item becomes a store entry that is
    printed and inserted into the ``stores`` table.

    Example payloads:
        country_id=108&retail_city=&retail_type=retail
        country_id=99&retail_city=&retail_type=service

    :param data: dict with ``post_url``, ``country_id``, ``retail_type``,
        ``brand_id``, ``brandname_e`` and ``brandname_c``.
    :return: list of the entries created; empty when fetching or decoding
        raises.
    """
    url = data['post_url']
    try:
        raw = cm.post_data(
            url, {'country_id': data['country_id'], 'retail_city': '', 'retail_type': data['retail_type']})
        js = json.loads(raw.decode('unicode_escape'))
    except Exception:
        print('Error occured in getting country list: %s' % url)
        cm.dump({'level': 1, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']})
        return []

    store_list = []
    for s in js:
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])

        # The name goes to the Chinese or the English field.
        name = s['retail_name'].strip()
        name_key = cm.name_c if cm.is_chinese(name) else cm.name_e
        entry[name_key] = name

        entry[cm.addr_e] = s['retail_gmap'].strip()
        entry[cm.zip_code] = s['retail_zipcode'].strip()
        entry[cm.city_e] = s['retail_city'].strip().upper()
        if s['retail_email'] is not None:
            entry[cm.email] = s['retail_email'].strip()
        if s['retail_website'] is not None:
            entry[cm.url] = s['retail_website'].strip()
        entry[cm.store_class] = 'Retail' if data['retail_type'] == 'retail' else 'Service Center'
        entry[cm.country_e] = s['country_name'].strip().upper()
        entry[cm.continent_e] = s['continent_name'].strip().upper()
        gs.field_sense(entry)

        print('(%s / %d) Found store: %s, %s (%s, %s)' % (
            data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e],
            entry[cm.country_e], entry[cm.continent_e]))
        store_list.append(entry)
        db.insert_record(entry, 'stores')
    return store_list
def fetch_cities(data):
    """Fetch the regions of one country and return one record per region.

    The reply is parsed as a JSON sequence; each element is stored under
    the ``city`` key of a copy of ``data``.

    :param data: dict with ``url`` and ``country_code``.
    :return: list of the copies; empty when the request fails.
    """
    param = {'action': 'getRegionsFromAjax', 'country': data['country_code']}
    url = data['url']
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching cities: %s, %s' % (url, param), log_name)
        return []

    def with_city(name):
        item = data.copy()
        item['city'] = name
        return item

    return [with_city(name) for name in json.loads(body)]
def fetch_cities(data):
    """Collect the city options offered for one country.

    :param data: dict with ``data_url`` and ``country_code``.
    :return: list of copies of ``data`` with ``city`` (option text,
        stripped and upper-cased) and ``city_code`` (option value) set;
        empty when the request fails.
    """
    url = data['data_url']
    param = {'country': data['country_code']}
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching cities: %s, %s' % (url, param), log_name)
        return []

    results = []
    for code, label in re.findall(r'<option value="([^"]+)">([^<>]+)', body):
        d = data.copy()
        d['city'] = label.strip().upper()
        d['city_code'] = code
        results.append(d)
    # Previously the list was built and then dropped (implicit None).
    return results
def fetch_cities(data):
    """Request the retailer page for ``data['country_code']``.

    Only the fetch and its error path are visible here; on success the
    response is bound to ``body`` and nothing further happens.

    :return: ``()`` when the request raises.
    """
    url = data['data_url']
    param = dict([
        ('DestinationURL', 'Worldwide-retailers'),
        ('(country)', data['country_code']),
    ])
    try:
        body = cm.post_data(url, param)
        # Disabled in the original: follow the META "Location" redirect.
        # m = re.search(ur'META HTTP-EQUIV="Location"\s+Content="([^"]+)"', body)
        # if not m:
        #     raise IOError()
        # body = cm.get_data(m.group(1))
    except Exception:
        cm.dump('Error in fetching cities: %s, %s' % (url, param), log_name)
        return ()
def fetch_stores(data):
    """Fetch, parse and store the shops of one Chinese province/city.

    The reply is cut after the search-results marker; every text run
    between ``</script>`` and ``</span>`` is used as both the name and the
    address of a store. Entries are logged and inserted into ``stores``.

    :param data: dict with ``home_url``, ``province``, ``city``,
        ``brand_id``, ``brandname_e`` and ``brandname_c``.
    :return: list of the entries created; empty when the request fails or
        the marker is absent.
    """
    url = data['home_url']
    try:
        body = cm.post_data(url, {'lz_sf': data['province'], 'lz_sx': data['city']})
    except Exception:
        cm.dump('Error in fetching stores: %s, %s, %s' % (url, data['province'], data['city']),
                'samsonite_log.txt')
        dump_data = {'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']}
        cm.dump(dump_data)
        return []

    # The marker literal is the page's "search results" heading.
    start = body.find(u'搜索结果')
    if start == -1:
        cm.dump('Error in fetching stores: %s, %s, %s' % (url, data['province'], data['city']),
                'samsonite_log.txt')
        return []
    body = body[start + 4:]

    store_list = []
    for m in re.findall(r'</script>\s*(\S+)\s*</span>', body, re.S):
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        entry[cm.name_e] = m.strip()
        entry[cm.addr_e] = m.strip()

        entry[cm.city_c] = data['city']
        ret = gs.look_up(data['city'], 3)
        if ret is not None:
            entry[cm.city_e] = cm.extract_city(ret['name_e'])[0]
            if ret['province'] != '':
                entry[cm.province_e] = ret['province']['name_e']

        entry[cm.province_c] = data['province']
        ret = gs.look_up(data['province'], 2)
        if ret is not None:
            entry[cm.province_e] = ret['name_e']

        entry[cm.country_e] = u'CHINA'
        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        # Only fill province/city from the address when still empty.
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
        if ret[2] is not None and entry[cm.city_e] == '':
            entry[cm.city_e] = ret[2]
        gs.field_sense(entry)

        # NOTE(review): errors above go to 'samsonite_log.txt' while this
        # line goes to 'benetton_log.txt' -- confirm which is intended.
        cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (
            data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e],
            entry[cm.country_e], entry[cm.continent_e]), 'benetton_log.txt', False)
        db.insert_record(entry, 'stores')
        store_list.append(entry)
    # Previously the collected entries were never returned.
    return store_list
def get_store_details(data):
    """Fetch one store's details, build its entry and insert it.

    The ``elements`` object of the JSON reply supplies the address, map
    URL, share URL, opening times and comments. Brand information comes
    from the module-level ``brand_id``/``brandname_e``/``brandname_c``.

    :param data: dict with ``url``, ``country_id``, ``city_id``,
        ``store_id``, ``country`` and ``city``.
    :return: the store entry, or ``[]`` when the request fails.
    """
    url = data['url']
    try:
        html = cm.post_data(url, {
            'country': data['country_id'],
            'city': data['city_id'],
            'recordid': data['store_id'],
        })
    except Exception:
        print('Error occured: %s' % url)
        cm.dump({'level': 1, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': brand_id})
        return []

    entry = cm.init_store_entry(brand_id, brandname_e, brandname_c)
    info = json.loads(html)['elements']

    raw_addr = info['address'].replace('\\', '').replace('<p>', ',').replace('</p>', ',')
    addr = cm.reformat_addr(raw_addr)
    # The first line is the store name.
    entry[cm.name_e] = cm.reformat_addr(addr.split(',')[0])
    entry[cm.addr_e] = addr

    # The first "<lat>,<lng>" pair in the map URL gives the coordinates.
    coords = re.search(r'(-?\d+\.\d+),(-?\d+\.\d+)', info['gmap'])
    if coords is not None:
        cm.update_entry(entry, {cm.lat: float(coords.group(1)), cm.lng: float(coords.group(2))})

    entry[cm.url] = info['shareurl'].replace('\\', '')
    entry[cm.hours] = info['openingtimes']
    entry[cm.comments] = info['other']

    # Geo
    cm.update_entry(entry, {cm.country_e: data['country'], cm.city_e: data['city']})
    entry[cm.city_e] = cm.extract_city(entry[cm.city_e])[0]

    gs.field_sense(entry)
    ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
    if ret[1] is not None and entry[cm.province_e] == '':
        entry[cm.province_e] = ret[1]
    if ret[2] is not None and entry[cm.city_e] == '':
        entry[cm.city_e] = ret[2]
    gs.field_sense(entry)

    print('(%s / %d) Found store: %s, %s (%s, %s)' % (
        brandname_e, brand_id, entry[cm.name_e], entry[cm.addr_e],
        entry[cm.country_e], entry[cm.continent_e]))
    db.insert_record(entry, 'stores')
    return entry
def fetch_cities(data):
    """Request the cities of a country, or of a state when one is given.

    Sends ``countryChange`` when ``data['state_id']`` is ``None`` and
    ``stateChange`` otherwise. Only the fetch and its error path are
    visible here; on success the response is bound to ``body`` and nothing
    further happens.

    :return: ``[]`` when the request raises.
    """
    url = data['data_url']
    state_id = data['state_id']
    if state_id is not None:
        param = {
            'request': 'stateChange',
            'c': data['country_id'],
            's': state_id
        }
    else:
        param = {'request': 'countryChange', 'c': data['country_id']}
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return []
def fetch_stores(data):
    """Get store information for one city.

    Every ``<a href="...">`` element of the reply is treated as one store:
    the name comes from its ``<h3 class="titleShop">`` and the address from
    its first ``<p>``. Each entry is printed and inserted into ``stores``.

    :param data: dict with ``post_url``, ``city_id``, ``brand_id``,
        ``brandname_e``, ``brandname_c``, ``country_e`` and ``city_e``.
    :return: list of the entries created; empty when the request fails.
    """
    url = data["post_url"]
    try:
        html = cm.post_data(url, {"pid": data["city_id"], "lang": "en", "action": "popola_box_DX"})
    except Exception:
        print("Error occured in getting city list: %s" % url)
        dump_data = {"level": 2, "time": cm.format_time(), "data": {"url": url}, "brand_id": data["brand_id"]}
        cm.dump(dump_data)
        return []

    store_list = []
    for m in re.finditer(r'<a href="(.+?)".*?>', html):
        entry = cm.init_store_entry(data["brand_id"], data["brandname_e"], data["brandname_c"])
        entry[cm.url] = m.group(1)

        store_html, _, end = cm.extract_closure(html[m.start():], r"<a href", r"</a>")
        if end == 0:
            # No closure reported for this anchor; skip it.
            continue

        names = re.findall(r'<h3 class="titleShop">(.+?)</h3>', store_html, re.S)
        if len(names) > 0:
            entry[cm.name_e] = names[0].strip()

        paragraphs = re.findall(r"<p\b.*?>(.+?)(?:</p>|</div>)", store_html, re.S)
        if len(paragraphs) > 0:
            terms = cm.reformat_addr(paragraphs[0]).split(",")
            # When the last term yields a phone number, move it to tel.
            tel = cm.extract_tel(terms[-1])
            if tel != "":
                terms = terms[:-1]
                entry[cm.tel] = tel
            entry[cm.addr_e] = ", ".join([v.strip() for v in terms])

        entry["country_e"] = data["country_e"]
        entry["city_e"] = data["city_e"]
        gs.field_sense(entry)

        print("(%s / %d) Found store: %s, %s (%s, %s)" % (
            data["brandname_e"],
            data["brand_id"],
            entry[cm.name_e],
            entry[cm.addr_e],
            entry[cm.country_e],
            entry[cm.continent_e],
        ))
        store_list.append(entry)
        db.insert_record(entry, "stores")
    # Previously the collected entries were never returned.
    return store_list