def fetch_countries(data):
    """Fetch the page at ``data['url']`` and list the countries it offers.

    Side effects on ``data``: ``data['form_build_id']`` is set from the
    page's hidden ``form_build_id`` input and ``data['cookie']`` is set to
    the cookie returned by the request (``''`` when none was returned).

    Returns a list with one copy of ``data`` per ``<option>`` carrying a
    three-capital-letter value inside the ``edit-countries`` select; each
    copy gains ``country_code`` and ``country``.  Returns ``[]`` (after
    dumping an error) when the request fails, the form id is missing or the
    select cannot be found.
    """
    url = data['url']
    try:
        body, cookie = cm.get_data_cookie(url)
    except Exception:
        cm.dump('Error in fetching countries: %s' % url, log_name)
        return []

    form_match = re.search(u'name="form_build_id" value="(.+?)"', body)
    if form_match is None:
        cm.dump('Error in fetching countries: %s' % url, log_name)
        return []
    data['form_build_id'] = form_match.group(1)
    data['cookie'] = '' if cookie is None else cookie

    start = body.find(u'<select id="edit-countries"')
    if start == -1:
        cm.dump('Error in fetching countries: %s' % url, log_name)
        return []
    # Only the first element of extract_closure's result is used here
    # (presumably the markup of the matched <select> element -- confirm).
    select_html = cm.extract_closure(body[start:], u'<select\\b', u'</select>')[0]

    results = []
    option_pattern = u'<option.+?value="([A-Z]{3})".*?>(.+?)</option>'
    for code, name in re.findall(option_pattern, select_html):
        d = data.copy()
        d['country_code'] = code
        d['country'] = name.strip()
        print('Country: %s, %s' % (d['country_code'], d['country']))
        results.append(d)
    # The original fell off the end here and returned None on success even
    # though every failure path returns a list.
    return results
def fetch_countries(data):
    """Request ``data['url']`` and store the returned cookie in ``data``.

    On any exception an "AppKey" error is dumped and ``()`` is returned.

    NOTE(review): the visible body stops after the request -- the fetched
    page body is never used and the success path returns None implicitly.
    The function looks truncated; confirm against the full source.
    """
    page_url = data['url']
    try:
        # The unpacking stays inside the try so that a malformed return
        # value is reported the same way as a failed request.
        _body, data['cookie'] = cm.get_data_cookie(page_url)
    except Exception:
        cm.dump('Error in fetching AppKey: %s' % page_url, log_name)
        return ()
def fetch_countries(data):
    """Request ``data['url']`` and store the returned cookie in ``data``.

    On any exception a "countries" error is dumped and ``()`` is returned.

    NOTE(review): the visible body stops after the request -- the fetched
    page body is never used and the success path returns None implicitly.
    The function looks truncated; confirm against the full source.
    """
    page_url = data['url']
    try:
        # The unpacking stays inside the try so that a malformed return
        # value is reported the same way as a failed request.
        _body, data['cookie'] = cm.get_data_cookie(page_url)
    except Exception:
        cm.dump('Error in fetching countries: %s' % page_url, log_name)
        return ()
def fetch_store_details(data):
    """Request ``data['url']`` with the current cookie and refresh the cookie.

    ``data['cookie']`` is sent with the request and then overwritten with
    the cookie that comes back.  On any exception (including a missing
    ``cookie`` key, which is read inside the ``try``) a "stores" error is
    dumped and ``()`` is returned.

    NOTE(review): the visible body stops after the request -- the fetched
    page body is never used and the success path returns None implicitly.
    The function looks truncated; confirm against the full source.
    """
    page_url = data['url']
    try:
        _body, data['cookie'] = cm.get_data_cookie(
            page_url, cookie=data['cookie'])
    except Exception:
        cm.dump('Error in fetching stores: %s' % page_url, log_name)
        return ()
def fetch_states(data):
    """Request the state list for ``data['country_code']``.

    Sends ``IsFooterForm`` and ``CurrentCountryID`` to ``data['url']``
    together with the current cookie, then overwrites ``data['cookie']``
    with the cookie that comes back.  On any exception a "states" error is
    dumped and ``()`` is returned.

    NOTE(review): the visible body stops after the request -- the fetched
    page body is never used and the success path returns None implicitly.
    The function looks truncated; confirm against the full source.
    """
    endpoint = data['url']
    query = {
        'IsFooterForm': 'true',
        'CurrentCountryID': data['country_code'],
    }
    try:
        _body, data['cookie'] = cm.get_data_cookie(
            endpoint, query, cookie=data['cookie'])
    except Exception:
        cm.dump('Error in fetching states: %s, %s' % (endpoint, query),
                log_name)
        return ()
def fetch_store_list(data):
    """Request the store list for the country/state/city held in ``data``.

    Sends ``CurrentCountryID`` and ``CurrentCityID`` (plus
    ``CurrentRegionID`` when ``data['state_code']`` is truthy) to
    ``data['store_url']`` together with the current cookie, then overwrites
    ``data['cookie']`` with the cookie that comes back.  On any exception a
    "store list" error is dumped and ``()`` is returned.

    NOTE(review): the visible body stops after the request -- the fetched
    page body is never used and the success path returns None implicitly.
    The function looks truncated; confirm against the full source.
    """
    endpoint = data['store_url']
    # The city code is prefixed with a space and cut to its last five
    # characters.
    # NOTE(review): if a fixed-width five-character field is intended,
    # confirm the pad string -- a single space pads by one character only.
    city_id = ' %s' % data['city_code']
    query = {
        'CurrentCountryID': data['country_code'],
        'CurrentCityID': city_id[-5:],
    }
    state_code = data['state_code']
    if state_code:
        query['CurrentRegionID'] = state_code
    try:
        _body, data['cookie'] = cm.get_data_cookie(
            endpoint, query, cookie=data['cookie'])
    except Exception:
        cm.dump('Error in fetching store list: %s, %s' % (endpoint, query),
                log_name)
        return ()
def fetch_cities(db, data, logger):
    """Request the city list for the country (and state) held in ``data``.

    Sends ``IsFooterForm`` and ``CurrentCountryID`` (plus
    ``CurrentRegionID`` when ``data['state_code']`` is truthy) to
    ``data['url']`` together with the current cookie, overwrites
    ``data['cookie']`` with the cookie that comes back and parses the
    response with ``pq``.

    Returns ``()`` when the request or the parsing raises.  The failure is
    now reported through ``logger``; previously it was swallowed silently
    because the only reporting line was commented out.

    ``db`` is not used by the visible code.

    NOTE(review): the visible body stops after parsing -- the parsed
    document is never used and the success path returns None implicitly.
    The function looks truncated; confirm against the full source.
    """
    url = data['url']
    param = {'IsFooterForm': 'true', 'CurrentCountryID': data['country_code']}
    if data['state_code']:
        param['CurrentRegionID'] = data['state_code']
    try:
        body, data['cookie'] = cm.get_data_cookie(url, param, cookie=data['cookie'])
        q = pq(body)
    except Exception:
        logger.error('Error in fetching cities: %s, %s' % (url, param))
        return ()
def fetch_countries(db, data, logger):
    """Return one ``data`` copy per country option on the page.

    The page at ``data['url']`` is requested (the returned cookie is stored
    in ``data['cookie']``) and parsed with ``pq``.  Every ``<option>`` with
    a non-empty ``value`` under ``#bfselect-country`` yields a copy of
    ``data`` extended with:

    * ``country_code`` -- the option's ``value`` attribute,
    * ``node_id``      -- ``str()`` of that value,
    * ``country``      -- the option text, stripped and upper-cased, and
      decoded from UTF-8 when it is a byte string.

    Returns a tuple of those dicts, or ``()`` after logging an error when
    the request or the parsing raises.  ``db`` is not used.
    """
    page_url = data['url']
    try:
        body, data['cookie'] = cm.get_data_cookie(page_url)
        q = pq(body)
    except Exception:
        logger.error(u'Error in fetching countries: {0}'.format(page_url))
        return ()

    countries = []
    for option in q('#bfselect-country option[value!=""]'):
        entry = data.copy()
        code = option.attrib['value']
        entry['country_code'] = code
        entry['node_id'] = str(code)
        name = option.text.strip().upper()
        if isinstance(name, str):
            name = name.decode('utf-8')
        entry['country'] = name
        countries.append(entry)
    return tuple(countries)
def fetch_countries(data):
    """Return one ``data`` copy per country listed by the XML data service.

    Two requests are made:

    1. ``data['url']`` is fetched only for its cookie, which is stored in
       ``data['cookie']`` and must contain an ``AppKey`` entry.
    2. ``data['data_url']`` is queried with an ``xml_request`` asking for
       the ``Account::Country`` list, authenticated with that app key.

    Each ``account_country`` element of the response with a non-empty
    ``name`` text yields a copy of ``data`` whose ``country_code`` is that
    text, stripped.

    Returns a tuple of those dicts.  Returns ``()`` after dumping an error
    when either request fails or when the cookie carries no ``AppKey``.
    """
    url = data['url']
    try:
        body, data['cookie'] = cm.get_data_cookie(url)
    except Exception:
        cm.dump('Error in fetching AppKey: %s' % url, log_name)
        return ()

    # A missing cookie or a cookie without an AppKey used to escape as an
    # uncaught TypeError/KeyError; report it like every other failure here.
    try:
        app_key = data['cookie']['AppKey']
    except (KeyError, TypeError):
        cm.dump('Error in fetching AppKey: %s' % url, log_name)
        return ()

    url = data['data_url']
    param = {
        'xml_request': '<request><appkey>%s</appkey><formdata id="getlist"><objectname>Account::Country</objectname><where></where></formdata></request>' % app_key
    }
    try:
        body, data['cookie'] = cm.get_data_cookie(url, param, cookie=data['cookie'])
    except Exception:
        cm.dump('Error in fetching countries: %s, %s' % (url, param), log_name)
        return ()

    tree = et.fromstring(body.encode('utf-8'))
    results = []
    for ele in tree.iter('account_country'):
        # iter() replaces the deprecated getiterator(); taking the first hit
        # with a default avoids the IndexError the old ``[0]`` raised when
        # an element had no <name> descendant.
        name_node = next(iter(ele.iter('name')), None)
        val = name_node.text if name_node is not None else None
        if not val:
            continue
        d = data.copy()
        d['country_code'] = val.strip()
        results.append(d)
    return tuple(results)
url = data['url'] try: body, data['cookie'] = cm.get_data_cookie(url) except Exception, e: cm.dump('Error in fetching AppKey: %s' % url, log_name) return () app_key = data['cookie']['AppKey'] url = data['data_url'] param = { 'xml_request': '<request><appkey>%s</appkey><formdata id="getlist"><objectname>Account::Country</objectname><where></where></formdata></request>' % app_key } try: body, data['cookie'] = cm.get_data_cookie(url, param, cookie=data['cookie']) except Exception, e: cm.dump('Error in fetching countries: %s, %s' % (url, param), log_name) return () tree = et.fromstring(body.encode('utf-8')) results = [] for ele in tree.iter('account_country'): d = data.copy() val = ele.getiterator('name')[0].text if not val: continue d['country_code'] = val.strip() results.append(d) return tuple(results)
def fetch_stores(data):
    """Scrape the ninewest.com store locator and record every store found.

    Steps:

    1. Fetch ``data['url']`` to obtain the page and its cookies.
    2. Pause five seconds, then pull the ``Stores-Find`` form URL out of
       the page.
    3. POST a hard-coded search (start address ``kingman``, postal code
       ``67068``, 30 ``mi``) with the filtered cookies.
    4. For every ``storeColumnOne`` block in the response build a store
       entry (name, address lines, city / state / zip, phone), run it
       through ``gs.field_sense`` and insert it into the ``stores`` table.

    ``data`` must provide ``url``, ``brand_id``, ``brandname_e`` and
    ``brandname_c``.

    Returns the list of store entries.  Returns ``[]`` when either request
    fails (the failure is printed and dumped) or when the form URL cannot
    be found in the page.
    """

    def report_failure(failed_url):
        # Both HTTP steps report a failure the same way: console + dump.
        print('Error occured in getting country list: %s' % failed_url)
        dump_data = {
            'level': 1,
            'time': cm.format_time(),
            'data': {'url': failed_url},
            'brand_id': data['brand_id'],
        }
        cm.dump(dump_data)

    url = data['url']
    try:
        html, cookie_map = cm.get_data_cookie(url)
    except Exception:
        report_failure(url)
        return []

    print('SLEEPING>>>>')
    time.sleep(5)

    # Raw string: same pattern as before, without relying on "\d" surviving
    # as an unknown escape in a normal string literal.
    m = re.search(
        r'http://www.ninewest.com/on/demandware.store/Sites-ninewest-Site/default/Stores-Find/C\d{10}',
        html)
    if m is None:
        return []
    url = m.group(0)

    # Forward every cookie except the personalization ones and sr_token,
    # and add the invited_visitor flag.
    filtered_cookies = {}
    for key in cookie_map:
        if 'dwpersonalization_' in key or key == 'sr_token':
            continue
        filtered_cookies[key] = cookie_map[key]
    filtered_cookies['invited_visitor_22225'] = '1'
    cookie_map = filtered_cookies

    search_form = {
        'dwfrm_storelocator_startaddress': 'kingman',
        'dwfrm_storelocator_maxDistance': 30.00,
        'dwfrm_storelocator_outlet': 'true',
        'dwfrm_storelocator_retail': 'true',
        'dwfrm_storelocator_optical': 'true',
        'dwfrm_storelocator_eyewear': 'true',
        'dwfrm_storelocator_apparel': 'true',
        'dwfrm_storelocator_attire': 'true',
        'dwfrm_storelocator_department': 'true',
        'dwfrm_storelocator_IsMensFootwear': 'true',
        'dwfrm_storelocator_IsRRR': 'true',
        'dwfrm_storelocator_IsRRNY': 'true',
        'dwfrm_storelocator_IsRRS': 'true',
        'dwfrm_storelocator_wholesale': 'true',
        'dwfrm_storelocator_bba': 'true',
        'dwfrm_storelocator_ba': 'true',
        'dwfrm_storelocator_search.x': 0,
        'dwfrm_storelocator_search.y': 0,
        'dwfrm_storelocator_countryCode': 'US',
        'dwfrm_storelocator_postalCode': '67068',
        'dwfrm_storelocator_distanceUnit': 'mi',
        'dwfrm_storelocator_long': -98.117208,
        'dwfrm_storelocator_lat': 37.647131,
    }
    try:
        html = cm.post_data(url, search_form, cookie=cookie_map)
    except Exception:
        report_failure(url)
        return []

    store_list = []
    for m1 in re.finditer(u'<div class="storeColumnOne">', html):
        sub, start, end = cm.extract_closure(html[m1.start():], u'<div\\b',
                                             u'</div>')
        if end == 0:
            # Blocks for which extract_closure reports end == 0 are skipped
            # (presumably no balanced closing tag was found -- confirm).
            continue

        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                    data['brandname_c'])

        m2 = re.search(u'<div class="storename">([^<>]+)</div>', sub)
        if m2 is not None:
            entry[cm.name_e] = m2.group(1).strip()

        # "adddressline" (three d's) is the class name the original matches.
        addr_list = re.findall(u'<div class="adddressline">([^<>]+)</div>',
                               sub)
        entry[cm.addr_e] = ', '.join(addr_list)

        m2 = re.search(u'<div class="citystatezip">([^<>]+)</div>', sub)
        if m2 is not None:
            tmp = cm.reformat_addr(m2.group(1))
            terms = re.split('[, ]+', tmp)
            if len(terms) < 3:
                # Fewer than three tokens: the original stores the whole
                # text as the address instead of splitting it.
                entry[cm.addr_e] = tmp
            else:
                ret = gs.look_up(terms[0], 3)
                if ret is not None:
                    entry[cm.city_e] = ret['name_e']
                else:
                    entry[cm.city_e] = terms[0].strip().upper()

                ret = gs.look_up(terms[1], 2)
                if ret is not None:
                    entry[cm.province_e] = ret['name_e']
                else:
                    # Fixed: the fallback used terms[0] (the city token)
                    # although the failed lookup was for terms[1].
                    entry[cm.province_e] = terms[1].strip().upper()

                if re.match(r'\s*\d{5,}\s*', terms[2]) is not None:
                    entry[cm.zip_code] = terms[2].strip()

        m2 = re.search(u'<div class="storephone">([^<>]+)</div>', sub)
        if m2 is not None:
            entry[cm.tel] = m2.group(1)

        cm.update_entry(entry, {'country_e': 'UNITED STATES',
                                'continent_e': 'NORTH AMERICA'})
        gs.field_sense(entry)

        print('(%s / %d) Found store: %s, %s (%s, %s)' % (
            data['brandname_e'], data['brand_id'], entry[cm.name_e],
            entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]))
        store_list.append(entry)
        db.insert_record(entry, 'stores')

    # The original built store_list but never returned it, so a successful
    # run returned None while every failure path returns a list.
    return store_list
def fetch_stores(data):
    """Scrape the ninewest.com store locator and record every store found.

    Steps:

    1. Fetch ``data['url']`` to obtain the page and its cookies.
    2. Pause five seconds, then pull the ``Stores-Find`` form URL out of
       the page.
    3. POST a hard-coded search (start address ``kingman``, postal code
       ``67068``, 30 ``mi``) with the filtered cookies.
    4. For every ``storeColumnOne`` block in the response build a store
       entry (name, address lines, city / state / zip, phone), run it
       through ``gs.field_sense`` and insert it into the ``stores`` table.

    ``data`` must provide ``url``, ``brand_id``, ``brandname_e`` and
    ``brandname_c``.

    Returns the list of store entries.  Returns ``[]`` when either request
    fails (the failure is printed and dumped) or when the form URL cannot
    be found in the page.
    """

    def report_failure(failed_url):
        # Both HTTP steps report a failure the same way: console + dump.
        print('Error occured in getting country list: %s' % failed_url)
        dump_data = {
            'level': 1,
            'time': cm.format_time(),
            'data': {'url': failed_url},
            'brand_id': data['brand_id'],
        }
        cm.dump(dump_data)

    url = data['url']
    try:
        html, cookie_map = cm.get_data_cookie(url)
    except Exception:
        report_failure(url)
        return []

    print('SLEEPING>>>>')
    time.sleep(5)

    # Raw string: same pattern as before, without relying on "\d" surviving
    # as an unknown escape in a normal string literal.
    m = re.search(
        r'http://www.ninewest.com/on/demandware.store/Sites-ninewest-Site/default/Stores-Find/C\d{10}',
        html)
    if m is None:
        return []
    url = m.group(0)

    # Forward every cookie except the personalization ones and sr_token,
    # and add the invited_visitor flag.
    filtered_cookies = {}
    for key in cookie_map:
        if 'dwpersonalization_' in key or key == 'sr_token':
            continue
        filtered_cookies[key] = cookie_map[key]
    filtered_cookies['invited_visitor_22225'] = '1'
    cookie_map = filtered_cookies

    search_form = {
        'dwfrm_storelocator_startaddress': 'kingman',
        'dwfrm_storelocator_maxDistance': 30.00,
        'dwfrm_storelocator_outlet': 'true',
        'dwfrm_storelocator_retail': 'true',
        'dwfrm_storelocator_optical': 'true',
        'dwfrm_storelocator_eyewear': 'true',
        'dwfrm_storelocator_apparel': 'true',
        'dwfrm_storelocator_attire': 'true',
        'dwfrm_storelocator_department': 'true',
        'dwfrm_storelocator_IsMensFootwear': 'true',
        'dwfrm_storelocator_IsRRR': 'true',
        'dwfrm_storelocator_IsRRNY': 'true',
        'dwfrm_storelocator_IsRRS': 'true',
        'dwfrm_storelocator_wholesale': 'true',
        'dwfrm_storelocator_bba': 'true',
        'dwfrm_storelocator_ba': 'true',
        'dwfrm_storelocator_search.x': 0,
        'dwfrm_storelocator_search.y': 0,
        'dwfrm_storelocator_countryCode': 'US',
        'dwfrm_storelocator_postalCode': '67068',
        'dwfrm_storelocator_distanceUnit': 'mi',
        'dwfrm_storelocator_long': -98.117208,
        'dwfrm_storelocator_lat': 37.647131,
    }
    try:
        html = cm.post_data(url, search_form, cookie=cookie_map)
    except Exception:
        report_failure(url)
        return []

    store_list = []
    for m1 in re.finditer(u'<div class="storeColumnOne">', html):
        sub, start, end = cm.extract_closure(html[m1.start():], u'<div\\b',
                                             u'</div>')
        if end == 0:
            # Blocks for which extract_closure reports end == 0 are skipped
            # (presumably no balanced closing tag was found -- confirm).
            continue

        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                    data['brandname_c'])

        m2 = re.search(u'<div class="storename">([^<>]+)</div>', sub)
        if m2 is not None:
            entry[cm.name_e] = m2.group(1).strip()

        # "adddressline" (three d's) is the class name the original matches.
        addr_list = re.findall(u'<div class="adddressline">([^<>]+)</div>',
                               sub)
        entry[cm.addr_e] = ', '.join(addr_list)

        m2 = re.search(u'<div class="citystatezip">([^<>]+)</div>', sub)
        if m2 is not None:
            tmp = cm.reformat_addr(m2.group(1))
            terms = re.split('[, ]+', tmp)
            if len(terms) < 3:
                # Fewer than three tokens: the original stores the whole
                # text as the address instead of splitting it.
                entry[cm.addr_e] = tmp
            else:
                ret = gs.look_up(terms[0], 3)
                if ret is not None:
                    entry[cm.city_e] = ret['name_e']
                else:
                    entry[cm.city_e] = terms[0].strip().upper()

                ret = gs.look_up(terms[1], 2)
                if ret is not None:
                    entry[cm.province_e] = ret['name_e']
                else:
                    # Fixed: the fallback used terms[0] (the city token)
                    # although the failed lookup was for terms[1].
                    entry[cm.province_e] = terms[1].strip().upper()

                if re.match(r'\s*\d{5,}\s*', terms[2]) is not None:
                    entry[cm.zip_code] = terms[2].strip()

        m2 = re.search(u'<div class="storephone">([^<>]+)</div>', sub)
        if m2 is not None:
            entry[cm.tel] = m2.group(1)

        cm.update_entry(entry, {'country_e': 'UNITED STATES',
                                'continent_e': 'NORTH AMERICA'})
        gs.field_sense(entry)

        print('(%s / %d) Found store: %s, %s (%s, %s)' % (
            data['brandname_e'], data['brand_id'], entry[cm.name_e],
            entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]))
        store_list.append(entry)
        db.insert_record(entry, 'stores')

    # The original built store_list but never returned it, so a successful
    # run returned None while every failure path returns a list.
    return store_list