Exemplo n.º 1
0
def fetch_countries(data):
    url = data['url']
    try:
        body, cookie = cm.get_data_cookie(url)
    except Exception:
        cm.dump('Error in fetching countries: %s' % url, log_name)
        return []

    m = re.search(ur'name="form_build_id" value="(.+?)"', body)
    if m is None:
        cm.dump('Error in fetching countries: %s' % url, log_name)
        return []
    data['form_build_id'] = m.group(1)
    if cookie is None:
        data['cookie'] = ''
    else:
        data['cookie'] = cookie

    start = body.find(ur'<select id="edit-countries"')
    if start == -1:
        cm.dump('Error in fetching countries: %s' % url, log_name)
        return []
    body = cm.extract_closure(body[start:], ur'<select\b', ur'</select>')[0]

    results = []
    for m in re.findall(ur'<option.+?value="([A-Z]{3})".*?>(.+?)</option>', body):
        d = data.copy()
        d['country_code'] = m[0]
        d['country'] = m[1].strip()
        print 'Country: %s, %s' % (d['country_code'], d['country'])
        results.append(d)
Exemplo n.º 2
0
def fetch_countries(data):
    """Fetch ``data["url"]`` and store the returned cookie in ``data["cookie"]``.

    NOTE(review): only the initial request is visible here. On success the
    function falls off the end (implicitly returning ``None``) and ``body``
    is never used, so the rest of the implementation appears to be missing.

    :param data: dict containing ``"url"``; ``"cookie"`` is overwritten in
        place with the second value returned by ``cm.get_data_cookie``.
    :return: empty tuple when the request raises.
    """
    url = data["url"]
    try:
        body, data["cookie"] = cm.get_data_cookie(url)
    except Exception:
        # The exception object was never used, so it is no longer bound; this
        # form matches the other handlers in the file.
        cm.dump("Error in fetching AppKey: %s" % url, log_name)
        return ()
Exemplo n.º 3
0
def fetch_countries(data):
    url = data['url']
    try:
        body, cookie = cm.get_data_cookie(url)
    except Exception:
        cm.dump('Error in fetching countries: %s' % url, log_name)
        return []

    m = re.search(ur'name="form_build_id" value="(.+?)"', body)
    if m is None:
        cm.dump('Error in fetching countries: %s' % url, log_name)
        return []
    data['form_build_id'] = m.group(1)
    if cookie is None:
        data['cookie'] = ''
    else:
        data['cookie'] = cookie

    start = body.find(ur'<select id="edit-countries"')
    if start == -1:
        cm.dump('Error in fetching countries: %s' % url, log_name)
        return []
    body = cm.extract_closure(body[start:], ur'<select\b', ur'</select>')[0]

    results = []
    for m in re.findall(ur'<option.+?value="([A-Z]{3})".*?>(.+?)</option>',
                        body):
        d = data.copy()
        d['country_code'] = m[0]
        d['country'] = m[1].strip()
        print 'Country: %s, %s' % (d['country_code'], d['country'])
        results.append(d)
Exemplo n.º 4
0
def fetch_countries(data):
    """Fetch ``data['url']`` and store the returned cookie in ``data['cookie']``.

    NOTE(review): only the initial request is visible here. On success the
    function falls off the end (implicitly returning ``None``) and ``body``
    is never used, so the rest of the implementation appears to be missing.

    :param data: dict containing ``'url'``; ``'cookie'`` is overwritten in
        place with the second value returned by ``cm.get_data_cookie``.
    :return: empty tuple when the request raises.
    """
    url = data['url']
    try:
        body, data['cookie'] = cm.get_data_cookie(url)
    except Exception:
        # The exception object was never used, so it is no longer bound; this
        # form matches the other handlers in the file.
        cm.dump('Error in fetching countries: %s' % url, log_name)
        return ()
Exemplo n.º 5
0
def fetch_store_details(data):
    """Fetch ``data['url']`` using the cookie already held in ``data['cookie']``.

    NOTE(review): only the request itself is visible here. On success the
    function falls off the end (implicitly returning ``None``) and ``body``
    is never used, so the rest of the implementation appears to be missing.

    :param data: dict containing ``'url'`` and ``'cookie'``; ``'cookie'`` is
        replaced with the cookie returned by the request.
    :return: empty tuple when the request raises.
    """
    url = data['url']
    try:
        body, data['cookie'] = cm.get_data_cookie(url, cookie=data['cookie'])
    except Exception:
        # The exception object was never used, so it is no longer bound; this
        # form matches the other handlers in the file.
        cm.dump('Error in fetching stores: %s' % url, log_name)
        return ()
Exemplo n.º 6
0
def fetch_states(data):
    """Request the state list for the country in ``data['country_code']``.

    NOTE(review): only the request itself is visible here. On success the
    function falls off the end (implicitly returning ``None``) and ``body``
    is never used, so the rest of the implementation appears to be missing.

    :param data: dict containing ``'url'``, ``'country_code'`` and
        ``'cookie'``; ``'cookie'`` is replaced with the cookie returned by
        the request.
    :return: empty tuple when the request raises.
    """
    url = data['url']
    param = {'IsFooterForm': 'true', 'CurrentCountryID': data['country_code']}
    try:
        body, data['cookie'] = cm.get_data_cookie(url, param, cookie=data['cookie'])
    except Exception:
        # The exception object was never used, so it is no longer bound; this
        # form matches the other handlers in the file.
        cm.dump('Error in fetching states: %s, %s' % (url, param), log_name)
        return ()
Exemplo n.º 7
0
def fetch_store_list(data):
    """Request the store list for a country / region / city combination.

    NOTE(review): only the request itself is visible here. On success the
    function falls off the end (implicitly returning ``None``) and ``body``
    is never used, so the rest of the implementation appears to be missing.

    :param data: dict containing ``'store_url'``, ``'country_code'``,
        ``'city_code'``, ``'state_code'`` and ``'cookie'``; ``'cookie'`` is
        replaced with the cookie returned by the request.
    :return: empty tuple when the request raises.
    """
    url = data['store_url']
    # The city id is left-padded with spaces and cut to its last five
    # characters, i.e. a fixed-width, right-aligned field.
    param = {'CurrentCountryID': data['country_code'], 'CurrentCityID': ('    %s' % data['city_code'])[-5:]}
    # The region is only sent when a state code is present (truthy).
    if data['state_code']:
        param['CurrentRegionID'] = data['state_code']
    try:
        body, data['cookie'] = cm.get_data_cookie(url, param, cookie=data['cookie'])
    except Exception:
        # The exception object was never used, so it is no longer bound; this
        # form matches the other handlers in the file.
        cm.dump('Error in fetching store list: %s, %s' % (url, param), log_name)
        return ()
Exemplo n.º 8
0
def fetch_cities(db, data, logger):
    """Request the city list for a country (and optional region) and parse it.

    NOTE(review): only the request and the ``pq`` parse are visible here. On
    success the function falls off the end (implicitly returning ``None``)
    and ``q`` is never used, so the rest of the implementation appears to be
    missing. ``db`` is not used in the visible part.

    :param db: unused in the visible code.
    :param data: dict containing ``'url'``, ``'country_code'``,
        ``'state_code'`` and ``'cookie'``; ``'cookie'`` is replaced with the
        cookie returned by the request.
    :param logger: object with an ``error`` method; receives the failure
        message.
    :return: empty tuple when the request or the parse raises.
    """
    url = data['url']
    param = {'IsFooterForm': 'true', 'CurrentCountryID': data['country_code']}
    # The region is only sent when a state code is present (truthy).
    if data['state_code']:
        param['CurrentRegionID'] = data['state_code']
    try:
        body, data['cookie'] = cm.get_data_cookie(url, param, cookie=data['cookie'])
        q = pq(body)
    except Exception:
        # Failures used to be swallowed silently (the log call was commented
        # out); report them through the logger that is passed in.
        logger.error('Error in fetching cities: %s, %s' % (url, param))
        return ()
Exemplo n.º 9
0
def fetch_countries(db, data, logger):
    """Build one record per non-empty option of the ``#bfselect-country`` list.

    The page at ``data['url']`` is fetched with ``cm.get_data_cookie`` and
    parsed with ``pq``; the returned cookie is stored in ``data['cookie']``.

    :param db: not used by this function.
    :param data: dict containing ``'url'``; ``'cookie'`` is overwritten.
    :param logger: object with an ``error`` method, used when the fetch or
        the parse raises.
    :return: tuple of copies of ``data`` carrying ``'country_code'``,
        ``'node_id'`` and ``'country'``; an empty tuple on failure.
    """
    url = data['url']
    try:
        body, data['cookie'] = cm.get_data_cookie(url)
        q = pq(body)
    except Exception:
        logger.error(unicode.format(u'Error in fetching countries: {0}', url))
        return ()

    def to_record(option):
        # One output dict per <option>: the option value becomes the code and
        # the node id, the upper-cased option text becomes the country name.
        record = data.copy()
        record['country_code'] = option.attrib['value']
        record['node_id'] = str(record['country_code'])
        name = option.text.strip().upper()
        if isinstance(name, str):
            # Byte strings are decoded so the name is always unicode.
            name = name.decode('utf-8')
        record['country'] = name
        return record

    return tuple(to_record(option)
                 for option in q('#bfselect-country option[value!=""]'))
Exemplo n.º 10
0
def fetch_countries(data):
    """Fetch the country list through the XML data endpoint.

    Two requests are made: ``data["url"]`` is fetched first to obtain a
    cookie holding an ``AppKey``; then a ``getlist`` request for
    ``Account::Country`` is sent to ``data["data_url"]`` with that key.

    :param data: dict containing ``"url"`` and ``"data_url"``; its
        ``"cookie"`` entry is overwritten by each request.
    :return: tuple of copies of ``data``, one per ``account_country`` element
        that has a non-empty ``name`` text, with ``"country_code"`` set to
        that text stripped. An empty tuple is returned when either request
        fails or no ``AppKey`` is available.
    """
    url = data["url"]
    try:
        body, data["cookie"] = cm.get_data_cookie(url)
        # A missing AppKey (or a missing cookie) is reported like any other
        # failure of this step instead of escaping as KeyError / TypeError.
        app_key = data["cookie"]["AppKey"]
    except Exception:
        cm.dump("Error in fetching AppKey: %s" % url, log_name)
        return ()
    url = data["data_url"]
    param = {
        "xml_request": '<request><appkey>%s</appkey><formdata id="getlist"><objectname>Account::Country</objectname><where></where></formdata></request>'
        % app_key
    }
    try:
        body, data["cookie"] = cm.get_data_cookie(url, param, cookie=data["cookie"])
    except Exception:
        cm.dump("Error in fetching countries: %s, %s" % (url, param), log_name)
        return ()

    tree = et.fromstring(body.encode("utf-8"))
    results = []
    for ele in tree.iter("account_country"):
        # iter() replaces the deprecated getiterator(), whose result is not
        # indexable on every ElementTree implementation; next() with a
        # default also covers elements that have no <name> at all.
        name_ele = next(iter(ele.iter("name")), None)
        val = name_ele.text if name_ele is not None else None
        if not val:
            continue
        d = data.copy()
        d["country_code"] = val.strip()
        results.append(d)
    return tuple(results)
Exemplo n.º 11
0
    url = data['url']
    try:
        body, data['cookie'] = cm.get_data_cookie(url)
    except Exception, e:
        cm.dump('Error in fetching AppKey: %s' % url, log_name)
        return ()
    app_key = data['cookie']['AppKey']
    url = data['data_url']
    param = {
        'xml_request':
        '<request><appkey>%s</appkey><formdata id="getlist"><objectname>Account::Country</objectname><where></where></formdata></request>'
        % app_key
    }
    try:
        body, data['cookie'] = cm.get_data_cookie(url,
                                                  param,
                                                  cookie=data['cookie'])
    except Exception, e:
        cm.dump('Error in fetching countries: %s, %s' % (url, param), log_name)
        return ()

    tree = et.fromstring(body.encode('utf-8'))
    results = []
    for ele in tree.iter('account_country'):
        d = data.copy()
        val = ele.getiterator('name')[0].text
        if not val:
            continue
        d['country_code'] = val.strip()
        results.append(d)
    return tuple(results)
Exemplo n.º 12
0
def fetch_stores(data):
    url = data['url']
    try:
        html, cookie_map = cm.get_data_cookie(url)
    except Exception:
        print 'Error occured in getting country list: %s' % url
        dump_data = {'level': 1, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']}
        cm.dump(dump_data)
        return []

    print 'SLEEPING>>>>'
    time.sleep(5)

    m = re.search('http://www.ninewest.com/on/demandware.store/Sites-ninewest-Site/default/Stores-Find/C\d{10}', html)
    if m is None:
        return []
    url = m.group(0)

    cookie_map_new = {}
    for key in cookie_map:
        if 'dwpersonalization_' in key or key == 'sr_token':
            continue
        cookie_map_new[key] = cookie_map[key]
    cookie_map_new['invited_visitor_22225'] = '1'
    cookie_map = cookie_map_new


    try:
        html = cm.post_data(url, {'dwfrm_storelocator_startaddress': 'kingman',
                                         'dwfrm_storelocator_maxDistance': 30.00,
                                         'dwfrm_storelocator_outlet': 'true',
                                         'dwfrm_storelocator_retail': 'true',
                                         'dwfrm_storelocator_optical': 'true',
                                         'dwfrm_storelocator_eyewear': 'true',
                                         'dwfrm_storelocator_apparel': 'true',
                                         'dwfrm_storelocator_attire': 'true',
                                         'dwfrm_storelocator_department': 'true',
                                         'dwfrm_storelocator_IsMensFootwear': 'true',
                                         'dwfrm_storelocator_IsRRR': 'true',
                                         'dwfrm_storelocator_IsRRNY': 'true',
                                         'dwfrm_storelocator_IsRRS': 'true',
                                         'dwfrm_storelocator_wholesale': 'true',
                                         'dwfrm_storelocator_bba': 'true',
                                         'dwfrm_storelocator_ba': 'true',
                                         'dwfrm_storelocator_search.x': 0,
                                         'dwfrm_storelocator_search.y': 0,
                                         'dwfrm_storelocator_countryCode': 'US',
                                         'dwfrm_storelocator_postalCode': '67068',
                                         'dwfrm_storelocator_distanceUnit': 'mi',
                                         'dwfrm_storelocator_long': -98.117208,
                                         'dwfrm_storelocator_lat': 37.647131,}, cookie=cookie_map)
    except Exception:
        print 'Error occured in getting country list: %s' % url
        dump_data = {'level': 1, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']}
        cm.dump(dump_data)
        return []

    store_list = []
    for m1 in re.finditer(ur'<div class="storeColumnOne">', html):
        sub, start, end = cm.extract_closure(html[m1.start():], ur'<div\b', ur'</div>')
        if end == 0:
            continue

        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        m2 = re.search(ur'<div class="storename">([^<>]+)</div>', sub)
        if m2 is not None:
            entry[cm.name_e] = m2.group(1).strip()

        addr_list = [m2 for m2 in re.findall(ur'<div class="adddressline">([^<>]+)</div>', sub)]
        entry[cm.addr_e] = ', '.join(addr_list)

        m2 = re.search(ur'<div class="citystatezip">([^<>]+)</div>', sub)
        if m2 is not None:
            tmp = cm.reformat_addr(m2.group(1))
            terms = re.split('[, ]+', tmp)
            if len(terms) < 3:
                entry[cm.addr_e] = tmp
            else:
                ret = gs.look_up(terms[0], 3)
                if ret is not None:
                    entry[cm.city_e] = ret['name_e']
                else:
                    entry[cm.city_e] = terms[0].strip().upper()

                ret = gs.look_up(terms[1], 2)
                if ret is not None:
                    entry[cm.province_e] = ret['name_e']
                else:
                    entry[cm.province_e] = terms[0].strip().upper()

                if re.match('\s*\d{5,}\s*', terms[2]) is not None:
                    entry[cm.zip_code] = terms[2].strip()

        m2 = re.search(ur'<div class="storephone">([^<>]+)</div>', sub)
        if m2 is not None:
            entry[cm.tel] = m2.group(1)

        cm.update_entry(entry, {'country_e': 'UNITED STATES', 'continent_e': 'NORTH AMERICA'})
        gs.field_sense(entry)
        print '(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'],
                                                          entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e],
                                                          entry[cm.continent_e])
        store_list.append(entry)
        db.insert_record(entry, 'stores')
Exemplo n.º 13
0
def fetch_stores(data):
    url = data['url']
    try:
        html, cookie_map = cm.get_data_cookie(url)
    except Exception:
        print 'Error occured in getting country list: %s' % url
        dump_data = {
            'level': 1,
            'time': cm.format_time(),
            'data': {
                'url': url
            },
            'brand_id': data['brand_id']
        }
        cm.dump(dump_data)
        return []

    print 'SLEEPING>>>>'
    time.sleep(5)

    m = re.search(
        'http://www.ninewest.com/on/demandware.store/Sites-ninewest-Site/default/Stores-Find/C\d{10}',
        html)
    if m is None:
        return []
    url = m.group(0)

    cookie_map_new = {}
    for key in cookie_map:
        if 'dwpersonalization_' in key or key == 'sr_token':
            continue
        cookie_map_new[key] = cookie_map[key]
    cookie_map_new['invited_visitor_22225'] = '1'
    cookie_map = cookie_map_new

    try:
        html = cm.post_data(url, {
            'dwfrm_storelocator_startaddress': 'kingman',
            'dwfrm_storelocator_maxDistance': 30.00,
            'dwfrm_storelocator_outlet': 'true',
            'dwfrm_storelocator_retail': 'true',
            'dwfrm_storelocator_optical': 'true',
            'dwfrm_storelocator_eyewear': 'true',
            'dwfrm_storelocator_apparel': 'true',
            'dwfrm_storelocator_attire': 'true',
            'dwfrm_storelocator_department': 'true',
            'dwfrm_storelocator_IsMensFootwear': 'true',
            'dwfrm_storelocator_IsRRR': 'true',
            'dwfrm_storelocator_IsRRNY': 'true',
            'dwfrm_storelocator_IsRRS': 'true',
            'dwfrm_storelocator_wholesale': 'true',
            'dwfrm_storelocator_bba': 'true',
            'dwfrm_storelocator_ba': 'true',
            'dwfrm_storelocator_search.x': 0,
            'dwfrm_storelocator_search.y': 0,
            'dwfrm_storelocator_countryCode': 'US',
            'dwfrm_storelocator_postalCode': '67068',
            'dwfrm_storelocator_distanceUnit': 'mi',
            'dwfrm_storelocator_long': -98.117208,
            'dwfrm_storelocator_lat': 37.647131,
        },
                            cookie=cookie_map)
    except Exception:
        print 'Error occured in getting country list: %s' % url
        dump_data = {
            'level': 1,
            'time': cm.format_time(),
            'data': {
                'url': url
            },
            'brand_id': data['brand_id']
        }
        cm.dump(dump_data)
        return []

    store_list = []
    for m1 in re.finditer(ur'<div class="storeColumnOne">', html):
        sub, start, end = cm.extract_closure(html[m1.start():], ur'<div\b',
                                             ur'</div>')
        if end == 0:
            continue

        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                    data['brandname_c'])
        m2 = re.search(ur'<div class="storename">([^<>]+)</div>', sub)
        if m2 is not None:
            entry[cm.name_e] = m2.group(1).strip()

        addr_list = [
            m2 for m2 in re.findall(
                ur'<div class="adddressline">([^<>]+)</div>', sub)
        ]
        entry[cm.addr_e] = ', '.join(addr_list)

        m2 = re.search(ur'<div class="citystatezip">([^<>]+)</div>', sub)
        if m2 is not None:
            tmp = cm.reformat_addr(m2.group(1))
            terms = re.split('[, ]+', tmp)
            if len(terms) < 3:
                entry[cm.addr_e] = tmp
            else:
                ret = gs.look_up(terms[0], 3)
                if ret is not None:
                    entry[cm.city_e] = ret['name_e']
                else:
                    entry[cm.city_e] = terms[0].strip().upper()

                ret = gs.look_up(terms[1], 2)
                if ret is not None:
                    entry[cm.province_e] = ret['name_e']
                else:
                    entry[cm.province_e] = terms[0].strip().upper()

                if re.match('\s*\d{5,}\s*', terms[2]) is not None:
                    entry[cm.zip_code] = terms[2].strip()

        m2 = re.search(ur'<div class="storephone">([^<>]+)</div>', sub)
        if m2 is not None:
            entry[cm.tel] = m2.group(1)

        cm.update_entry(entry, {
            'country_e': 'UNITED STATES',
            'continent_e': 'NORTH AMERICA'
        })
        gs.field_sense(entry)
        print '(%s / %d) Found store: %s, %s (%s, %s)' % (
            data['brandname_e'], data['brand_id'], entry[cm.name_e],
            entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e])
        store_list.append(entry)
        db.insert_record(entry, 'stores')