Exemplo n.º 1
0
 for url in shop_urls:
     url_open = "http://www.dianping.com" + url
     if db_shop.find_one({"mallid": 2001, "storeid": filter(str.isdigit, str(url))}):
         print "p{}-exist: {}".format(i, url_open)
         count_exi = count_exi + 1
         continue
     print "p{}: {}".format(i, url_open)
     html = get_html(url_open)
     shop_info = h2d_dianping(html=html, storeid=url).h2d()
     print shop_info
     if shop_info["addr"]:
         # 获取地理坐标 maximum retry = 3
         for i in range(0, 3):
             loc_coordinates = []
             try:
                 loc_coordinates = geocode(shop_info["addr"].encode("utf-8"))
             except Exception, e:
                 print e
             if loc_coordinates:
                 loc = []
                 loc.append(loc_coordinates[0][0]["lng"])
                 loc.append(loc_coordinates[0][0]["lat"])
                 shop_info["loc"] = {"type": "Point", "coordinates": loc}
                 print loc
                 break
             print "geocode retry: " + str(i)
     db_shop.replace_one({"mallid": shop_info["mallid"],
                                        "storeid": shop_info["storeid"]}, shop_info, upsert=True)
     count = count + 1
 if not page_site["next"]:
     break
Exemplo n.º 2
0
         "mallid": 2001,
         "storeid": filter(str.isdigit, str(url))
 }):
     print "p{}-exist: {}".format(i, url_open)
     count_exi = count_exi + 1
     continue
 # Download and parse the shop page, then try to attach coordinates to it.
 # ``i`` is bound outside this span and labels the log line below.
 print("p{}: {}".format(i, url_open))
 html = get_html(url_open)
 shop_info = h2d_dianping(html=html, storeid=url).h2d()
 print(shop_info)
 if shop_info["addr"]:
     # Fetch geographic coordinates; maximum retry = 3.
     # The attempt counter deliberately has its own name: reusing ``i`` here
     # overwrote the value used by the "p{}" log lines.
     for attempt in range(3):
         loc_coordinates = []
         try:
             loc_coordinates = geocode(
                 shop_info["addr"].encode("utf-8"))
         except Exception as e:
             # Best effort: report the failure and fall through to the retry.
             print(e)
         if loc_coordinates:
             # Stored as a GeoJSON-style Point: longitude first, then latitude.
             loc = [
                 loc_coordinates[0][0]["lng"],
                 loc_coordinates[0][0]["lat"],
             ]
             shop_info["loc"] = {
                 "type": "Point",
                 "coordinates": loc
             }
             print(loc)
             break
         print("geocode retry: " + str(attempt))
 db_shop.replace_one(
     {
Exemplo n.º 3
0
# Create an index on the "loc" field of the shop collection at import time.
# NOTE(review): GEOSPHERE is presumably pymongo.GEOSPHERE (a "2dsphere" index)
# and db_shop a pymongo collection -- confirm against the file's imports.
# NOTE(review): unique=True would reject a second document with identical
# "loc" coordinates (e.g. two shops at the same address) -- confirm intended.
db_shop.create_index([("loc", GEOSPHERE)], unique=True, background=True)

if __name__ == '__main__':
    all = db_shop.find()
    print "all:{}".format(all.count())
    count = 0
    for shop in all:
        # Log a numbered banner and the raw document before processing it.
        print("======{}=======".format(count))
        print(shop)
        shop_id = shop["_id"]
        address = shop["addr"]
        # Ask the geocoder for coordinates, giving up after three attempts.
        # NOTE: a 5-second pause before each request (time.sleep(5)) is
        # currently disabled here.
        loc_coordinates = []
        for attempt in range(3):
            try:
                loc_coordinates = geocode(address.encode("utf-8"))
            except Exception as err:
                # Any failure counts as "no result" for this attempt.
                loc_coordinates = []
                print(err)
            if loc_coordinates:
                break
            print("retry: " + str(attempt))
        if loc_coordinates:
            loc = []
            loc.append(loc_coordinates[0][0]["lng"])
            loc.append(loc_coordinates[0][0]["lat"])
            db_shop.update_one(
                {"_id": shop_id},
                {"$set": {
                    "loc": {
                        "type": "Point",
                        "coordinates": loc