Пример #1
0
def extract_by_phone(phone_list=None):
    print "====Begin extraction by phone."
    columns_list = mysqldao.column_names(db_name, tb_yelp_phone)
    biz_list = []
    if phone_list == None:
        phone_list = []
        temp_phone_list = mysqldao.select(db_name, tb_insp_norm, ['PHONE'])
        for p in temp_phone_list:
            phone_list.append(p[0])
    exist_phone_list = []
    exist_phone_tuple = mysqldao.select_unique_column(db_name, tb_log_phone,
                                                      'phone')
    count = 0
    for phone in phone_list:
        biz_data = {}
        biz_list = []
        phone = str(phone).replace(' ', '').replace('_', '')
        if len(phone) == 11:
            phone = phone[1:]
        if phone not in exist_phone_tuple:
            url_params = {"phone": phone, 'ywsid': 'bxtstnNlHgO8c6W4X2yuYA'}
            biz_data = yelp_api.request(API_HOST,
                                        '/phone_search',
                                        url_params=url_params)[BUSINESS]
            if len(biz_data) != 0:
                data_phone = dic_look_up(biz_data[0], 'phone')
                if data_phone == phone:
                    biz_list.append(biz_data[0])
            print "Phone:", phone, "count", len(biz_list), 'data'
            tran_list = json_transform_phone(biz_list, columns_list)
            load_data_db(db_name, tb_yelp_phone, tran_list)
            extracttime = str(datetime.datetime.now())[0:19]
            log_para_list = [{'extracttime': extracttime, 'phone': phone}]
            mysqldao.insert(db_name, tb_log_phone, log_para_list)
def extract_by_phone(phone_list=None):
    print "====Begin extraction by phone."
    columns_list = mysqldao.column_names(db_name, tb_yelp_phone)
    biz_list=[]
    if phone_list==None:
        phone_list=[]
        temp_phone_list=mysqldao.select(db_name, tb_insp_norm, ['PHONE'])
        for p in temp_phone_list:
            phone_list.append(p[0])
    exist_phone_list=[]
    exist_phone_tuple = mysqldao.select_unique_column(db_name,tb_log_phone,'phone')
    count = 0
    for phone in phone_list:
        biz_data={}
        biz_list=[]
        phone=str(phone).replace(' ','').replace('_','')
        if len(phone) ==11:
            phone = phone[1:]
        if phone not in exist_phone_tuple:
            url_params={
            "phone":phone,
            'ywsid':'bxtstnNlHgO8c6W4X2yuYA'
            }
            biz_data = yelp_api.request(API_HOST, '/phone_search', url_params=url_params)[BUSINESS]
            if len(biz_data) != 0:
                data_phone=dic_look_up(biz_data[0],'phone')
                if data_phone == phone:
                    biz_list.append(biz_data[0])
            print "Phone:",phone,"count",len(biz_list),'data'
            tran_list=json_transform_phone(biz_list,columns_list)
            load_data_db(db_name, tb_yelp_phone, tran_list)
            extracttime = str(datetime.datetime.now())[0:19]
            log_para_list = [{'extracttime':extracttime, 'phone':phone}]
            mysqldao.insert(db_name,tb_log_phone,log_para_list)
def extract_by_zipcode(url_params=url_params):
    print "====Begin extraction by zipcode."
    zip_tuple_all=mysqldao.select(db_name,tb_zipcode,['zipcode'])
    exist_zipcode_tuple = mysqldao.select_unique_column(db_name,tb_log_zipcode,'zipcode')
    for code in zip_tuple_all:
        zipcode = str(code[0])
        url_params_copy=url_params
        if zipcode not in exist_zipcode_tuple:
            url_params_copy['location']="New York, "+str(zipcode)+", NY"
            url_params_copy['zipcode']=str(zipcode)
            extract_full(url_params_copy)
        else:
            print zipcode, "existed."
Пример #4
0
def extract_by_zipcode(url_params=url_params):
    print "====Begin extraction by zipcode."
    zip_tuple_all = mysqldao.select(db_name, tb_zipcode, ['zipcode'])
    exist_zipcode_tuple = mysqldao.select_unique_column(
        db_name, tb_log_zipcode, 'zipcode')
    for code in zip_tuple_all:
        zipcode = str(code[0])
        url_params_copy = url_params
        if zipcode not in exist_zipcode_tuple:
            url_params_copy['location'] = "New York, " + str(zipcode) + ", NY"
            url_params_copy['zipcode'] = str(zipcode)
            extract_full(url_params_copy)
        else:
            print zipcode, "existed."
Пример #5
0
import mysqldao
import urllib
import time

# SQL select expression: address1, address2, city, state and zip concatenated
# with single spaces, then double spaces replaced by one.  The trailing
# `address` reads as the column alias for the expression.
address = """
REPLACE(
CONCAT(address1, ' ',address2, ' ', city, ' ', state,' ', zip),
'  ', ' ')
address"""

# Select phone, the combined address and zip from yelp.yelp_phone.  The last
# two arguments presumably restrict the result to rows whose longitude and
# latitude are both 0 -- confirm against mysqldao.select.
yelp_query = mysqldao.select('yelp', 'yelp_phone', ['phone', address, 'zip'], ['longitude', 'latitude'], [{'longitude': 0, 'latitude': 0}])
# address = urllib.quote(yelp_query[1][1])
# Report how many rows came back.
print len(yelp_query)

import json
import requests

# `address` is rebound here: from now on it is a list of [phone, address]
# pairs built from the first two columns of each row.
address = [ [query[0], query[1]] for query in yelp_query]
entries = []

# Drop the first pair; the reason is not visible in this excerpt.
del address[0]

little = []
# little.append(address[1])
# little.append(address[2])
# little.append(address[3])

import MySQLdb as mdb
import sys

# Open a MySQL connection to localhost with an empty password and utf8 charset.
con = mdb.connect(host = 'localhost', user = '******', passwd = '', charset='utf8');
Пример #6
0
import mysqldao
import urllib
import json
import requests

address = """
REPLACE(
CONCAT(address1, ' ',address2, ' ', city, ' ', state,' ', zip),
'  ', ' ')
address"""

yelp_query = mysqldao.select('dwdproject', 'yelp_phone', ['phone', address, 'zip'])
address = [ [query[0], query[1]] for query in yelp_query ]
urls = []
for adr in address:
    url = 'http://maps.googleapis.com/maps/api/geocode/json?address="%s"&sensor=true' % urllib.quote(adr[1])
    urls.append(url)


resp = requests.get(urls[1])
data = json.loads(resp.text)
geo = data["results"][0]["geometry"]["location"]

data_entry = {'longitude': geo["lng"], 'latitude': geo["lat"], 'phone': address[1][0]}

mysqldao.update('dwdproject', 'yelp_phone', ['longitude', 'latitude'], ['phone'], [data_entry])
print mysqldao.select('dwdproject', 'yelp_phone', ['*'], ['phone'],[{'phone': '7188924968' }])
Пример #7
0
import mysqldao
import urllib

# SQL select expression: the address parts joined with single spaces, double
# spaces collapsed, exposed under the alias "address".
address = """
REPLACE(
CONCAT(address1, ' ',address2, ' ', city, ' ', state,' ', zip),
'  ', ' ')
address"""

# Phone, combined address and zip for the rows of yelp.yelp_phone.
yelp_query = mysqldao.select(
    'yelp', 'yelp_phone', ['phone', address, 'zip'])
# Bare expression kept from the original; it has no effect when run as a script.
yelp_query

import json
import requests

# From here on `address` is a list of [phone, address] pairs, not SQL text.
address = [[row[0], row[1]] for row in yelp_query]

# One geocoding URL per pair, with the address percent-encoded.
geocode_template = 'http://maps.googleapis.com/maps/api/geocode/json?address="%s"&sensor=true'
urls = [geocode_template % urllib.quote(pair[1]) for pair in address]

# Only the entry at index 1 is geocoded.
resp = requests.get(urls[1])
data = json.loads(resp.text)
geo = data["results"][0]["geometry"]["location"]

data_entry = {}
data_entry['longitude'] = geo["lng"]
data_entry['latitude'] = geo["lat"]
data_entry['phone'] = address[1][0]
# Bare expression kept from the original; it has no effect when run as a script.
data_entry
Пример #8
0
import mysqldao
import urllib
import json
import requests

# SQL select expression: address1, address2, city, state and zip concatenated
# with single spaces, then double spaces replaced by one.  The trailing
# `address` reads as the column alias for the expression.
address = """
REPLACE(
CONCAT(address1, ' ',address2, ' ', city, ' ', state,' ', zip),
'  ', ' ')
address"""

# Select phone, the combined address and zip from dwdproject.yelp_phone.
yelp_query = mysqldao.select('dwdproject', 'yelp_phone',
                             ['phone', address, 'zip'])
# `address` is rebound here to a list of [phone, address] pairs.
address = [[query[0], query[1]] for query in yelp_query]
# Build one geocoding request URL per pair, percent-encoding the address.
urls = []
for adr in address:
    url = 'http://maps.googleapis.com/maps/api/geocode/json?address="%s"&sensor=true' % urllib.quote(
        adr[1])
    urls.append(url)

# Only the URL at index 1 is requested; the first result's location is used.
resp = requests.get(urls[1])
data = json.loads(resp.text)
geo = data["results"][0]["geometry"]["location"]

# Coordinates keyed together with the phone of the same (index 1) pair.
data_entry = {
    'longitude': geo["lng"],
    'latitude': geo["lat"],
    'phone': address[1][0]
}

mysqldao.update('dwdproject', 'yelp_phone', ['longitude', 'latitude'],
Пример #9
0
import mysqldao
import urllib

# SQL select expression: address1, address2, city, state and zip concatenated
# with single spaces, then double spaces replaced by one.  The trailing
# `address` reads as the column alias for the expression.
address = """
REPLACE(
CONCAT(address1, ' ',address2, ' ', city, ' ', state,' ', zip),
'  ', ' ')
address"""

# Select phone, the combined address and zip from yelp.yelp_phone.
yelp_query = mysqldao.select('yelp', 'yelp_phone', ['phone', address, 'zip'])
# address = urllib.quote(yelp_query[1][1])
# Bare expression: has no effect when this runs as a script.
yelp_query

import json
import requests

# `address` is rebound here to a list of [phone, address] pairs.
address = [[query[0], query[1]] for query in yelp_query]
urls = []

# Build one geocoding request URL per pair, percent-encoding the address.
for adr in address:
    url = 'http://maps.googleapis.com/maps/api/geocode/json?address="%s"&sensor=true' % urllib.quote(
        adr[1])
    urls.append(url)

# Only the URL at index 1 is requested; the first result's location is used.
resp = requests.get(urls[1])
data = json.loads(resp.text)
geo = data["results"][0]["geometry"]["location"]

data_entry = {
    'longitude': geo["lng"],
    'latitude': geo["lat"],
import yelp_web
import yelp_etl
import mysqldao
import requests

BIZ_URL = "http://www.yelp.com/biz/"

keyword = 'price_range'
all_biz_id = mysqldao.select(yelp_etl.db_name, yelp_etl.tb_yelp_restaurant,
                             ['id'])
list_biz_id = mysqldao.select(yelp_etl.db_name, yelp_etl.tb_yelp_restaurant,
                              ['id'], [keyword], [{
                                  keyword: ''
                              }])

left_biz = len(list_biz_id)
total_biz = len(all_biz_id)
count = 0
for biz in list_biz_id:
    bizid = biz[0]
    url = BIZ_URL + bizid
    response = requests.get(url).text.encode('utf-8')
    linelist = str(response).split("\n")
    index = yelp_web.target_line_range(linelist, yelp_web.beginAttributeReg,
                                       yelp_web.endAttributeReg)
    attr_dict = yelp_web.attribute_match(linelist[index[0]:index[1]])
    attr_dict['id'] = bizid
    mysqldao.update(yelp_etl.db_name, yelp_etl.tb_yelp_restaurant,\
     yelp_web.list_attribute, ['id'], [attr_dict])
    count += 1
    print bizid, 'updated', "left ", left_biz - count
import yelp_web
import yelp_etl
import mysqldao
import requests

BIZ_URL="http://www.yelp.com/biz/"

keyword='price_range'
all_biz_id=mysqldao.select(yelp_etl.db_name, yelp_etl.tb_yelp_restaurant, ['id'])
list_biz_id=mysqldao.select(yelp_etl.db_name, yelp_etl.tb_yelp_restaurant, ['id'], [keyword], [{keyword:''}])

left_biz=len(list_biz_id)
total_biz=len(all_biz_id)
count = 0
for biz in list_biz_id:
	bizid=biz[0]
	url=BIZ_URL+bizid
	response=requests.get(url).text.encode('utf-8')
	linelist=str(response).split("\n")
	index=yelp_web.target_line_range(linelist, yelp_web.beginAttributeReg, yelp_web.endAttributeReg)
	attr_dict=yelp_web.attribute_match(linelist[index[0]:index[1]])
	attr_dict['id']=bizid
	mysqldao.update(yelp_etl.db_name, yelp_etl.tb_yelp_restaurant,\
		yelp_web.list_attribute, ['id'], [attr_dict])
	count+=1
	print bizid, 'updated', "left ", left_biz-count
Пример #12
0
import mysqldao
import urllib
import time

# SQL select expression: address1, address2, city, state and zip concatenated
# with single spaces, then double spaces replaced by one.  The trailing
# `address` reads as the column alias for the expression.
address = """
REPLACE(
CONCAT(address1, ' ',address2, ' ', city, ' ', state,' ', zip),
'  ', ' ')
address"""

# Select phone, the combined address and zip from yelp.yelp_phone.  The last
# two arguments presumably restrict the result to rows whose longitude and
# latitude are both 0 -- confirm against mysqldao.select.
yelp_query = mysqldao.select('yelp', 'yelp_phone', ['phone', address, 'zip'],
                             ['longitude', 'latitude'], [{
                                 'longitude': 0,
                                 'latitude': 0
                             }])
# address = urllib.quote(yelp_query[1][1])
# Report how many rows came back.
print len(yelp_query)

import json
import requests

# `address` is rebound here: from now on it is a list of [phone, address]
# pairs built from the first two columns of each row.
address = [[query[0], query[1]] for query in yelp_query]
entries = []

# Drop the first pair; the reason is not visible in this excerpt.
del address[0]

little = []
# little.append(address[1])
# little.append(address[2])
# little.append(address[3])