Пример #1
0
def find_yelp(index, apart_result):
    """Print the Yelp restaurants for one apartment, highest rated first.

    Args:
        index: Row label of the apartment inside ``apart_result``.
        apart_result: Table supporting ``.loc[row, 'Address']`` lookups
            (a pandas DataFrame in practice).

    Returns:
        None. The address and the sorted restaurant table are printed.
    """
    address = str(apart_result.loc[index, 'Address'])
    print(address)
    yelp_list = yelp.restaurant(address)
    df_yelp = pd.DataFrame(yelp_list, columns=['Name', 'Rating', 'URL'])
    # sort_values returns a new frame rather than sorting in place; the
    # result has to be kept, otherwise the table is printed unsorted.
    df_yelp = df_yelp.sort_values(by='Rating', ascending=False)
    print(df_yelp)
Пример #2
0
def get_restuarants():
    """Fetch restaurants for the module-level ``ZipCode`` and ``Budget``.

    ``Budget`` is bucketed into a number from 1 to 4 (below 20, below 60,
    below 120, anything else) that is handed to ``y.yelp_restuarants``
    together with ``ZipCode``.
    NOTE(review): the number presumably selects a Yelp price tier / URL
    variant - confirm against ``y.yelp_restuarants``.

    Returns:
        None (after printing a message) when the lookup returns None, an
        empty DataFrame when it returns an empty result, otherwise the
        result converted to a DataFrame and transposed.
    """
    empty_frame = pd.DataFrame()

    # Walk the budget ceilings in ascending order; the first ceiling the
    # budget falls under decides the bucket, otherwise the top bucket is used.
    url_no = 4
    for bucket, ceiling in enumerate((20, 60, 120), start=1):
        if Budget < ceiling:
            url_no = bucket
            break

    listings = y.yelp_restuarants(ZipCode, url_no)
    if listings is None:
        print('Aw, Snap! Restaurants are not available.')
        return None
    if len(listings) == 0:
        return empty_frame
    return pd.DataFrame(listings).transpose()
Пример #3
0
	'''
    LE = LabelEncoder()
    food_inspections['ViolLevel'] = food_inspections['ViolLevel'].fillna(0)
    food_inspections['ViolLevel'] = LE.fit_transform(
        food_inspections['ViolLevel'])

    return food_inspections


'''''' '''''' '''''' '''''' '''''' ''''''
' O B T A I N   Y E L P   D A T A  '
'''''' '''''' '''''' '''''' '''''' ''''''

# First check that Yelp can be queried at all; everything below depends on it.
try:
    results = Yelp.search('food', 'Boston MA')
except Exception:
    # "except Exception" (not a bare except) so that Ctrl-C and SystemExit are
    # not misreported as an authentication problem.
    # The parenthesised form of print works under both Python 2 and Python 3.
    print("ERROR: Could not extract data from Yelp. Looks like Yelp was unable to authenticate you. Do you have the necessary authentication tokens and keys? Check Yelp.py for more information.")
    # Exit with a non-zero status so callers can tell the run failed.
    sys.exit(1)

# Load the inspection records and reverse the row order.
food_inspections = pd.read_csv('fixed_locations.csv',
                               sep=',',
                               low_memory=False)
food_inspections = food_inspections.iloc[::-1]

# Convert the set {NaN,*,**,***} in ViolLevel to the set {0,1,2,3}
food_inspections = fixViolLevel(food_inspections)

#Drop the unnecessary columns
columns = [
Пример #4
0
		for person in simplifiedTweets:
			places = self.yelp.searchStuff(random.choice(self.subarea), person[1])
			rng = random.randint(0, len(places) - 1)
			address = ' '.join(places[rng]['location'])
			name = places[rng]['name']
			googlelink = self.goo_shorten_url(address)
			replyTo = "@%s"% person[0]
			try:
				tweeter.updateMsg(replyTo + " #" + str(self.counter) + " " + name + " at " + googlelink)
				self.counter += 1
			except tweeter.error.TweepyError as err:
				print (err)

		threading.Timer(self.readtime, self.getTweets).start()

	def goo_shorten_url(self, address):
		"""Return a shortened Google Maps directions link for ``address``.

		The directions URL is built by appending ``address`` to the Google
		Maps ``dir`` prefix, posted as JSON to the Google URL-shortener
		endpoint, and the ``id`` field of the JSON reply is returned.

		NOTE(review): the API key is embedded in the endpoint literal and
		``address`` is appended without URL-encoding - both worth revisiting.
		"""
		endpoint = 'https://www.googleapis.com/urlshortener/v1/url?key=AIzaSyAMjzI6DES-ntXZk-cC448DMDE3tnxaUiQ'
		body = json.dumps({'longUrl' : 'http://www.google.com/maps/dir//' + address})
		response = requests.post(
			endpoint,
			data=body,
			headers={'content-type' : 'application/json'},
		)
		return response.json()['id']

	def run(self):
		"""Start the bot by calling ``makeTweets()`` and then ``getTweets()``.

		NOTE(review): ``getTweets`` appears to re-schedule itself with a
		``threading.Timer``, which would make the reading side repeat on its
		own after this call returns - confirm against the method definition.
		"""
		self.makeTweets()
		self.getTweets()

# Script entry point: build the bot around a Yelp search client and start it.
# NOTE(review): the meaning of the second argument (45) is not visible here -
# presumably the polling interval in seconds; confirm against GoEatLA.__init__.
goEatLA = GoEatLA(Yelp.YelpSearch(),45)
goEatLA.run()
Пример #5
0
def results(request):
    """Validate the search form, score the matching houses and render them.

    Every field is read from ``request.POST`` and validated first. If any
    check fails, or the house query returns no houses or more than 200, the
    home page is re-rendered with the messages in ``c["errors"]``.

    On success the view:
      * scores each house with Yelp data and the local database,
      * deletes and recreates ``HOUSE_PATH``, writing one directory of
        per-category CSV files per house plus ``attributes.csv``,
      * renders ``search/results.html`` with the houses and bar-chart data.

    Args:
        request: Request object whose ``POST`` mapping holds the form data.

    Returns:
        The value of ``render`` for ``search/home.html`` (validation or
        result-count errors) or ``search/results.html`` (success).
    """
    c = {}
    errors = []

    # ------------------------------------------------------------------
    # Form validation.
    # Explicit checks replace ``assert`` because asserts are removed when
    # Python runs with -O, which would silently switch the checks off.
    # ------------------------------------------------------------------
    try:
        c["current_distance"] = request.POST.get("distance")
        distance = float(c["current_distance"])
        if request.POST.get("distance_type") == "miles":
            distance *= 1609.34  # miles -> metres
        # Phrased as "not >" so that NaN is rejected as well.
        if not distance > 0:
            raise ValueError("distance must be positive")
    except (TypeError, ValueError):
        errors.append("distance should be a numeric value greater than 0")

    try:
        c["current_loc"] = request.POST.get("location")
        loc = int(c["current_loc"])
    except (TypeError, ValueError):
        errors.append("location should be a zip code in Chicago")

    # The lower limit is only examined once the upper limit is known to be
    # valid, because it is compared against it.
    # OverflowError covers round(float("inf")).
    try:
        c["current_price_upper_limit"] = request.POST.get("price_upper_limit")
        current_price_upper_limit = round(float(c["current_price_upper_limit"]))
        if current_price_upper_limit < 0:
            raise ValueError("max price must be non-negative")
    except (TypeError, ValueError, OverflowError):
        errors.append("max price should be a numeric value greater than 0")
    else:
        try:
            c["current_price_lower_limit"] = request.POST.get("price_lower_limit")
            current_price_lower_limit = round(float(c["current_price_lower_limit"]))
            if not 0 <= current_price_lower_limit <= current_price_upper_limit:
                raise ValueError("min price out of range")
        except (TypeError, ValueError, OverflowError):
            errors.append("min price should be a numeric value greater than or equal to 0, and less than or equal to max price")

    c["current_date"] = request.POST.get("date")
    # A missing field is treated as an empty string so it is reported as a
    # format error instead of raising AttributeError on None.
    # The date stays wrapped in double quotes because that is the form passed
    # on to sql_stuff.search below.
    date = '"' + (c["current_date"] or "").strip() + '"'
    # fullmatch: nothing but the quoted YYYY-MM-DD value is accepted.
    search_date = re.fullmatch(r'"([\d]{4})-([\d]{2})-([\d]{2})"', date)
    if search_date is None:
        errors.append("Please enter the date in YYYY-MM-DD format")
    else:
        year = search_date.group(1)
        if year not in ["2013", "2014", "2015", "2016"]:
            errors.append("no data available for year {}".format(year))

    # Survey answers pref_1 .. pref_15, in order.
    weights = []
    for j in range(1, 16):
        try:
            weights.append(float(request.POST.get("pref_" + str(j))))
        except (TypeError, ValueError):
            errors.append("Survey question {} was not filled in".format(j))

    # House types: the first is required, the second and third are optional
    # but must be filled in order and must not repeat.
    house_type1 = request.POST.get("house_type1")
    # Absent optional fields are treated like blank ones so that None is
    # never appended to the criteria.
    house_type2 = request.POST.get("house_type2") or ""
    house_type3 = request.POST.get("house_type3") or ""
    if house_type1 is None:
        errors.append("Need first field for house type preference")
    else:
        repeats = (
            house_type1 == house_type2
            or house_type1 == house_type3
            or (house_type2 != "" and house_type2 == house_type3)
        )
        if repeats:
            errors.append("House types repeat")
        if house_type2 == "" and house_type3 != "":
            errors.append("Invalid ordering")
        house_types = ["house_type", house_type1]
        if house_type2 != "":
            house_types.append(house_type2)
            if house_type3 != "":
                house_types.append(house_type3)

    listing_type = request.POST.get("listing_type")

    try:
        c["current_max_bathroom"] = request.POST.get("max_bathroom")
        current_max_bathroom = int(c["current_max_bathroom"])
        if current_max_bathroom < 0:
            raise ValueError("max bathroom must be non-negative")
    except (TypeError, ValueError):
        errors.append("max bathroom should be an integer value greater than or equal to 0")
    else:
        try:
            c["current_min_bathroom"] = request.POST.get("min_bathroom")
            current_min_bathroom = int(c["current_min_bathroom"])
            if not 0 <= current_min_bathroom <= current_max_bathroom:
                raise ValueError("min bathroom out of range")
        except (TypeError, ValueError):
            errors.append("min bathroom should be an integer value greater than or equal to 0, and less than or equal to max bathroom")

    try:
        c["current_max_bedroom"] = request.POST.get("max_bedroom")
        current_max_bedroom = int(c["current_max_bedroom"])
        if current_max_bedroom < 0:
            raise ValueError("max bedroom must be non-negative")
    except (TypeError, ValueError):
        errors.append("max bedroom should be an integer value greater than or equal to 0")
    else:
        try:
            c["current_min_bedroom"] = request.POST.get("min_bedroom")
            current_min_bedroom = int(c["current_min_bedroom"])
            if not 0 <= current_min_bedroom <= current_max_bedroom:
                raise ValueError("min bedroom out of range")
        except (TypeError, ValueError):
            errors.append("min bedroom should be an integer value greater than or equal to 0, and less than or equal to max bedroom")

    # The survey questions are needed to re-render the home page form.
    with open(current_path + "/search/templates/search/survey.txt") as f:
        questions = [[line, count] for count, line in enumerate(f, 1)]
    c["survey"] = questions

    if len(errors) > 0:
        c["errors"] = errors
        return render(request, 'search/home.html', c)

    # ------------------------------------------------------------------
    # Query the houses.
    # ------------------------------------------------------------------
    criteria_list = [
        ["price", current_price_lower_limit, current_price_upper_limit],
        ["bedroom", current_min_bedroom, current_max_bedroom],
        ["bathroom", current_min_bathroom, current_max_bathroom],
        house_types,
    ]

    print("Querying zillow...")
    print("Here are the inputs:", loc, listing_type, criteria_list)
    house_list = ranking.get_house_list(loc, listing_type, criteria_list)
    print("Done. Found {} matching properties".format(len(house_list)))

    # Too many or zero results are reported back on the home page.
    if len(house_list) > 200:
        errors.append("Too many results, please narrow down your search.")
        c["errors"] = errors
        return render(request, 'search/home.html', c)

    if len(house_list) == 0:
        errors.append("No results found.")
        c["errors"] = errors
        return render(request, 'search/home.html', c)

    # ------------------------------------------------------------------
    # Score the houses.
    # ------------------------------------------------------------------
    # The 15 survey weights split into three groups of 4, 7 and 4 answers.
    zillow_pref, Yelp_pref, database_pref = weights[:4], weights[4:11], weights[11:]
    list_of_house_coords = [(j.lat, j.long) for j in house_list]

    print("Requesting Yelp")
    Yelp_results = Yelp.get_yelp_scores(list_of_house_coords, distance, Yelp_pref)
    database_results = sql_stuff.search(date, list_of_house_coords, distance, "search.db")
    # Each database result maps category -> (rows, score); keep the scores.
    database_scores = [
        [per_house[j][1] for j in DATABASE_CATEGORIES]
        for per_house in database_results
    ]

    # One combined row per house: Yelp scores followed by database scores.
    total_scores = [
        Yelp_results[i] + database_scores[i] for i in range(len(Yelp_results))
    ]

    # Passes the houses, user input, scores and preferences into ranking to
    # get the final scores (this also sets the score of each house object).
    raw_scores_dict = ranking.get_final_scores(
        house_list, criteria_list, total_scores, zillow_pref, database_pref, Yelp_pref)

    # Collect, per preference, the score column (tup[1]) and, across all
    # preferences, the distinct addresses (tup[2]) in first-seen order.
    scores_dict = {}
    top_ten_address = []
    for pref in raw_scores_dict:
        for tup in raw_scores_dict[pref]:
            scores_dict.setdefault(pref, []).append(tup[1])
            if tup[2] not in top_ten_address:
                top_ten_address.append(tup[2])

    # Divides the scores in scores_dict into 3 categories: zillow, yelp, crime
    scores = {"zillow": {}, "yelp": {}, "crime": {}}
    for key in scores_dict:
        for pref in PREF_OPTIONS_DICT:
            if key in PREF_OPTIONS_DICT[pref]:
                scores[pref][key] = scores_dict[key]

    # With more than 10 results the graphs get messy, so only the houses in
    # top_ten_address are charted; their Yelp scores are requested again so
    # that the rows line up with top_ten_address.
    if len(house_list) > 10:
        list_top_coords = []
        list_top_houses = []
        for address in top_ten_address:
            for house in house_list:
                if address == house.address:
                    list_top_coords.append((house.lat, house.long))
                    list_top_houses.append(house)
        top_Yelp_results = Yelp.get_yelp_scores(list_top_coords, distance, Yelp_pref)
    else:
        list_top_houses = house_list

    # ------------------------------------------------------------------
    # Bar-chart data: per category a list of [address, value, value, ...]
    # rows followed by the list of variable names.
    # ------------------------------------------------------------------
    bar_data_dict = {"zillow": [[]], "yelp": [[]], "crime": [[]]}
    for key in scores:
        variable_list = PREF_OPTIONS_DICT[key]
        for i, address in enumerate(top_ten_address):
            value_list = []
            for variable in variable_list:
                if variable in scores[key]:
                    if len(house_list) > 10 and key == "yelp":
                        # Use the Yelp scores requested for the charted houses.
                        value_list.append(
                            math.ceil(top_Yelp_results[i][YELP_DICT[variable]] * 100))
                    else:
                        value_list.append(scores[key][variable][i])
                else:
                    # No score for this variable: chart it as 0.
                    value_list.append(0)
            bar_data_dict[key][0].append([address] + value_list)
        bar_data_dict[key].append(variable_list)

    # ------------------------------------------------------------------
    # Persist the per-house data for the detail page.
    # ------------------------------------------------------------------
    shutil.rmtree(HOUSE_PATH, ignore_errors=True)
    os.mkdir(HOUSE_PATH)
    for index, house in enumerate(house_list):
        os.mkdir(HOUSE_PATH + "/{}".format(house.house_id))
        for category in DATABASE_CATEGORIES:
            with open(HOUSE_PATH + "/{}/{}.csv".format(house.house_id, category), "w") as f:
                f.write("date,primary type,secondary type,latitude,longitude\n")
                for record in database_results[index][category][0]:
                    f.write(",".join(str(field) for field in record) + "\n")

    with open(HOUSE_PATH + "/attributes.csv", "w") as f:
        f.write("id,address,price,bedroom,bathroom,latitude,longitude,score,link\n")
        for house in house_list:
            # Commas are dropped from the address so it stays a single column.
            address = house.address.replace(',', '')
            row_string = "{},{},{},{},{},{},{},{},{}".format(
                house.house_id, address, house.price, house.bedroom,
                house.bathroom, house.lat, house.long, house.score, house.link)
            f.write(row_string + "\n")

    # ------------------------------------------------------------------
    # Per-house totals for the bar chart: the number of data rows in each
    # category file, listed in DATABASE_CATEGORIES order.
    # ------------------------------------------------------------------
    bar_dict = {}
    for house in list_top_houses:
        for category in DATABASE_CATEGORIES:
            with open(HOUSE_PATH + "/{}/{}.csv".format(house.house_id, category), "r") as f:
                f.readline()  # skip the header row
                row_count = sum(1 for _ in csv.reader(f))
            bar_dict.setdefault(house.address, []).append(row_count)
    bar_list = [[key] + bar_dict[key] for key in top_ten_address]

    # Replaces the c dictionary with the house_list and fills it with the
    # bar graph data.
    c = {'results': house_list}
    c['database_cat'] = DATABASE_CATEGORIES
    c['bar_data'] = bar_list
    c['zbar_data'] = bar_data_dict["zillow"][0]
    c['ybar_data'] = bar_data_dict["yelp"][0]
    c['cbar_data'] = bar_data_dict["crime"][0]
    c['zbar_var'] = bar_data_dict["zillow"][1]
    c['ybar_var'] = bar_data_dict["yelp"][1]
    c['cbar_var'] = bar_data_dict["crime"][1]
    c['current_distance'] = distance
    c["current_date"] = request.POST.get("date")

    return render(request, 'search/results.html', c)
Пример #6
0
def detailed_results(request):
    """Render the detail page for one house from the stored search results.

    The house is looked up by ``house_id`` in ``HOUSE_PATH/attributes.csv``.
    Its per-category CSV files are aggregated into monthly counts, which are
    plotted to ``historical_crime.png`` in the house's directory and also
    passed to the template as pie and line chart data. Finally a Yelp search
    around the house is run for the requested term, category and page.

    Args:
        request: Request object whose ``POST`` mapping holds ``house_id``
            and the optional ``distance``, ``distance_type``, ``cat``,
            ``term`` and ``page`` fields.

    Returns:
        The value of ``render`` for ``search/error.html`` when ``house_id``
        is missing, not a number, or not present in ``attributes.csv``;
        otherwise for ``search/detailed_results.html``.
    """
    c = {}
    house_id = request.POST.get("house_id")
    if not house_id:
        return render(request, 'search/error.html', c)
    try:
        wanted_id = int(house_id)
    except ValueError:
        # A non-numeric id cannot match any stored house.
        return render(request, 'search/error.html', c)

    with open(HOUSE_PATH + "/attributes.csv", "r") as f:
        f.readline()  # skip the header row
        for row in csv.reader(f):
            if int(row[0]) == wanted_id:
                c["current_lat"] = row[5]
                c["current_long"] = row[6]
                c["current_bedroom"] = row[3]
                c["current_bathroom"] = row[4]
                c["current_price"] = row[2]
                c["current_address"] = row[1]
                c["current_house_id"] = house_id
                c["current_link"] = row[8]
                break
        else:
            # Unknown id: without this the code below fails with KeyError on
            # c["current_lat"].
            return render(request, 'search/error.html', c)

    c['current_distance'] = request.POST.get('distance', 1200)
    line_styles = [".r-", ".b-", ".g-", ".y-"]
    # The loop below both draws the matplotlib graph and collects chart data.
    graph_data_raw = []
    c['pie_data'] = []
    for idx, j in enumerate(DATABASE_CATEGORIES):
        # Count the rows of this category per "YYYY-MM" prefix of column 0.
        monthly = {}
        with open(HOUSE_PATH + "/{}/{}.csv".format(house_id.strip(), j), "r") as f:
            f.readline()  # skip the header row
            for row in csv.reader(f):
                month_year = row[0][:7]
                monthly[month_year] = monthly.get(month_year, 0) + 1
        t_labels = sorted(monthly)
        t = range(len(t_labels))
        s = [monthly[k] for k in t_labels]
        # Stores the pie data for this category
        c['pie_data'].append((j, sum(s)))
        # With more than 15 months, blank out labels so only every
        # step-th tick is named.
        if len(t) > 15:
            step = (len(t) // 15) + 1
            for k in range(len(t_labels)):
                if k % step != 0:
                    t_labels[k] = ""
        # Stores the graph data for this category
        graph_data_raw.append((j, t_labels, s))
        plt.xticks(t, t_labels, rotation=30)
        # Styles are taken from the end of the list, as before, but wrap
        # around instead of raising IndexError past four categories.
        style = line_styles[-1 - (idx % len(line_styles))]
        plt.plot(t, s, style, label=j)
    plt.xlabel("Date YYYY-MM")
    plt.ylabel("Number of crimes")
    plt.title("Crime within {}m of this property".format(c["current_distance"]))
    plt.legend()
    plt.grid(True)
    plt.savefig(HOUSE_PATH + "/{}/historical_crime.png".format(house_id.strip()))
    plt.clf()

    # Changes graph_data_raw into rows of [label, count, count, ...].
    # NOTE(review): rows are keyed on the first category's months and the
    # other categories are appended by position; if categories cover
    # different months the columns may be misaligned (or raise IndexError).
    crime_list = [[label] for label in graph_data_raw[0][1]]
    for _, _, crime_data in graph_data_raw:
        for pos, value in enumerate(crime_data):
            crime_list[pos].append(value)
    c['graph_data'] = crime_list
    # Passes in the database categories for the pie chart
    c['database_cat'] = DATABASE_CATEGORIES
    # Gets the category selected; not selecting one means no category filter
    current_cat = request.POST.get('cat')
    if not current_cat:
        current_cat = ""
    # Passes the yelp categories to use in the form
    c['categories'] = ["restaurants", "active",
                       "arts", "education", "health", "nightlife", "shopping"]
    distance = float(c["current_distance"])
    units = request.POST.get("distance_type")
    # Corrects distance for units and limit (Yelp limit is 40000)
    if units == "miles":
        distance *= 1609.34
    if distance > 40000:
        distance = 40000
    c["current_term"] = request.POST.get("term", "food")
    page = request.POST.get('page', 1)
    c['current_page'] = page
    # Stores and passes in the Yelp results (20 per page)
    c['results'], total = Yelp.yelp_search(
        (c["current_lat"], c["current_long"]),
        distance, c['current_term'],
        category_filter=current_cat, offset=(int(page) - 1) * 20)
    # Page numbers the user can request. range() excludes its stop value,
    # so one is added to keep the last (possibly partial) page.
    c['pages'] = list(range(1, math.ceil(total / 20) + 1))

    return render(request, 'search/detailed_results.html', c)
Пример #7
0
"""
import os
import tensorflow as tf
import time
import Bi_LSTM
import Yelp

# Fix TensorFlow's random seed so that runs are repeatable.
tf.set_random_seed(0)
## Setting GPU
# Only the GPU with index 3 is made visible to this process; the session
# config lets TensorFlow grow its GPU memory use on demand.
os.environ['CUDA_VISIBLE_DEVICES'] = "3"
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
## file name
embedding_filename = "polyglot-en.pkl"  ## pretrained 64 dim word embedding
## Get data
# NOTE(review): the Yelp helper's methods are defined in another module; the
# step descriptions below are inferred from their names - confirm there.
yelp = Yelp.Yelp()
yelp.get_embedding(embedding_filename)  # presumably loads the embedding file
label, review = yelp.get_data("train")  # labels and reviews of the "train" split
train_X = yelp.make_corpus(review)
char_list = yelp.char_list(train_X)
yelp.get_OOV(char_list)  # presumably registers out-of-vocabulary characters

## Data -> Vector
train_X_ = yelp.Convert2Vec(train_X, "Context_Char")
train_Y_ = label

# Training hyper-parameters and dataset dimensions.
Batch_size = 32
Total_size = len(train_X)  # number of training examples
Vector_size = 64  # same as the 64-dim embedding noted above
seq_length = [len(x) for x in train_X]  # length of every training sequence
Maxseq_length = max(seq_length)  # longest sequence in the training set