def get_all_business_types():
    """
    Look up and store a type string for every business that lacks one.

    For each business in the challenge set whose ``unique_id`` is not yet a
    key of the stored business-types dict, the candidates returned by
    ``get_places(lat, lon)`` are compared by name using ``cosine_sim``.  The
    ``types`` of the best-scoring candidate, minus a fixed list of ignored
    labels, are joined into one space-separated string with underscores
    replaced by spaces.  ``None`` is stored when no candidate scores above
    zero.

    The dict is written back through ``loader.dump_business_dict`` after
    every newly processed business.
    """
    # Labels dropped from a place's type list before it is stored.
    ignored_types = (
        'point_of_interest', 'establishment', 'sublocality', 'route',
        'real', 'political', 'of', 'or', 'local', 'locality',
        'intersection', '1',
    )

    businesses = loader.get_challengeset()
    idtoloc = loader.get_idtoloc()
    business_types_dict = loader.get_business_types()
    print("Done {} of {}".format(len(business_types_dict), len(businesses)))

    for business in businesses:
        unique_id = business['unique_id']
        # Test membership on the dict itself: ``.keys()`` builds a list in
        # Python 2, which turns every check into a linear scan.
        if unique_id in business_types_dict:
            continue

        print(business['name'])
        lat, lon = idtoloc[unique_id]

        # Keep the candidate whose name is most similar to the business name.
        closest_place, best_sim = None, 0
        for place in get_places(lat, lon):
            sim = cosine_sim(place['name'], business['name'])
            if sim > best_sim:
                closest_place, best_sim = place, sim

        if closest_place:
            kept = [t for t in closest_place['types'] if t not in ignored_types]
            types = " ".join(kept).replace("_", " ")
        else:
            types = None
        print(types)

        business_types_dict[unique_id] = types
        # Saved after each business; presumably so an interrupted run can
        # pick up where it stopped (see the "Done x of y" line above).
        loader.dump_business_dict(business_types_dict)
def clean_business_types():
    """
    Strip stop words from every stored business-type string.

    Each value of the stored business-types dict is split on single spaces,
    tokens that appear in the stop list are dropped, and the remaining
    tokens are joined back with single spaces.  ``None`` values become
    ``''``.  The dict is then written back through
    ``loader.dump_business_dict``.
    """
    stop_words = [
        'agency', 'place', 'food', 'point_of_interest',
        'establishment', 'sublocality', 'route', 'real',
        'political', 'of', 'or', 'local', 'locality',
        'intersection', '1',
    ]

    type_lookup = loader.get_business_types()
    for uid, raw in type_lookup.items():
        if raw is None:
            raw = ''
        kept = [word for word in raw.split(' ') if word not in stop_words]
        # Joining an empty list already yields '', so no special case needed.
        type_lookup[uid] = ' '.join(kept)
    loader.dump_business_dict(type_lookup)
Пример #3
0
    print "Score: ", total
    print "Total: ", max_potential


def getPredictionScoreOfTrainingSet():
    """
    Returns the algorithm's score as a fraction of the maximum attainable.

    Every entry of the hand-classified set is looked up by uid in the
    algorithm-classified set and the pair is scored with
    ``util.score_prediction(guess, actual)``.  The sum of those scores is
    divided by ``6 * <number of hand-classified entries>``; 6 is presumably
    the highest score a single prediction can earn -- confirm against
    ``util.score_prediction``.

    Returns 0.0 when the hand-classified set is empty, since there is
    nothing to score and the division would otherwise be by zero.

    A uid that is missing from the algorithm-classified set is not handled
    here; the lookup is a plain ``[]`` index.
    """
    hand_classified_set = loader.get_hand_classifiedset()
    algo_classified_set = loader.get_algo_classifiedset()

    max_potential = len(hand_classified_set) * 6
    if max_potential == 0:
        return 0.0

    total = sum(
        util.score_prediction(algo_classified_set[uid], actual)
        for uid, actual in hand_classified_set.items()
    )
    # float() keeps this a true division under Python 2.
    return total / float(max_potential)


def writeClassification(business_uid, naics_code):
    """
    Forward a (business uid, NAICS code) pair to the loader.

    Both arguments are passed unchanged to
    ``loader.write_row_algo_classified_set``, which presumably appends the
    pair to the algorithm-classified set -- confirm in ``loader``.
    """
    loader.write_row_algo_classified_set(business_uid, naics_code)


# Script entry point: runs only when this file is executed directly.
if __name__ == "__main__":
    # These assignments happen at module level, so the names are module
    # globals available to the functions in this file.
    naics_items = loader.get_naics_data_for_level(6)
    business_types = loader.get_business_types()
    naics_dict = loader.get_naics_dict()
    # Starts as None; nothing in this excerpt assigns it a real value.
    featurizer = None
    # `manager` is defined outside this excerpt.
    manager.run()
Пример #4
0
    return render_template('classifypage.html', business=business, naics_dict=naics_dict, agent=agent)


@app.route('/c/<agent>/<test>/<business_uid>/<naics_code>', methods=['POST'])
def classifyBusiness(agent, test, business_uid, naics_code):
    if request.method == 'POST':
        loader.write_row_classified_set(business_uid, naics_code)
        if test != 'test':
            loader.write_row_hand_classified_set(business_uid, naics_code)
        return redirect('/classifier/' + agent)
    else:
        return abort(405)  # 405 Method Not Allowed


@app.route('/database')
@app.route('/database/<int:page>', methods=['GET'])
def databaseView(page=1):
    """
    Render one page of businesses with the ``database.html`` template.

    ``page`` comes from the URL and defaults to 1 when the bare
    ``/database`` route is used.  The template also receives the
    module-level ``hand_classified_set``, ``algo_classified_set`` and
    ``naics_dict``.
    """
    template_context = {
        'businesses': dbh.getBusinessPage(page),
        'hand_classified_set': hand_classified_set,
        'algo_classified_set': algo_classified_set,
        'naics_dict': naics_dict,
    }
    return render_template('database.html', **template_context)


# Script entry point: runs only when this file is executed directly.
if __name__ == "__main__":
    # These assignments happen at module level, so the names are module
    # globals; databaseView reads hand_classified_set, algo_classified_set
    # and naics_dict from here.
    naics_items = loader.get_naics_data_for_level(6)
    hand_classified_set = loader.get_hand_classifiedset()
    algo_classified_set = loader.get_algo_classifiedset()
    unclassified_business_ids = loader.get_unclassified_business_ids()
    business_types = loader.get_business_types()
    naics_dict = loader.get_naics_dict()
    # NOTE(review): debug=True looks like Flask's debug mode, assuming `app`
    # is a Flask application -- confirm this is never used in production.
    app.run(debug=True)
Пример #5
0
 def __init__(self):
     """
     Load the resources this object keeps for its lifetime.

     Stores the result of ``loader.get_business_types()`` as
     ``self.google_types`` and the result of ``loader.get_word2vecmodel()``
     as ``self.model``, then prints a confirmation line.
     """
     self.google_types = loader.get_business_types()
     # Presumably a word2vec model, going by the loader function's name.
     self.model = loader.get_word2vecmodel()
     print "Loaded Models"
Пример #6
0
 def __init__(self):
     """
     Load the resources this object keeps for its lifetime.

     Stores the result of ``loader.get_business_types()`` as
     ``self.google_types`` and the result of ``loader.get_word2vecmodel()``
     as ``self.model``, then prints a confirmation line.
     """
     self.google_types = loader.get_business_types()
     # Presumably a word2vec model, going by the loader function's name.
     self.model = loader.get_word2vecmodel()
     print "Loaded Models"