示例#1
0
def validate_db(yelp_object, haven_model=None):
    """takes the result of a yelp query by businesses id and compares it to the database entry. If any information
     on the local db is out of date, it is updated accordingly. Will also create new db if the haven_model is none"""
    print "yelp object in validate_db:", yelp_object
    print "haven_model in validate_db", haven_model
    new = False

    if haven_model is None:
        haven_model = Business()
        haven_model.yelp_id = yelp_object['id']
        new = True

    haven_model.name = yelp_object['name']

    if yelp_object['location'].get('address'):
        if len(yelp_object['location']['address']) > 1:
            haven_model.address_line_2 = yelp_object['location']['address'][1]

        haven_model.address_line_1 = yelp_object['location']['address'][0]

    # nothing in local db should not have a city and state code but if for some reason yelp wiped them, it prevents it
    # from being cleared, protecting db integrity
    if yelp_object['location'].get('city'):
        haven_model.city = yelp_object['location']['city']

    if yelp_object['location'].get('state_code'):
        haven_model.state = yelp_object['location']['state_code']

    if yelp_object['location'].get('postal_code'):
        haven_model.zipcode = yelp_object['location']['postal_code']

    if yelp_object.get('phone'):
        haven_model.phone = yelp_object['phone']

    if yelp_object['location'].get('coordinate'):
        haven_model.latitude = yelp_object['location']['coordinate']['latitude']
        haven_model.longitude = yelp_object['location']['coordinate']['longitude']
    try:
        if new:
            db.session.add(haven_model)
            print "successfully added"
        db.session.commit()
        print 'successfully committed'
        print "committed business:", haven_model

    except:
        print 'ut-oh'
示例#2
0
def _add_to_businesses(params):
    """adds dictionary of attributes to businesses db"""
    print params
    if not Business.query.filter_by(yelp_id=params['yelp_id']).first():
        business = Business()
        cat_list = []
        for key in params:
            # adds elements in category lists to category table if they don't already exist
            if key == "categories":
                for cat in params[key]:
                    cat_list.append(cat)
                    if not Category.query.filter_by(category_name=cat).first():
                        category = Category(category_name=cat)
                        db.session.add(category)
                # THROUGH LINE 40 REPLACED BY 30-34
                # for group in params[key]:
                #     print type(group)
                #     for subtype in group:
                #         print type(subtype)
                #         if not Category.query.filter_by(category_name=subtype).first():
                #             category = Category(category_name=subtype)
                #             db.session.add(category)
                #         cat_list.append(subtype)
                #         print cat_list
            elif key == "yelp_id":
                business.yelp_id = params[key]
            elif key == "name":
                business.name = params[key]
            elif key == "address_line_1":
                business.address_line_1 = params[key]
            elif key == "address_line_2":
                business.address_line_2 = params[key]
            elif key == "city":
                business.city = params[key]
            elif key == "state":
                business.state = params[key]
            elif key == "zipcode":
                business.zipcode = params[key]
            elif key == "phone":
                business.phone = params[key]
            elif key == "latitude":
                business.latitude = params[key]
            elif key == "longitude":
                business.longitude = params[key]
        try:
            db.session.add(business)
            db.session.commit()
        except:
            db.session.rollback()
            print business.name, "has insufficient information, skipping."
            return None
    # creates rows in reference table
        for cat in cat_list:
            # creates row in reference table
            business = Business.query.filter_by(yelp_id=params['yelp_id']).first()
            catbus = BusinessCategory()
            print business.business_id
            catbus.business_id = business.business_id
            cat_object = Category.query.filter_by(category_name=cat).first()
            print cat_object.category_name
            catbus.category_id = cat_object.category_id

            if not BusinessCategory.query.filter_by(business_id=catbus.business_id,
                                                     category_id=catbus.category_id).first():
                db.session.add(catbus)
        db.session.commit()

        print "added " + business.name + " to db"

    else:
        print "Already in Dictionary"
        return None
示例#3
0
def build_db(city, state):

    # categories = ['active', 'arts', 'auto', 'beautysvc', 'education', 'eventservices', 'financialservices', 'food',
    #               'health', 'homeservices', 'hotelstravel', 'localflavor', 'localservices', 'massmedia', 'nightlife',
    #               'pets', 'professional', 'publicservicesgovt', 'realestate', 'religiousorgs', 'restaurants',
    #               'shopping']
    city_state = city + ", " + state
    # for category in categories:

    # result_count = yelp_api.search_query(location=city_state, category_filter=category)['total']
    result_count = yelp_api.search_query(location=city_state)['total']

    offset = 0
    added = 0
    skipped = 0
    print result_count
    # max offset is 1000
    # try:
    while offset < result_count:
        while offset < 1000:
            # print category
            # results = yelp_api.search_query(location=city_state, category_filter=category, offset=offset)
            results = yelp_api.search_query(location=city_state, offset=offset)
            for result in results['businesses']:
                try:
                    business = Business()

                    # id
                    business.yelp_id = result['id']
                    # name
                    business.name = result['name']

                    # address lines 1 and 2
                    if result['location'].get('address'):
                        business.address_line_1 = result['location'][
                            'address'][0]
                        if len(result['location']['address']) > 1:
                            business.address_line_2 = result['location'][
                                'address'][1]

                    # city
                    business.city = result['location']['city']
                    # state code
                    business.state = result['location']['state_code']
                    # zip code
                    business.zipcode = result['location']['postal_code']

                    # phone
                    if result.get('phone'):
                        business.phone = result['phone']

                    # latitude and longitude
                    if result['location'].get('coordinate'):
                        business.latitude = result['location']['coordinate'][
                            'latitude']
                        business.longitude = result['location']['coordinate'][
                            'longitude']

                    # list of categories
                    if result.get('categories'):
                        cat_list = []
                        for group in result['categories']:
                            for subtype in group:
                                if not Category.query.filter_by(
                                        category_name=subtype).first():
                                    category = Category(category_name=subtype)
                                    db.session.add(category)

                                cat_list.append(subtype)

                    # if not Business.query.filter_by(yelp_id=business.yelp_id).first():
                    #     db.session.add(business)
                    db.session.add(business)
                    db.session.commit()
                    bus_id = business.business_id

                    for cat in cat_list:
                        # creates row in reference table
                        catbus = BusinessCategory()

                        catbus.business_id = bus_id

                        cat_object = Category.query.filter_by(
                            category_name=cat).first()
                        catbus.category_id = cat_object.category_id

                        db.session.add(catbus)
                    db.session.commit()
                    added += 1
                    print "added" + str(added)
                    print business.name
                except:
                    print "already added:" + business.name
                    print 'skipped' + str(skipped)
                    skipped += 1
                    print "added so far: " + str(added)

                db.session.commit()
                offset += 20
示例#4
0
def build_db(city, state):

    # categories = ['active', 'arts', 'auto', 'beautysvc', 'education', 'eventservices', 'financialservices', 'food',
    #               'health', 'homeservices', 'hotelstravel', 'localflavor', 'localservices', 'massmedia', 'nightlife',
    #               'pets', 'professional', 'publicservicesgovt', 'realestate', 'religiousorgs', 'restaurants',
    #               'shopping']
    city_state= city + ", " + state
    # for category in categories:

        # result_count = yelp_api.search_query(location=city_state, category_filter=category)['total']
    result_count = yelp_api.search_query(location=city_state)['total']

    offset = 0
    added = 0
    skipped = 0
    print result_count
    # max offset is 1000
    # try:
    while offset < result_count:
        while offset < 1000:
            # print category
            # results = yelp_api.search_query(location=city_state, category_filter=category, offset=offset)
            results = yelp_api.search_query(location=city_state, offset=offset)
            for result in results['businesses']:
                try:
                    business = Business()

                    # id
                    business.yelp_id = result['id']
                    # name
                    business.name= result['name']

                    # address lines 1 and 2
                    if result['location'].get('address'):
                        business.address_line_1 = result['location']['address'][0]
                        if len(result['location']['address']) > 1:
                            business.address_line_2 = result['location']['address'][1]

                    # city
                    business.city = result['location']['city']
                    # state code
                    business.state = result['location']['state_code']
                    # zip code
                    business.zipcode = result['location']['postal_code']

                    # phone
                    if result.get('phone'):
                        business.phone = result['phone']

                    # latitude and longitude
                    if result['location'].get('coordinate'):
                        business.latitude = result['location']['coordinate']['latitude']
                        business.longitude = result['location']['coordinate']['longitude']

                    # list of categories
                    if result.get('categories'):
                        cat_list = []
                        for group in result['categories']:
                            for subtype in group:
                                if not Category.query.filter_by(category_name=subtype).first():
                                    category = Category(category_name=subtype)
                                    db.session.add(category)

                                cat_list.append(subtype)

                    # if not Business.query.filter_by(yelp_id=business.yelp_id).first():
                    #     db.session.add(business)
                    db.session.add(business)
                    db.session.commit()
                    bus_id = business.business_id

                    for cat in cat_list:
                        # creates row in reference table
                        catbus = BusinessCategory()

                        catbus.business_id = bus_id

                        cat_object = Category.query.filter_by(category_name=cat).first()
                        catbus.category_id = cat_object.category_id

                        db.session.add(catbus)
                    db.session.commit()
                    added += 1
                    print "added" + str(added)
                    print business.name
                except:
                    print "already added:" + business.name
                    print 'skipped' + str(skipped)
                    skipped += 1
                    print "added so far: " + str(added)

                db.session.commit()
                offset += 20