Пример #1
0
def load_data():
    """Return listings merged with their neighborhood's city and traits.

    Loads every document of the ``neighborhoods`` and ``listings``
    collections in the ``airbnb`` database into DataFrames, keeps only the
    listings whose ``description_raw`` is not null, and merges them with the
    ``neighborhood``, ``city`` and ``traits`` columns of the neighborhoods
    on ``neighborhood``.

    Returns:
        pandas.DataFrame: the merged frame. Columns coming from the
        neighborhoods side whose names clash with a listing column carry
        a ``_copy`` suffix. Listings without a matching neighborhood are
        dropped, since ``merge`` performs an inner join by default.
    """
    air_hood = AirBnBNeighborhood(db_name='airbnb', coll_name='neighborhoods')
    hood_df = pd.DataFrame(list(air_hood.coll.find({})))

    # Only the ``.coll`` handle is used here, so the same wrapper class is
    # reused to reach the listings collection.
    air_listing = AirBnBNeighborhood(db_name='airbnb', coll_name='listings')
    listing_df = pd.DataFrame(list(air_listing.coll.find({})))

    # ``notnull()`` builds the mask directly instead of comparing the
    # ``isnull()`` result against ``False``.
    listing_df = listing_df[listing_df['description_raw'].notnull()]

    merged_df = listing_df.merge(
        right=hood_df[['neighborhood', 'city', 'traits']],
        on='neighborhood',
        suffixes=('', '_copy'))
    return merged_df
def main():
    """Scrape and insert every neighborhood listed in the CSV file.

    Reads the rows of ``NEIGHBORHOOD_FILEPATH`` and, for each row, calls
    ``AirBnBNeighborhood.scrape_and_insert`` with the row's id, name, URL
    and city fields, sleeping 2.5 seconds after each call.
    """
    scraper = AirBnBNeighborhood(db_name=DB, coll_name=COLL)

    # Pause after every request so as to not get banned from AirBnB.
    pause_seconds = 2.5

    # CSV columns forwarded, under the same names, as keyword arguments.
    fields = ('neighborhood_id', 'neighborhood', 'neighborhood_url',
              'city_id', 'city')

    records = pd.read_csv(NEIGHBORHOOD_FILEPATH).to_dict('records')
    for record in records:
        scraper.scrape_and_insert(**{name: record[name] for name in fields})
        time.sleep(pause_seconds)
def main():
    """Run feature extraction for every neighborhood in the collection.

    Fetches the ``_id`` of each document in the collection, then for each
    id calls ``pull_from_db`` followed by ``extract_and_add_features`` on
    the same ``AirBnBNeighborhood`` instance and prints a progress line.
    """
    air_hood = AirBnBNeighborhood(db_name=DB_NAME, coll_name=COLL_NAME)

    # The projection limits each returned document to its ``_id``. The ids
    # are materialized into a list before the loop starts, so the query is
    # fully consumed before any per-neighborhood work runs.
    hood_ids = [doc['_id'] for doc in air_hood.coll.find({}, {'_id': 1})]

    for hood_id in hood_ids:
        air_hood.pull_from_db(neighborhood_id=hood_id)
        air_hood.extract_and_add_features()
        # Parenthesized single-argument form prints the same text under
        # Python 2 and is also valid syntax under Python 3.
        print("Extracting Features for %s" % hood_id)