from geopy.geocoders import Nominatim #open street map -- free :) # import sqlalchemy from foodbornenyc.models.models import get_db_session from foodbornenyc.models.download_history import YelpDownloadHistory from foodbornenyc.models.businesses import Business, YelpCategory from foodbornenyc.models.businesses import businesses, categories, business_category_table from foodbornenyc.models.locations import Location, locations, location_id from foodbornenyc.models.documents import YelpReview, Document from foodbornenyc.models.documents import yelp_reviews, documents, document_associations from foodbornenyc.settings import yelp_download_config as config from foodbornenyc.db_settings import database_config as dbconfig from foodbornenyc.util.util import get_logger, xstr, xuni, sec_to_hms logger = get_logger(__name__, level='INFO') def download_url_to_file(url, data_dir, filename): """Download a url to local file. Write to file as stream. This keeps a low memory footprint Args: url (str): the url to download from data_dir (str): the local directory to write the file filename (str): the name of the file to write Returns: None
Simple Twitter Search based on keywords once every 5 seconds & save to database """ from time import time, sleep from twython import Twython from twython.exceptions import TwythonError from sqlalchemy.exc import OperationalError from foodbornenyc.models.documents import Tweet from foodbornenyc.db_settings import twitter_config from foodbornenyc.sources.twitter.util \ import tweet_to_Tweet, user_to_TwitterUser, place_to_Location,\ reset_location_cache, twitter, db from foodbornenyc.util.util import sec_to_hms, get_logger, xuni logger = get_logger(__name__, level="INFO") search_terms = [ '#foodpoisoning', '#stomachache', '"food poison"', '"food poisoning"', 'stomach', 'vomit', 'puke', 'diarrhea', '"the runs"' ] def search(keywords, since_id=None, count=100):
import datetime from time import time from sqlalchemy import func, select from sqlalchemy.exc import OperationalError from sklearn.externals import joblib from foodbornenyc.settings import yelp_classify_config as config from foodbornenyc.models.models import get_db_session from foodbornenyc.models.documents import YelpReview, documents #import foodbornenyc.models.businesses from foodbornenyc.util.util import get_logger, sec_to_hms logger = get_logger(__name__) class YelpClassify(object): """Method to classify yelp reviews already loaded into the database""" def __init__(self, sick_path=None): """ Initialize the method object and its classifier Args: sick_path (str): if specified, load the classifier from this path else load from config Returns: None """ if sick_path: