Пример #1
0
def main(start_date):
    """Run the NYT metadata search, paging by date from ``start_date``.

    Reads the database name, the metadata collection name and the NYT API key
    from the JSON settings files under ``$CAPSTONE_DIR/settings``, opens the
    MongoDB collection and hands it to ``paginate_by_date``.

    Args:
        start_date: start date specified at runtime; passed through unchanged
            to ``paginate_by_date`` (the expected format is defined there).
    """

    # set the location of the settings files
    cap_dir = os.getenv("CAPSTONE_DIR")
    api_settings = os.path.join(cap_dir, 'settings', 'api_settings.json')
    project_settings = os.path.join(cap_dir, 'settings', 'project_settings.json')

    # set some settings
    database = load_setting(project_settings, 'db_name')     # name of mongodb database
    metadata_tab = load_setting(project_settings, "nyt_metadata_table")
    nyt_api_key = load_setting(api_settings, 'NYT_API_KEY')  # NYTimes API Key

    # open the connection to the database
    client = MongoClient()
    try:
        table = client[database][metadata_tab]

        # paginate through the search results, starting at the start date
        # specified at runtime and a 2000 day initial window
        paginate_by_date(nyt_api_key, table, start_date=start_date, init_window=2000)
    finally:
        # close the connection even when the pagination fails part-way
        client.close()
Пример #2
0
def main(fileout):
    """Write the ``web_url`` of every document in the metadata collection to a file.

    The database name is read from ``settings/project_settings.json`` under
    ``$CAPSTONE_DIR``; the collection is always ``'nyt-recipe-metadata'``.

    Args:
        fileout: path of the text file to (over)write, one URL per line.

    Raises:
        KeyError: if a document has no ``'web_url'`` field.
    """

    # set the location of the settings file
    settings_file = os.path.join(os.getenv("CAPSTONE_DIR"), 'settings/project_settings.json')

    # set some settings
    database = load_setting(settings_file, 'db_name')  # name of mongodb database

    # open the connection to the database, get the table
    client = MongoClient()
    try:
        table = client[database]['nyt-recipe-metadata']

        # get all results in the table and dump their urls
        results = table.find()
        with open(fileout, 'w') as f:
            for result in results:
                f.write(result['web_url'] + '\n')
    finally:
        # release the connection whether or not the dump succeeded
        client.close()
Пример #3
0
def main(fileout):
    """Dump the ``web_url`` field of each stored metadata document to ``fileout``.

    The MongoDB database name comes from the ``db_name`` entry of
    ``$CAPSTONE_DIR/settings/project_settings.json``; documents are read from
    the ``'nyt-recipe-metadata'`` collection.

    Args:
        fileout: destination path; the file is opened in ``'w'`` mode and
            receives one URL per line.

    Raises:
        KeyError: if a document lacks the ``'web_url'`` key.
    """

    # set the location of the settings file
    settings_file = os.path.join(os.getenv("CAPSTONE_DIR"), 'settings/project_settings.json')

    # set some settings
    database = load_setting(settings_file, 'db_name')  # name of mongodb database

    # open the connection to the database
    client = MongoClient()
    try:
        # get the table and all results in it
        table = client[database]['nyt-recipe-metadata']
        results = table.find()

        with open(fileout, 'w') as f:
            for result in results:
                f.write(result['web_url'] + '\n')
    finally:
        # always close the client so the connection is not leaked
        client.close()


if __name__ == "__main__":

    # resolve the project root and make sure its data directory exists
    cap_dir = os.getenv("CAPSTONE_DIR")
    data_dir = os.path.join(cap_dir, 'data')
    if not os.path.isdir(data_dir):
        os.makedirs(data_dir)

    # the output file name is configured in the project settings
    settings_path = os.path.join(cap_dir, 'settings', 'project_settings.json')
    file_out = load_setting(settings_path, 'nyt_url_file')
    outfile = os.path.join(data_dir, file_out)

    main(outfile)
    pickler(features, feature_pickle)

    # pickler(wc, bag_pickle)
    # pickler(vocab, vocab_pickle)
    # pickler(components, comp_pickle)


if __name__ == "__main__":

    # project root and the settings file that lives beneath it
    cap_dir = os.getenv("CAPSTONE_DIR")
    settings_file = os.path.join(cap_dir, 'settings', 'project_settings.json')

    # path of the features pickle (the bag-of-ingredients, vocabulary and
    # recipe-component pickles are currently disabled)
    feature_pickle = os.path.join(cap_dir, 'data/pickles/features.pkl')

    # database and collection names come from the project settings
    database = load_setting(settings_file, 'db_name')
    table = load_setting(settings_file, 'nyt_recipe_extracted')

    # open the collection named by the 'nyt_recipe_extracted' setting
    client = MongoClient()
    db = client[database]
    tab = db[table]

    main(tab)
Пример #5
0
# Flask application object.
# NOTE(review): debug mode is switched on unconditionally here -- confirm this
# module is never served this way outside of development.
app = Flask(__name__)
app.debug = True


def _read_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# capstone directory: taken from the environment, else the hard-coded default
cap_dir = os.getenv("CAPSTONE_DIR", '/home/ubuntu/gschool-capstone')

# settings files
settings_file = os.path.join(cap_dir, 'settings', 'project_settings.json')
api_file = os.path.join(cap_dir, 'settings', 'api_settings.json')

# directory holding the pickles
pickle_path = os.path.join(cap_dir, 'data', 'pickles')

# name of the mongodb database
database = load_setting(settings_file, item='db_name')

# google captcha api key and site key
google_captcha_key = load_setting(api_file, item='GOOGLE_CAPTCHA')
google_site_key = load_setting(api_file, item="GOOGLE_SITEKEY")

# objects unpickled once at import time
features = _read_pickle(os.path.join(pickle_path, 'features.pkl'))
topics = _read_pickle(os.path.join(pickle_path, 'topics.pkl'))

# connection to the 'feedback' collection
client = MongoClient()
db = client[database]
tab = db['feedback']
Пример #6
0
            if verbose: print('already have recipe')

        # status message for output
        if verbose: print("Processed {} out of {} recipes\n".format(i+1, n_urls))

        # take a longer break every 100 recipes
        if i % 100 == 0:
            if verbose: print("Taking a break...\n")
            time.sleep(60)

if __name__ == "__main__":

    # Script entry point: read the project settings, load the list of URLs
    # from disk and open the MongoDB collection named by 'nyt_recipe_html'.

    # set the location of the settings file
    cap_dir = os.getenv("CAPSTONE_DIR")
    settings_file = os.path.join(cap_dir, 'settings', 'project_settings.json')
    database = load_setting(settings_file, 'db_name')  # name of mongodb database
    recipe_tab = load_setting(settings_file, 'nyt_recipe_html')  # collection name, used as db[recipe_tab] below
    url_filename = load_setting(settings_file, 'nyt_url_file')  # file name inside $CAPSTONE_DIR/data

    # get a list of the urls to work on

    url_file = os.path.join(cap_dir, 'data', url_filename)
    with open(url_file, 'r') as f:
        # one entry per line of the file; each entry keeps its trailing newline
        # NOTE(review): confirm the consumer of `urls` strips it before use
        urls = [line for line in f]

    # connect to the client and database/collection
    client = MongoClient()
    db = client[database]
    tab = db[recipe_tab]

    # get all the recipes from the url list
Пример #7
0
def main(i, o):
    """Parse every document of the input collection and report the outcome.

    Each document returned by ``i.find()`` is passed to ``parse_and_insert``
    together with ``o``; the return values are summed as the number of
    successes (presumably 1/True on success and 0/False otherwise -- confirm
    against ``parse_and_insert``).

    Args:
        i: input collection; only its ``find()`` method is used.
        o: output collection, handed unchanged to ``parse_and_insert``.
    """

    success = 0
    # `total` is initialised up front so an empty collection reports 0 instead
    # of failing on an unbound loop variable; counting from 1 makes it the
    # number of recipes seen rather than the index of the last one.
    total = 0
    for total, recipe in enumerate(i.find(), start=1):
        success += parse_and_insert(recipe, o)

    print("FINISHED. Successfully parsed {} out of {} recipes".format(success, total))


if __name__ == "__main__":

    # Script entry point: empty the output collection, then parse every
    # document of the input collection into it.

    # set the location of the settings file
    cap_dir = os.getenv("CAPSTONE_DIR")
    settings_file = os.path.join(cap_dir, "settings", "project_settings.json")

    database = load_setting(settings_file, "db_name")  # name of mongodb database
    in_table = load_setting(settings_file, "nyt_recipe_html")
    out_table = load_setting(settings_file, "nyt_recipe_extracted")

    # connect to the client and database/collection
    client = MongoClient()
    try:
        db = client[database]
        itab = db[in_table]
        otab = db[out_table]

        # wipe the output collection before the run; delete_many({}) replaces
        # Collection.remove({}), which is deprecated in PyMongo 3 and removed
        # in PyMongo 4
        otab.delete_many({})

        main(itab, otab)
    finally:
        # close the connection even if parsing fails
        client.close()