def obtain_past_articles(start, end):
    """
    Crawl articles for a time frame and post them to the server.

    For every entry of the module-level ``commodity_list`` the result of
    ``alchemy_news_crawler`` is converted with
    ``convert_to_Infusion_JSON_list`` and each converted document is posted
    through a fresh ``RestCaller`` (also published as the module-level
    ``rest_caller``).  The ``_id`` and ``url`` fields of every JSON response
    are recorded in the module-level ``id_list`` mapping.

    :param start: start of the time frame, handed to the crawler unchanged
    :param end: end of the time frame, handed to the crawler unchanged
    :return: None
    """
    global rest_caller
    rest_caller = RestCaller(mean_server_url)

    for commodity in commodity_list:
        articles = convert_to_Infusion_JSON_list(
            alchemy_news_crawler(commodity, start, end))

        # Post each document and remember which URL was stored under which id.
        for article in articles:
            response = rest_caller.post(article)
            stored = json.loads(response.text)
            id_list[stored['_id']] = stored['url']
def historical_data_process(start, end):
    """
    Request historical index data for a date range and post it to the server.

    Each index in the module-level ``indexes_list`` is queried with
    ``yql_query``; the quotes are converted with
    ``convert_to_Infusion_JSON_list`` and posted one at a time through a
    fresh ``RestCaller`` (also published as the module-level
    ``rest_caller``).

    :param start: start date
    :param end: end date
    :return: None
    """
    global rest_caller
    rest_caller = RestCaller(mean_server_url)

    for index_symbol in indexes_list:
        quotes = convert_to_Infusion_JSON_list(
            yql_query(index_symbol, start, end))
        for quote in quotes:
            rest_caller.post(quote)
def init():
    """
    Find stored documents that lack full text and trigger text extraction.

    Fetches the documents via ``RestCaller.get`` (the caller is also
    published as the module-level ``rest_caller``), collects ``_id -> url``
    for every document whose ``content`` contains the "full text not
    available" placeholder, and hands that mapping to
    ``alchemy_text_extraction``.  Prints a notice when nothing is missing.

    :return: None
    """
    global rest_caller
    rest_caller = RestCaller(mean_server_url)

    placeholder = "Full text not available. Please use associated URL to view full text:"
    documents = json.loads(rest_caller.get().text)

    # Map document id -> source URL for every document still holding the placeholder.
    missing_content = dict(
        (doc["_id"], doc["url"])
        for doc in documents
        if placeholder in doc["content"]
    )

    if not missing_content:
        print("No text extractions needed!")
        return

    alchemy_text_extraction(missing_content)
    print("\n\n")
def daily_process():
    """
    Routinely check the server for new index data and post it, forever.

    Every cycle queries ``yql_query`` for each index in the module-level
    ``indexes_list``, converts the quotes with
    ``convert_to_Infusion_JSON_list`` and posts them through a fresh
    ``RestCaller`` (also published as the module-level ``rest_caller``).
    The function then sleeps for one hour before the next cycle, whether or
    not the cycle succeeded.  It never returns.

    :return: does not return
    """
    # Local import keeps this block self-contained; only used for error reporting.
    import traceback

    global rest_caller
    rest_caller = RestCaller(mean_server_url)
    while True:
        try:
            for index in indexes_list:
                raw_quote_list = yql_query(index)
                converted_list = convert_to_Infusion_JSON_list(raw_quote_list)
                for item in converted_list:
                    rest_caller.post(item)
        except Exception:
            # Best effort: keep the loop alive, but show what actually failed
            # instead of assuming every error is a network problem.
            traceback.print_exc()
            print("Network is not stable... retry in next iteration")

        # The wait sits outside the try block so that a failing cycle also
        # pauses; otherwise a persistent failure would retry in a tight loop
        # with no delay between requests.
        print("Waiting for next checking period (1hour interval)...")
        time.sleep(3600)
def json_alchemy_entities(entity_unparsed):
    """
    Post one record per title entity of a document to the entity endpoint.

    The publication date at ``entity_unparsed['publicationDate']['date']``
    is parsed as ``%Y%m%dT%H%M%S`` and reformatted to ``%Y-%m-%d``; an empty
    date string falls back to the module-level ``default_date``.  Each entry
    of ``entity_unparsed['enrichedTitle']['entities']`` is then posted with
    its text, count and sentiment score through a fresh ``RestCaller`` (also
    published as the module-level ``entity_rest_caller``).

    :param entity_unparsed: document mapping holding ``publicationDate`` and
        ``enrichedTitle`` as accessed above
    :return: None
    """
    global entity_rest_caller
    entity_rest_caller = RestCaller(mean_server_entity_url)

    raw_date = entity_unparsed['publicationDate']['date']
    if raw_date != '':
        parsed_date = time.strptime(raw_date, "%Y%m%dT%H%M%S")
        entity_date = time.strftime("%Y-%m-%d", parsed_date)
    else:
        entity_date = default_date

    for entity in entity_unparsed['enrichedTitle']['entities']:
        entity_rest_caller.post({
            "entityDate": entity_date,
            "text": entity['text'],
            "count": entity['count'],
            "sentiment": entity['sentiment']['score'],
        })