Example #1
def parse_entities(text='', debug=False):
    """
    entity level parsing
    :param text:
    :param debug:
    :return:
    """
    from os import path
    from utils.baseutils import get_filepath
    from utils.db_utils import load_db, update_db
    db_dir = "/db"
    db_filename = "entity-cache.json"

    db_filepath = get_filepath(path.join(db_dir, db_filename))

    db_keyword_pair = load_db(database_path=db_filepath, debug=debug)
    try:
        output = db_keyword_pair[text]
        if debug:
            print('local keyword pair found!')
        return output
    except KeyError:
        if debug:
            print(
                'calling the entity analysis API (will only happen once per text)'
            )
        # analyze once, then cache the result so later lookups stay local
        output = analyze_entities_api(text)
        db_keyword_pair[text] = output
        update_db(db_keyword_pair, database_path=db_filepath)
        return output
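
A note on analyze_entities_api, which Example #1 calls but this listing never defines: below is a minimal sketch assuming the pre-2.0 google-cloud-language client (the enums/types imports in Example #4 point to that version); the client usage and the JSON-friendly return shape are assumptions, not the repository's actual code.

from google.cloud import language
from google.cloud.language import enums, types


def analyze_entities_api(text):
    # hypothetical sketch: analyze entities with the classic (< 2.0) client
    client = language.LanguageServiceClient()
    document = types.Document(content=text,
                              type=enums.Document.Type.PLAIN_TEXT)
    response = client.analyze_entities(document=document)
    # reduce the response to a JSON-serializable structure for the cache
    return [{'name': entity.name,
             'type': enums.Entity.Type(entity.type).name,
             'salience': entity.salience}
            for entity in response.entities]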
Example #2
def translate_text(text='', target='ja', debug=False):
    from os import path
    from utils.baseutils import get_filepath
    from utils.db_utils import load_db, update_db
    db_dir = "/db"

    if target == 'ja':
        db_filename = "translation-to-ja-cache.json"
    elif target == 'en':
        db_filename = "translation-to-en-cache.json"
    else:
        raise ValueError(
            'no translation cache defined for target {!r}; '
            'define one before proceeding.'.format(target))

    db_filepath = get_filepath(path.join(db_dir, db_filename))

    db_keyword_pair = load_db(database_path=db_filepath, debug=debug)
    try:
        output = db_keyword_pair[text]
        if debug:
            print('local keyword pair found!')
        return output
    except KeyError:
        if debug:
            print(
                'calling google translate to translate (will only happen once per word)'
            )
        response = translate_text_api(text=text, target=target, debug=debug)
        output = response['translatedText']
        db_keyword_pair[text] = output
        update_db(db_keyword_pair, database_path=db_filepath)
        return output
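
translate_text_api is defined in Example #4 but truncated there. Here is a minimal sketch assuming it wraps the pre-2.0 google-cloud-translate client, whose translate() call returns a dict carrying the 'translatedText' key that the caller above reads; the body is an assumption, not the repository's actual code.

from google.cloud import translate


def translate_text_api(text='', target='ja', debug=False):
    # hypothetical sketch: the classic (< 2.0) translation client returns
    # a dict with 'translatedText' (plus 'detectedSourceLanguage' / 'input')
    translate_client = translate.Client()
    result = translate_client.translate(text, target_language=target)
    if debug:
        print(u'Translation: {}'.format(result['translatedText']))
    return result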
Example #3
        def wrapper(*args, **kwargs):
            # before: resolve the cache file and look for a cached result
            db_path = get_filepath(db_filepath)

            cache_db = load_db(database_path=db_path, debug=debug)
            try:
                cached_output = cache_db[cache_key]
                if debug:
                    print('local keyword pair found!')
                return cached_output
            except KeyError:
                print('running function to cache: {}'.format(db_path))
                # ---------------------------------------------
                output_to_cache = function_in(*args, **kwargs)
                # ---------------------------------------------
                # after: store the fresh result under the same cache key
                cache_db[cache_key] = output_to_cache
                update_db(cache_db, database_path=db_path)
                return output_to_cache
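
The wrapper above is the inner function of a caching decorator whose outer layers are not shown in this listing. The sketch below shows how the enclosing factory might look; db_filepath, cache_key, debug, and function_in are names taken from the fragment, while the factory itself is an assumption.

import functools


def cache_to_db(db_filepath, cache_key, debug=False):
    # hypothetical enclosing decorator factory for the wrapper above
    def decorator(function_in):
        @functools.wraps(function_in)
        def wrapper(*args, **kwargs):
            # the caching body from Example #3 goes here; it closes over
            # db_filepath, cache_key, debug, and function_in
            return function_in(*args, **kwargs)
        return wrapper
    return decorator


# usage sketch (hypothetical):
# @cache_to_db(db_filepath='/db/some-cache.json', cache_key='output')
# def expensive_call():
#     ...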
Example #4
import json
import six
import os
import os.path as path
# Imports the Google Cloud client library
from google.cloud import translate, language
from google.cloud.language import enums, types
import googlemaps

from utils.db_utils import load_db, update_db
from utils.baseutils import get_filepath

# analyzed entities cache; fall back to the repo-root-relative path when the
# first location cannot be resolved (get_filepath appears to raise SystemError
# in that case, as Example #6 suggests)
sentiment_db_filename = "sentiment-cache.json"
try:
    db_dir = "db"
    sentiment_db_filepath = get_filepath(
        path.join(db_dir, sentiment_db_filename))
except SystemError:
    db_dir = "aodh-backend/db"
    sentiment_db_filepath = get_filepath(
        path.join(db_dir, sentiment_db_filename))

sentiment_db = load_db(database_path=sentiment_db_filepath, debug=False)

from hidden.hidden import GoogleAPI
API_KEY = GoogleAPI().api_key


def translate_text_api(text='', target='ja', debug=False):
    """Translates text into the target language.
Example #5
            "content": [],
            "timestamp": "1999-01-01 00:00:00"
        },
        "include_hashtags": {
            "content": [],
            "timestamp": "1999-01-01 00:00:00"
        }
    },
    "hashtags": {
        "content": [],
        "timestamp": "1999-01-01 00:00:00"
    }
}
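

# make_db is not defined in this listing; the sketch below is an assumption
# (not the repository's actual implementation) that seeds the JSON cache with
# the default structure above only when the file does not yet exist:
def make_db(db_dict_structure, database_path):
    import json
    import os
    if not os.path.exists(database_path):
        with open(database_path, 'w', encoding='utf-8') as json_db:
            json.dump(db_dict_structure, json_db,
                      ensure_ascii=False, indent=2)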


cache_db_filepath = get_filepath(path.join(db_dir, cache_filename))
make_db(db_dict_structure, cache_db_filepath)


def get_tweet_content(tweet_id):
    # accept either a single status id or a list of ids
    if not isinstance(tweet_id, list):
        status = api.GetStatus(status_id=tweet_id).AsDict()
        tweet_content = status['text']
        return tweet_content
    else:
        statuses = api.GetStatuses(status_ids=tweet_id)
        tweet_contents = [status.AsDict()['text'] for status in statuses]
        return tweet_contents


def get_tweet_url(tweet_id):
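
get_tweet_url is cut off at the def line above. A hypothetical completion consistent with the id-based helpers in this example; the permalink format is the standard Twitter status URL, not taken from the source.

def get_tweet_url(tweet_id):
    # hypothetical sketch: canonical status permalink built from the id
    return 'https://twitter.com/i/web/status/{}'.format(tweet_id)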
Example #6
from gensim.models import KeyedVectors
from utils.baseutils import get_filepath
import os


def load_word2vec_model_read_only(model_filepath):
    wv_model = KeyedVectors.load(model_filepath)
    return wv_model


try:
    read_only_model_path = 'aodh-backend/db/w2v/word2vec_readonly.model'
    read_only_model_path = os.path.join(os.getcwd(), read_only_model_path)
    model = load_word2vec_model_read_only(get_filepath(read_only_model_path))
except SystemError:
    # fall back to the absolute /db path (os.path.join discards the cwd
    # when the second argument is absolute)
    read_only_model_path = '/db/w2v/word2vec_readonly.model'
    read_only_model_path = os.path.join(os.getcwd(), read_only_model_path)
    model = load_word2vec_model_read_only(get_filepath(read_only_model_path))
except Exception:
    # note: model stays undefined on this branch
    print('Windows system error')


def similarity(word1=u'女', word2=u'バナナ', debug=False):
    try:
        results = model.similarity(word1, word2)
    except KeyError:
        # out-of-vocabulary word: return a large sentinel instead of raising
        results = 9999
    if debug:
        print('(cosine) word similarity between {} and {}: '.format(word1, word2))
        print(results)
        print('')
    return results
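
A quick usage sketch (the two words are arbitrary Japanese examples):

score = similarity(u'犬', u'猫', debug=True)
# prints the cosine similarity between the two words and returns it,
# or the 9999 sentinel when either word is out of vocabulary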
Example #7
import json
from utils.baseutils import get_filepath


def load_db(database_path=db_file, debug=False):
    # db_file is a module-level default path defined elsewhere in the module
    database_path = get_filepath(database_path)
    with open(database_path, encoding='utf-8') as json_db:
        return json.loads(json_db.read())
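
update_db is the write-side counterpart called throughout the examples above but never shown in this listing. A minimal sketch matching its call sites, update_db(db, database_path=...); this is an assumption, not the repository's actual implementation.

def update_db(db, database_path=db_file, debug=False):
    # hypothetical sketch: persist the in-memory dict back to the JSON cache
    database_path = get_filepath(database_path)
    with open(database_path, 'w', encoding='utf-8') as json_db:
        json.dump(db, json_db, ensure_ascii=False, indent=2)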