def parse_entities(text='', debug=False):
    """Entity-level parsing of *text*, memoised in a JSON cache on disk.

    On a cache hit the stored result is returned directly; on a miss the
    Google NL entity-analysis API is called once, the response is written
    back to the cache, and the response is returned.

    :param text: text to analyse (also used as the cache key)
    :param debug: when True, print cache-hit / cache-miss diagnostics
    :return: cached or freshly-fetched entity-analysis result
    """
    from utils.baseutils import get_filepath
    from utils.db_utils import load_db, update_db

    db_dir = "/db"
    db_filename = "entity-cache.json"
    db_filepath = get_filepath(path.join(db_dir, db_filename))
    db_keyword_pair = load_db(database_path=db_filepath, debug=debug)
    try:
        output = db_keyword_pair[text]
        if debug:
            print('local keyword pair found!')
        return output
    except KeyError:
        if debug:
            print(
                'calling google translate to translate (will only happen once per word)'
            )
        # BUG FIX: the original left a debugging stub here
        # (`print(response); raise`) which made the rest of the branch
        # unreachable; the dead code below it was a copy-paste from
        # translate_text referencing an undefined `target`.  Cache the
        # entity response and return it instead.
        response = analyze_entities_api(text)
        db_keyword_pair[text] = response
        update_db(db_keyword_pair, database_path=db_filepath)
        return response
def translate_text(text='', target='ja', debug=False):
    """Translate *text* into the *target* language, memoised per language.

    Each supported target language has its own JSON cache file; a cache
    hit short-circuits the Google Translate API call entirely.

    :param text: source text (also the cache key)
    :param target: 'ja' or 'en' — anything else raises SystemError
    :param debug: when True, print cache-hit / cache-miss diagnostics
    :return: the translated string
    """
    from utils.baseutils import get_filepath
    from utils.db_utils import load_db, update_db

    db_dir = "/db"
    cache_files = {
        'ja': "translation-to-ja-cache.json",
        'en': "translation-to-en-cache.json",
    }
    if target not in cache_files:
        raise SystemError(
            'no translation cache defined. define one before proceeding.')

    db_filepath = get_filepath(path.join(db_dir, cache_files[target]))
    cache = load_db(database_path=db_filepath, debug=debug)

    if text in cache:
        if debug:
            print('local keyword pair found!')
        return cache[text]

    if debug:
        print(
            'calling google translate to translate (will only happen once per word)'
        )
    translated = translate_text_api(text=text, target=target,
                                    debug=debug)['translatedText']
    cache[text] = translated
    update_db(cache, database_path=db_filepath)
    return translated
def wrapper(*args, **kwargs):
    """Memoising wrapper: return the cached value for ``cache_key`` if
    present, otherwise run the wrapped function and persist its result.

    ``db_filepath``, ``debug``, ``cache_key`` and ``function_in`` are
    closure variables supplied by the enclosing decorator.
    """
    # before
    db_path = get_filepath(db_filepath)
    cache_db = load_db(database_path=db_path, debug=debug)
    try:
        cached_output = cache_db[cache_key]
        if debug:
            print('local keyword pair found!')
        return cached_output
    # BUG FIX: was a bare `except:`, which swallowed every error (I/O
    # failures, corrupt cache file, KeyboardInterrupt) and silently
    # re-ran the function.  Only a missing key is a cache miss.
    except KeyError:
        print('running function to cache: {}'.format(db_path))
        # ---------------------------------------------
        output_to_cache = function_in(*args, **kwargs)
        # ---------------------------------------------
        # after
        cache_db[cache_key] = output_to_cache
        # NOTE(review): load uses the resolved `db_path` but update uses
        # the raw `db_filepath` — presumably equivalent because load_db
        # resolves paths itself; confirm against utils.db_utils.
        update_db(cache_db, database_path=db_filepath)
        return output_to_cache
import json, six, os import os.path as path # Imports the Google Cloud client library from google.cloud import translate, language from google.cloud.language import enums, types import googlemaps from utils.db_utils import load_db, update_db from utils.baseutils import get_filepath import os.path as path # analyzed entities cache try: db_dir = "db" sentiment_db_filename = "sentiment-cache.json" sentiment_db_filepath = get_filepath( path.join(db_dir, sentiment_db_filename)) except: db_dir = "aodh-backend/db" sentiment_db_filename = "sentiment-cache.json" sentiment_db_filepath = get_filepath( path.join(db_dir, sentiment_db_filename)) sentiment_db = load_db(database_path=sentiment_db_filepath, debug=False) from hidden.hidden import GoogleAPI API_KEY = GoogleAPI().api_key def translate_text_api(text='', target='ja', debug=False): """Translates text into the target language.
"content": [], "timestamp": "1999-01-01 00:00:00" }, "include_hashtags": { "content": [], "timestamp": "1999-01-01 00:00:00" } }, "hashtags": { "content": [], "timestamp": "1999-01-01 00:00:00" } } cache_db_filepath = get_filepath(path.join(db_dir, cache_filename)) make_db(db_dict_structure, cache_db_filepath) def get_tweet_content(tweet_id): if not isinstance(tweet_id, list): status = api.GetStatus(status_id=tweet_id).AsDict() tweet_content = status['text'] return tweet_content else: statuses = api.GetStatuses(status_ids=tweet_id) tweet_contents = [status.AsDict()['text'] for status in statuses] return tweet_contents def get_tweet_url(tweet_id):
from gensim.models import KeyedVectors from utils.baseutils import get_filepath import os def load_word2vec_model_read_only(model_filepath): wv_model = KeyedVectors.load(model_filepath) return wv_model try: read_only_model_path = 'aodh-backend/db/w2v/word2vec_readonly.model' read_only_model_path = os.path.join(os.getcwd(), read_only_model_path) model = load_word2vec_model_read_only(get_filepath(read_only_model_path)) except SystemError: read_only_model_path = '/db/w2v/word2vec_readonly.model' read_only_model_path = os.path.join(os.getcwd(), read_only_model_path) model = load_word2vec_model_read_only(get_filepath(read_only_model_path)) except: print('Windows system error') def similarity(word1=u'女', word2=u'バナナ', debug=False): try: results = model.similarity(word1, word2) except KeyError: results = 9999 if debug: print('(cosine) word similarity between {} and {}: '.format(word1, word2)) print(results) print('')
def load_db(database_path=db_file, debug=False):
    """Load a JSON database file and return its deserialised contents.

    :param database_path: path to the JSON db; resolved via get_filepath
    :param debug: unused here — kept for interface compatibility with callers
    :return: the parsed JSON content (typically a dict)
    """
    database_path = get_filepath(database_path)
    with open(database_path, encoding='utf-8') as json_db:
        # json.load streams the file handle directly — idiomatic
        # replacement for json.loads(json_db.read())
        return json.load(json_db)