def __init__(self,
             api_key,
             api_server="http://languages.cortical.io/rest",
             language="fr_general"):
    """Create the Retina client used by this object.

    Args:
        api_key: Retina API key.
        api_server: Base URL of the REST endpoint, e.g.
            "http://api.cortical.io/rest" or
            "http://languages.cortical.io/rest".
        language: Name of the retina to use; see
            ``self.fullClient.getRetinas()`` for the available ones.
    """
    client_options = {"apiServer": api_server, "retinaName": language}
    self.fullClient = retinasdk.FullClient(api_key, **client_options)
def sdm_sim(self, train_data, body_dict, threshold):
    """Score headline/body similarity with the Retina API and print the results.

    For every training sample the headline and the body text it refers to
    are sent to ``FullClient.compare``; the returned cosine similarity is
    handed to ``create_lists`` together with the gold stance and
    ``threshold``.  Once all samples are processed the accumulated lists are
    passed to ``print_results`` along with ``self.model_type``.

    :param train_data: a list of training samples of type
        ``['headline', 'bodyID', 'stance']``
    :param body_dict: a dictionary of values containing
        ``{bodyID: 'bodyText'}``
    :param threshold: used to distinguish between similar and not similar
    :raises KeyError: if a sample references a ``bodyID`` that is not a key
        of ``body_dict``
    """
    import json

    import retinasdk

    # NOTE(review): the API key is hard-coded here; consider moving it to
    # configuration.
    fullClient = retinasdk.FullClient(
        "e8bf8de0-fe52-11e6-b22d-93a4ae922ff1",
        apiServer="http://api.cortical.io/rest",
        retinaName="en_associative")

    unrelated, related, y_true, y_pred = [], [], [], []

    for processed, (headline, bodyID, stance) in enumerate(train_data):
        # Look the body up directly: ``dict.values()`` is a view on Python 3
        # and cannot be indexed.  ``json.dumps`` escapes quotes, backslashes
        # and newlines inside the texts, which manual string concatenation
        # did not.
        payload = json.dumps([{"text": headline},
                              {"text": body_dict[bodyID]}])
        comparison = fullClient.compare(payload)
        # ``comparison.jaccardDistance`` is an alternative metric.
        sim = comparison.cosineSimilarity

        unrelated, related, y_true, y_pred = create_lists(
            sim, stance, threshold, [unrelated, related, y_true, y_pred])

        # keep track of the processed examples: report every 100 samples
        if processed and processed % 100 == 0:
            print(processed)

    print_results([unrelated, related, y_true, y_pred], self.model_type)
def hammingCompare(outtweets, innerTwitter, blocks=4, distance=3):
    """Collect simhash matches between each out-tweet and ``innerTwitter``.

    For every entry of ``outtweets`` a pair of simhashes is requested from
    ``getSimHash``.  When the returned container holds more than one entry,
    its ``'out_hash'`` and ``'in_hash'`` values are fed to
    ``simhash.find_all`` and the result is recorded.

    Args:
        outtweets: Sequence of records; element ``[2]`` of each record is
            what gets hashed (presumably the tweet text - confirm against
            the caller).
        innerTwitter: Passed through unchanged to ``getSimHash``.
        blocks: Number of blocks to use in ``simhash.find_all``.
        distance: Number of bits that may differ in matching pairs.

    Returns:
        A list of ``[index, outtweet[2], matches]`` entries, one for every
        out-tweet for which a hash pair was obtained.
    """
    client = retinasdk.FullClient(apiKey.retina_token,
                                  apiServer="http://api.cortical.io/rest",
                                  retinaName="en_associative")
    res = []
    for index, outtweet in enumerate(outtweets):
        # get simHash
        simhash_pair = getSimHash(outtweet[2], innerTwitter, client)
        if len(simhash_pair) > 1:
            hashes = [simhash_pair['out_hash'], simhash_pair['in_hash']]
            matches = simhash.find_all(hashes, blocks, distance)
            res.append([index, outtweet[2], matches])
    return res
def __init__(self):
    """Set up API clients, load the Keras model and the pickled dataset.

    Reads the module-level ``source_uri``, ``EVENT_REGISTRY_API_KEY`` and
    ``CORTICAL_API_KEY``, loads the model stored at ``self._clfPath`` and
    the data dump stored at ``self._preload_path``, and finally builds the
    tokenizer through ``self.fit_tokenizer()``.
    """
    self._sources = source_uri
    self._er = EventRegistry(apiKey=EVENT_REGISTRY_API_KEY)
    self._cortical_client = retinasdk.FullClient(
        CORTICAL_API_KEY,
        apiServer="http://api.cortical.io/rest",
        retinaName="en_associative")
    self._uid = str(uuid4())

    self._clfPath = '../ml/models/glove100d.hdf5'
    self.model = load_model(self._clfPath)
    self.graph = tf.get_default_graph()

    self._preload_path = "../ml/data/data_dump_glove.data"
    # Use a context manager so the dump file is closed after loading.
    # NOTE: unpickling executes arbitrary code; only load trusted dumps.
    with open(self._preload_path, "rb") as dump_file:
        self._dataset = pickle.load(dump_file)
    self._train_data = self._dataset["X_train"]
    self._train_labels = self._dataset["Y_train"]
    self._test_data = self._dataset["X_test"]

    # fit_tokenizer() is called last, after the dataset attributes exist.
    self._tokenizer = self.fit_tokenizer()
import json
import retinasdk
from apiStorage import apiKey

# Helper functions.

# Reusable clients shared by all API calls in this module.
sFunctionFullClient = retinasdk.FullClient(
    apiKey,
    apiServer="http://api.cortical.io/rest",
    retinaName="en_synonymous")
aFunctionFullClient = retinasdk.FullClient(
    apiKey,
    apiServer="http://api.cortical.io/rest",
    retinaName="en_associative")
FunctionLiteClient = retinasdk.LiteClient(apiKey)


def assimilateTermInCategory(category, term):
    """Merge ``term`` into a category fingerprint.

    input:  category - a fingerprint of the category filter
            term - the term you want to add to the category
    output: the resulting fingerprint (positions) of assimilating the
            given term, obtained from the "or" expression of both operands
    """
    operands = [{"positions": category}, {"term": term}]
    expression_json = json.dumps({"or": operands})
    merged = sFunctionFullClient.getFingerprintForExpression(expression_json)
    return merged.positions


#input: FP1 - fingerprint 1 to be merged with FP2
#       FP2 - fingerprint 2
import retinasdk
from misc import bcolors, testfiles
import json
import pandas as pd
import numpy as np
from sys import stdout

# Module-level Retina clients used by the functions below.
# NOTE(review): "your_api_key" looks like a placeholder - confirm a real key
# is supplied before this module is used.
liteClient = retinasdk.LiteClient("e29fcfe0")
fullClient = retinasdk.FullClient("your_api_key",
                                  apiServer="http://api.cortical.io/rest",
                                  retinaName="en_associative")


def compare_texts(texts1, texts2):
    """Collect the cosine similarity of each ``(texts1[i], texts2[i])`` pair.

    Prints a header, then walks both inputs in lockstep, writes a progress
    percentage to stdout and appends the ``cosineSimilarity`` returned by
    ``fullClient.compare`` to ``cosines``.

    NOTE(review): the visible part of this function ends inside the loop -
    no increment of ``i`` and no ``return`` are in view, so the remainder is
    presumably outside this excerpt.
    """
    print(bcolors.HEADER +
          "Compute similarity between sentences in dataframes:" +
          bcolors.ENDC)
    cosines = []
    i = 0
    l = len(texts1)
    for s1, s2 in zip(texts1, texts2):
        # progress indicator, rewritten in place via the leading "\r"
        percent = i / l * 100
        stdout.write("\r{0:.3f} %".format(percent))
        stdout.flush()
        # one API request per sentence pair
        cosines.append(
            fullClient.compare(json.dumps([{
                "text": s1
            }, {
                "text": s2
            }])).cosineSimilarity)
def init_connections():
    '''
    Function to get credentials and initiate all connections.

    Credentials are read from environment variables (after loading
    ``config.env`` next to this file); missing Watson Conversation and NLU
    credentials are then looked up in ``VCAP_SERVICES``.  On success the
    module globals ``gv_cortical_client``, ``gv_nlu``, ``gv_objstore_conn``,
    ``gv_ai`` and ``gv_bot_deafault_channel_name`` are set, and
    ``gv_bot_deafault_channel_id`` is set when a channel with the default
    name is found.

    return (workspace_id, bot_id, bot_name, conversation_client, slack_client)
    or five ``None`` values when credentials are missing, a client cannot
    be created, or no bot id is available.
    '''
    global gv_nlu, gv_cortical_client, gv_bot_deafault_channel_name, gv_bot_deafault_channel_id, gv_objstore_conn, gv_ai

    # loading credentials from the file in case environmental variables are not set
    dotenv.load_dotenv(os.path.join(os.path.dirname(__file__), "config.env"))

    # Read credentials from env variable first and if not set read from config file
    # Watson conversation: "Conversation_KEEP", workspace - slackbotwatson
    conversation_username = os.environ.get(
        "CONVERSATION_USERNAME_1", os.getenv("CONVERSATION_USERNAME_F"))
    conversation_password = os.environ.get(
        "CONVERSATION_PASSWORD_1", os.getenv("CONVERSATION_PASSWORD_F"))
    workspace_id = os.environ.get("WORKSPACE_ID_1",
                                  os.getenv("WORKSPACE_ID_F"))

    # Slack: team - aesnewenergysolutions
    bot_id = os.environ.get("SLACK_BOT_USER_1",
                            os.getenv("SLACK_BOT_USER_F"))
    bot_name = os.environ.get("SLACK_BOT_USER_NAME_1",
                              os.getenv("SLACK_BOT_USER_NAME_F"))
    slack_bot_token = os.environ.get("SLACK_BOT_TOKEN_1",
                                     os.getenv("SLACK_BOT_TOKEN_F"))
    gv_bot_deafault_channel_name = os.environ.get(
        "SLACK_BOT_DEFAULT_CHANNEL_1",
        os.getenv("SLACK_BOT_DEFAULT_CHANNEL_F"))

    # API.AI
    apiai_token = os.environ.get("APIAI_CLIENT_ACCESS_TOKEN_2",
                                 os.getenv("APIAI_CLIENT_ACCESS_TOKEN_F"))

    # Natural Language Understanding - Natural Language Understanding-h3
    nlu_username = os.environ.get("NLU_USERNAME_1",
                                  os.getenv("NLU_USERNAME_F"))
    nlu_password = os.environ.get("NLU_PASSWORD_1",
                                  os.getenv("NLU_PASSWORD_F"))

    # Bluemix Object Storage - “Object Storage-01”
    objstor_key = os.environ.get("OBJSTOR_KEY_1",
                                 os.getenv("OBJSTOR_KEY_F"))
    objstor_authurl = os.environ.get("OBJ_STOR_AUTHURL_1",
                                     os.getenv("OBJ_STOR_AUTHURL_F"))
    objstor_projectid = os.environ.get("OBJ_STOR_PROJECT_ID_1",
                                       os.getenv("OBJ_STOR_PROJECT_ID_F"))
    objstor_userid = os.environ.get("OBJ_STOR_USER_ID_1",
                                    os.getenv("OBJ_STOR_USER_ID_F"))
    objstor_region_name = os.environ.get("OBJ_STOR_REGION_NAME_1",
                                         os.getenv("OBJ_STOR_REGION_NAME_F"))

    # Cortical API Key
    cortical_key = os.environ.get("CORTICAL_KEY_1",
                                  os.getenv("CORTICAL_KEY_F"))

    if not all((conversation_username, conversation_password, workspace_id,
                bot_id, slack_bot_token, nlu_username, nlu_password,
                cortical_key, gv_bot_deafault_channel_name, apiai_token)):
        # If some of the service env vars are not set get them from VCAP
        vcap_env = None
        conversation_creds = None
        vcap_services = os.environ.get("VCAP_SERVICES")
        if vcap_services:
            vcap_env = json.loads(vcap_services)
        if vcap_env:
            conversation_creds = get_vcap_credentials(vcap_env,
                                                      'conversation')
            conversation_username = conversation_username or conversation_creds[
                'username']
            conversation_password = conversation_password or conversation_creds[
                'password']
            nlu_creds = get_vcap_credentials(
                vcap_env, 'natural-language-understanding')
            nlu_username = nlu_username or nlu_creds['username']
            nlu_password = nlu_password or nlu_creds['password']
            # bot_id = bot_id or conversation_creds['bot_id']
            # bot_name = bot_name or conversation_creds['bot_name']
            # slack_bot_token = slack_bot_token or conversation_creds['slack_bot_token']
            # cortical_key = cortical_key or conversation_creds['cortical_key']
            # gl_bot_deafault_channel_name = gv_bot_deafault_channel_name or conversation_creds['bot_deafault_channel']

        # If we still don't have all the above plus a few, then no WOS.
        if not all(
            (conversation_username, conversation_password, workspace_id,
             bot_id, bot_name, slack_bot_token, nlu_username, nlu_password,
             cortical_key, gv_bot_deafault_channel_name, apiai_token)):
            print("Not all Environmental Variables are set")
            return None, None, None, None, None

    try:
        # Instantiate Cortical Client
        gv_cortical_client = retinasdk.FullClient(
            cortical_key,
            apiServer="http://api.cortical.io/rest",
            retinaName="en_associative")

        # Instantiate Watson Conversation client.
        conversation_client = ConversationV1(username=conversation_username,
                                             password=conversation_password,
                                             version='2016-09-20')

        gv_nlu = NaturalLanguageUnderstandingV1(username=nlu_username,
                                                password=nlu_password,
                                                version='2017-04-24')

        # instantiate Bluemix Object Storage
        gv_objstore_conn = swiftclient.Connection(
            key=objstor_key,
            authurl=objstor_authurl,
            auth_version='3',
            os_options={
                "project_id": objstor_projectid,
                "user_id": objstor_userid,
                "region_name": objstor_region_name
            })

        # Instantiate Slack chatbot.
        slack_client = SlackClient(slack_bot_token)

        # Instantiate AIP.AI conversation agent
        gv_ai = apiai.ApiAI(apiai_token)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not swallowed here.
        print("Connection to the Services could not be established !!!")
        return None, None, None, None, None

    # If BOT_ID wasn't set, we can get it using SlackClient and user ID.
    # NOTE(review): both credential checks above already require ``bot_id``,
    # so this fallback appears to be unreachable at the moment.
    if not bot_id:
        api_call = slack_client.api_call("users.list")
        if api_call.get('ok'):
            # retrieve all users so we can find our bot
            users = api_call.get('members')
            for user in users:
                if bot_name in user and user.get(
                        'name') == conversation_username:
                    bot_id = user.get('id')
                    print("Found BOT_ID=" + bot_id)
                else:
                    print("could not find user with the name " +
                          conversation_username)
        else:
            print("could not find user because api_call did not return 'ok'")
            bot_id = None
        if not bot_id:
            print("Error: Missing BOT_ID or invalid SLACK_BOT_USER.")
            return None, None, None, None, None

    # get Channel ID for the default Channel
    vChannels = slack_client.api_call("channels.list", exclude_archived=1)
    for x in vChannels['channels']:
        if x["name"] == gv_bot_deafault_channel_name:
            gv_bot_deafault_channel_id = x["id"]

    # LOG.debug("Connection estabilished with both, Watson Bot and Slack !!!" )
    return (workspace_id, bot_id, bot_name, conversation_client, slack_client)
xs, ys = np.array(xs), np.array(ys) x_train, x_test, y_train, y_test = train_test_split(xs, ys, test_size=0.33, random_state=42) print('... done;') print('training classifier...') clf = LinearSVC() clf.fit(x_train, y_train) predicted = clf.predict(x_test) print(metrics.classification_report(y_test, predicted)) print('done') with open(PICKLE_FILE_PATH, 'wb') as pickle_file: pickle.dump(clf, pickle_file) if __name__ == "__main__": if not os.path.exists(PICKLE_FILE_PATH): train() with open(PICKLE_FILE_PATH, 'rb') as pickle_file: clf = pickle.load(pickle_file) config = Config("cortical") fullClient = retinasdk.FullClient(config["api_key"]) test_txt = "Burning Down The House only sounds perfect when The Talking Heads perform it." positions = fullClient.getFingerprintForText(test_txt).positions test_sample = np.zeros(128 * 128, dtype=int) test_sample[positions] = 1 print(clf.predict_text([test_sample]))
import time
import json
import retinasdk
import wikipedia
import urllib.request

# Shared Retina client for this module.
fC = retinasdk.FullClient(
    '37762630-a8ac-11e6-a057-97f4c970893c',
    apiServer="http://api.cortical.io/rest",
    retinaName="en_associative",
)

# Timestamp taken when the module is loaded.
start_time = time.time()

# Usage examples, kept for reference:
# fC.compare(json.dumps([{"term": "apple"}, {"term": "oranges"}]))
# fC.compare(json.dumps([{"term": "math"}, {"term": "calculus"}]))
# print(fC.compare(json.dumps([{"term": "Donald Trump"}, {"term": "China"}])))
# print(fC.compareBulk(json.dumps([[{"term": "calculus"}, {"term": "math"}],
# [{"term": "trigonometry"}, {"term": "math"}], [{"term": "Donald Trump"}, {"term": "math"}]])))


def getSimilarity(metric):
    """Return the ``weightedScoring`` value of ``metric``."""
    score = metric.weightedScoring
    return score


def getFiveLinks(links):
    """Return at most the first five entries of ``links``.

    A container with fewer than five entries is handed back as-is (the
    same object); otherwise the slice ``links[:5]`` is returned.
    """
    return links if len(links) < 5 else links[:5]


def getComparison(src, dst):
    """Build the two-term comparison payload for ``src`` and ``dst``."""
    return [{"term": term} for term in (src, dst)]
def setupCio():
    """Build the Cortical.io clients.

    The API key is taken from the ``CORTICAL_API_KEY`` environment variable.

    Returns:
        A ``(full_client, lite_client)`` tuple.
    """
    key = os.environ.get("CORTICAL_API_KEY")
    return retinasdk.FullClient(key), retinasdk.LiteClient(key)
def get_keywords(text):
    """Return the keywords the Retina API reports for ``text``.

    A new ``FullClient`` is created on every call, using
    ``config.cortical_api_key``.
    """
    retina = retinasdk.FullClient(
        config.cortical_api_key,
        apiServer="http://api.cortical.io/rest",
        retinaName="en_associative",
    )
    return retina.getKeywordsForText(text)
"""
Transformer and tools to convert texts into their fingerprints.

A fingerprint is a list of indexes in a 256*256 space
"""
from typing import Iterable, List, Optional, Any, Dict, cast

import numpy
import retinasdk
from retinasdk.model.fingerprint import Fingerprint as CorticalFingerprint
from scipy.sparse import lil_matrix, spmatrix
from sklearn.base import TransformerMixin

from common.config import get_config

# A fingerprint as used in this module: a plain list of integer indexes.
TFingerprint = List[int]

# Module-level Retina client, created at import time with the key stored
# under section "CORTICAL", option "api_key" of the project configuration.
_RETINA = retinasdk.FullClient(get_config().get("CORTICAL", "api_key"))


def _to_fingerprint(fingerprint: CorticalFingerprint) -> TFingerprint:
    """
    Extract the positions of a Retina fingerprint as a list of ints.

    :param fingerprint: fingerprint object returned by the Retina SDK
    :return: its ``positions`` attribute
    :raises AssertionError: if ``positions`` is not a list of ``int``
    """
    positions = fingerprint.positions
    # Runtime sanity checks backing the ``cast`` below.
    assert isinstance(positions, list)
    assert all(isinstance(idx, int) for idx in positions)
    return cast(TFingerprint, positions)


def get_fingerprints(texts: Iterable[str]) -> Iterable[TFingerprint]:
    """
    Get fingerprints for a batch of texts.

    :param texts: text batch
    :return: list of fingerprints
    """
    # NOTE(review): the body of this function is not visible in this view.
def __init__(self, apiKey, cacheDir, verbosity=0):
    """Store the cache directory and create the Retina client.

    Args:
        apiKey: Key passed to ``retinasdk.FullClient``.
        cacheDir: Stored unchanged as ``self.cacheDir``.
        verbosity: Accepted but not used by this constructor.
    """
    self.cacheDir = cacheDir
    retina_settings = {
        "apiServer": "http://api.cortical.io/rest",
        "retinaName": "en_synonymous",
    }
    self.corticalClient = retinasdk.FullClient(apiKey, **retina_settings)
import retinasdk, operator, json
import logging, nltk, numpy as np
from nltk.tokenize import RegexpTokenizer
from threading import Thread

# Silence NumPy floating-point warnings for division by zero and invalid
# operations for the whole process.
np.seterr(divide='ignore', invalid='ignore')

# Module-level Retina client shared by the code below.
# NOTE(review): the API key is hard-coded; consider moving it to configuration.
fullClient = retinasdk.FullClient("c3412e70-f345-11e5-8378-4dad29be0fab",
                                  apiServer="http://api.cortical.io/rest",
                                  retinaName="en_associative")

# English stop words (all lower case, alphabetically ordered).
stopwords = [
    'a', 'able', 'about', 'across', 'after', 'all', 'almost', 'also', 'am',
    'among', 'an', 'and', 'any', 'are', 'as', 'at', 'be', 'because', 'been',
    'but', 'by', 'can', 'cannot', 'could', 'dear', 'did', 'do', 'does',
    'either', 'else', 'ever', 'every', 'for', 'from', 'get', 'got', 'had',
    'has', 'have', 'he', 'her', 'hers', 'him', 'his', 'how', 'however', 'i',
    'if', 'in', 'into', 'is', 'it', 'its', 'just', 'least', 'let', 'like',
    'likely', 'may', 'me', 'might', 'most', 'must', 'my', 'neither', 'no',
    'nor', 'not', 'of', 'off', 'often', 'on', 'only', 'or', 'other', 'our',
    'own', 'rather', 'said', 'say', 'says', 'she', 'should', 'since', 'so',
    'some', 'than', 'that', 'the', 'their', 'them', 'then', 'there', 'these',
    'they', 'this', 'tis', 'to', 'too', 'twas', 'us', 'wants', 'was', 'we',
    'were', 'what', 'when', 'where', 'which', 'while', 'who', 'whom', 'why',
    'will', 'with', 'would', 'yet', 'you', 'your'
]


class Question_Similarity:
    """
    Class for finding a set of all similar Questions to a given Query Question.
    """
    # NOTE(review): the members of this class are not visible in this view.