Пример #1
0
    def __init__(self,
                 api_key,
                 api_server="http://languages.cortical.io/rest",
                 language="fr_general"):
        """Create the Retina client used by this instance.

        api_key    : Retina API key.
        api_server : base REST URL, e.g. "http://api.cortical.io/rest" or
                     "http://languages.cortical.io/rest".
        language   : retina name; see self.fullClient.getRetinas().
        """
        client_options = {"apiServer": api_server, "retinaName": language}
        self.fullClient = retinasdk.FullClient(api_key, **client_options)
Пример #2
0
    def sdm_sim(self, train_data, body_dict, threshold):
        '''
        Score headline/body similarity with the Cortical.io compare endpoint.

        :param
        train_data : a list of training samples of type ['headline', 'bodyID', 'stance']
        body_dict : a dictionary of values containing {bodyID:'bodyText'}
        threshold : used to distinguish between similar and not similar

        For every sample the cosine similarity of headline and body text is
        passed to create_lists() together with the stance and the threshold;
        the accumulated lists are finally handed to print_results() along with
        self.model_type. Nothing is returned.
        '''
        import json
        import retinasdk

        # NOTE(review): the API key is hard-coded in source; consider loading
        # it from configuration instead.
        fullClient = retinasdk.FullClient(
            "e8bf8de0-fe52-11e6-b22d-93a4ae922ff1",
            apiServer="http://api.cortical.io/rest",
            retinaName="en_associative")

        unrelated, related, y_true, y_pred = [], [], [], []
        for index, (headline, bodyID, stance) in enumerate(train_data):
            # Look the body text up by its ID directly: the view returned by
            # dict.values() cannot be indexed on Python 3. json.dumps escapes
            # quotes, backslashes and newlines that would corrupt a
            # hand-concatenated JSON payload.
            payload = json.dumps([{"text": headline},
                                  {"text": body_dict[bodyID]}])
            comparison = fullClient.compare(payload)
            # Alternative metric previously tried here: comparison.jaccardDistance
            sim = comparison.cosineSimilarity

            unrelated, related, y_true, y_pred = create_lists(
                sim, stance, threshold, [unrelated, related, y_true, y_pred])

            # keep track of the processed examples (one line per 100 samples)
            if index and index % 100 == 0:
                print(index)

        print_results([unrelated, related, y_true, y_pred], self.model_type)
Пример #3
0
def hammingCompare(outtweets, innerTwitter):
    """Find near-duplicate simhash matches for each entry of ``outtweets``.

    For every entry, element ``[2]`` is passed to ``getSimHash`` together with
    ``innerTwitter`` and a Retina client. When the returned mapping holds more
    than one item, its ``'out_hash'`` and ``'in_hash'`` values are checked
    with ``simhash.find_all``.

    :param outtweets: sequence of indexable records; only element ``[2]`` is
        used (presumably the tweet text -- confirm against the caller).
    :param innerTwitter: forwarded unchanged to ``getSimHash``.
    :return: list of ``[index, outtweet[2], matches]`` entries, one per record
        for which a hash pair was obtained.
    """
    client = retinasdk.FullClient(apiKey.retina_token,
                                  apiServer="http://api.cortical.io/rest",
                                  retinaName="en_associative")
    # Loop-invariant simhash.find_all settings.
    blocks = 4  # Number of blocks to use
    distance = 3  # Number of bits that may differ in matching pairs
    res = []

    for index, outtweet in enumerate(outtweets):
        text = outtweet[2]
        # get simHash
        simhash_pair = getSimHash(text, innerTwitter, client)
        # Skip records for which fewer than two hashes came back.
        if len(simhash_pair) > 1:
            hashes = [simhash_pair['out_hash'], simhash_pair['in_hash']]
            matches = simhash.find_all(hashes, blocks, distance)
            res.append([index, text, matches])
    return res
Пример #4
0
    def __init__(self):
        """Set up API clients, the trained model and the cached dataset.

        Creates the Event Registry and Cortical.io clients, loads the Keras
        model stored at ``self._clfPath`` together with the default TensorFlow
        graph, reads the pickled dataset dump and fits the tokenizer via
        ``self.fit_tokenizer()``.
        """
        self._sources = source_uri
        self._er = EventRegistry(apiKey=EVENT_REGISTRY_API_KEY)
        self._cortical_client = retinasdk.FullClient(
            CORTICAL_API_KEY,
            apiServer="http://api.cortical.io/rest",
            retinaName="en_associative")
        self._uid = str(uuid4())

        self._clfPath = '../ml/models/glove100d.hdf5'

        self.model = load_model(self._clfPath)
        self.graph = tf.get_default_graph()

        self._preload_path = "../ml/data/data_dump_glove.data"
        # Use a context manager so the dump file is closed as soon as it has
        # been read instead of leaking the handle.
        # NOTE(review): pickle.load can execute arbitrary code; only load
        # dumps from a trusted source.
        with open(self._preload_path, "rb") as dump_file:
            self._dataset = pickle.load(dump_file)
        self._train_data = self._dataset["X_train"]
        self._train_labels = self._dataset["Y_train"]
        self._test_data = self._dataset["X_test"]

        self._tokenizer = self.fit_tokenizer()
0
import json
import retinasdk
from apiStorage import apiKey

# helper functions

# Reusable clients for handling API calls. The two full clients share the
# same key and REST endpoint and differ only in the retina they query.
def _buildFullClient(retinaName):
    """Return a FullClient for *retinaName* on the shared REST endpoint."""
    return retinasdk.FullClient(apiKey,
                                apiServer="http://api.cortical.io/rest",
                                retinaName=retinaName)


sFunctionFullClient = _buildFullClient("en_synonymous")

aFunctionFullClient = _buildFullClient("en_associative")

FunctionLiteClient = retinasdk.LiteClient(apiKey)


def assimilateTermInCategory(category, term):
    """Assimilate a term into a category fingerprint.

    category -- fingerprint (positions) of the category filter
    term     -- the term to add to the category

    Returns the positions of the fingerprint that the synonymous-retina
    client computes for the expression "category OR term".
    """
    categoryOperand = {"positions": category}
    termOperand = {"term": term}
    expressionJson = json.dumps({"or": [categoryOperand, termOperand]})
    fingerprint = sFunctionFullClient.getFingerprintForExpression(
        expressionJson)
    return fingerprint.positions


#input: FP1 - fingerprint 1 to be merged with FP2
#       FP2 - fingerprint 2
Пример #6
0
import retinasdk
from misc import bcolors, testfiles
import json
import pandas as pd
import numpy as np
from sys import stdout

# Module-level Retina clients used by the functions in this file.
# NOTE(review): both API keys are written inline; "your_api_key" looks like a
# placeholder that must be replaced before use -- confirm.
liteClient = retinasdk.LiteClient("e29fcfe0")
fullClient = retinasdk.FullClient("your_api_key",
                                  apiServer="http://api.cortical.io/rest",
                                  retinaName="en_associative")


def compare_texts(texts1, texts2):
    """Compute the cosine similarity of each (texts1[k], texts2[k]) pair.

    Prints a header, then sends every pair produced by zip(texts1, texts2) to
    fullClient.compare and collects the reported cosineSimilarity values in
    `cosines`, writing a progress percentage to stdout on each iteration.

    NOTE(review): this listing appears to be cut off after the append call --
    no return statement is visible here; confirm against the full file.
    """
    print(bcolors.HEADER +
          "Compute similarity between sentences in dataframes:" + bcolors.ENDC)

    cosines = []
    i = 0
    l = len(texts1)

    for s1, s2 in zip(texts1, texts2):
        # NOTE(review): `i` is not advanced in the visible part of the loop,
        # so the percentage stays at 0 unless it is updated further down.
        percent = i / l * 100
        # "\r" returns to the start of the line so the figure is overwritten.
        stdout.write("\r{0:.3f} %".format(percent))
        stdout.flush()
        # One compare request per pair; json.dumps builds the request body.
        cosines.append(
            fullClient.compare(json.dumps([{
                "text": s1
            }, {
                "text": s2
            }])).cosineSimilarity)
Пример #7
0
def _env_with_fallback(primary, fallback):
    ''' Return the value of env variable `primary`, or the value of `fallback`
    when `primary` is not set (None if neither is set) '''
    return os.environ.get(primary, os.getenv(fallback))


def init_connections():
    ''' Function to get credentials and initiate all connections.

    Credentials are read from environment variables after loading config.env
    (located next to this file); Conversation and NLU credentials that are
    still missing are looked up in VCAP_SERVICES.

    Sets the globals gv_nlu, gv_cortical_client, gv_bot_deafault_channel_name,
    gv_objstore_conn, gv_ai and, when the default channel is found in the
    channel list, gv_bot_deafault_channel_id.

    return (workspace_id, bot_id, bot_name, conversation_client, slack_client),
    or a tuple of five None values when credentials are missing or a client
    could not be created '''
    global gv_nlu, gv_cortical_client, gv_bot_deafault_channel_name, gv_bot_deafault_channel_id, gv_objstore_conn, gv_ai

    # loading credentials from the file in case environmental variables are not set
    dotenv.load_dotenv(os.path.join(os.path.dirname(__file__), "config.env"))

    # Read credentials from env variable first and if not set read from config file
    # Watson conversation: "Conversation_KEEP", workspace - slackbotwatson
    conversation_username = _env_with_fallback("CONVERSATION_USERNAME_1",
                                               "CONVERSATION_USERNAME_F")
    conversation_password = _env_with_fallback("CONVERSATION_PASSWORD_1",
                                               "CONVERSATION_PASSWORD_F")
    workspace_id = _env_with_fallback("WORKSPACE_ID_1", "WORKSPACE_ID_F")

    # Slack: team - aesnewenergysolutions
    bot_id = _env_with_fallback("SLACK_BOT_USER_1", "SLACK_BOT_USER_F")
    bot_name = _env_with_fallback("SLACK_BOT_USER_NAME_1",
                                  "SLACK_BOT_USER_NAME_F")
    slack_bot_token = _env_with_fallback("SLACK_BOT_TOKEN_1",
                                         "SLACK_BOT_TOKEN_F")
    gv_bot_deafault_channel_name = _env_with_fallback(
        "SLACK_BOT_DEFAULT_CHANNEL_1", "SLACK_BOT_DEFAULT_CHANNEL_F")

    # API.AI
    apiai_token = _env_with_fallback("APIAI_CLIENT_ACCESS_TOKEN_2",
                                     "APIAI_CLIENT_ACCESS_TOKEN_F")

    # Natural Language Understanding - Natural Language Understanding-h3
    nlu_username = _env_with_fallback("NLU_USERNAME_1", "NLU_USERNAME_F")
    nlu_password = _env_with_fallback("NLU_PASSWORD_1", "NLU_PASSWORD_F")

    # Bluemix Object Storage - “Object Storage-01”
    objstor_key = _env_with_fallback("OBJSTOR_KEY_1", "OBJSTOR_KEY_F")
    objstor_authurl = _env_with_fallback("OBJ_STOR_AUTHURL_1",
                                         "OBJ_STOR_AUTHURL_F")
    objstor_projectid = _env_with_fallback("OBJ_STOR_PROJECT_ID_1",
                                           "OBJ_STOR_PROJECT_ID_F")
    objstor_userid = _env_with_fallback("OBJ_STOR_USER_ID_1",
                                        "OBJ_STOR_USER_ID_F")
    objstor_region_name = _env_with_fallback("OBJ_STOR_REGION_NAME_1",
                                             "OBJ_STOR_REGION_NAME_F")

    # Cortical API Key
    cortical_key = _env_with_fallback("CORTICAL_KEY_1", "CORTICAL_KEY_F")

    if not all((conversation_username, conversation_password, workspace_id,
                bot_id, slack_bot_token, nlu_username, nlu_password,
                cortical_key, gv_bot_deafault_channel_name, apiai_token)):
        # If some of the service env vars are not set get them from VCAP
        vcap_env = None
        vcap_services = os.environ.get("VCAP_SERVICES")
        if vcap_services:
            vcap_env = json.loads(vcap_services)
        if vcap_env:

            conversation_creds = get_vcap_credentials(vcap_env, 'conversation')
            conversation_username = conversation_username or conversation_creds[
                'username']
            conversation_password = conversation_password or conversation_creds[
                'password']

            nlu_creds = get_vcap_credentials(vcap_env,
                                             'natural-language-understanding')
            nlu_username = nlu_username or nlu_creds['username']
            nlu_password = nlu_password or nlu_creds['password']

            # Disabled lookups: Slack, Cortical and default-channel settings
            # are currently not read from VCAP.
            # bot_id = bot_id or conversation_creds['bot_id']
            # bot_name = bot_name or conversation_creds['bot_name']
            # slack_bot_token = slack_bot_token or conversation_creds['slack_bot_token']
            # cortical_key = cortical_key or conversation_creds['cortical_key']
            # gl_bot_deafault_channel_name = gv_bot_deafault_channel_name or conversation_creds['bot_deafault_channel']

        # If we still don't have all the above plus a few, then no WOS.
        # (this second check additionally requires bot_name)
        if not all(
            (conversation_username, conversation_password, workspace_id,
             bot_id, bot_name, slack_bot_token, nlu_username, nlu_password,
             cortical_key, gv_bot_deafault_channel_name, apiai_token)):
            print("Not all Environmental Variables are set")
            return None, None, None, None, None

    try:
        # Instantiate Cortical Client
        gv_cortical_client = retinasdk.FullClient(
            cortical_key,
            apiServer="http://api.cortical.io/rest",
            retinaName="en_associative")

        # Instantiate Watson Conversation client.
        conversation_client = ConversationV1(username=conversation_username,
                                             password=conversation_password,
                                             version='2016-09-20')

        gv_nlu = NaturalLanguageUnderstandingV1(username=nlu_username,
                                                password=nlu_password,
                                                version='2017-04-24')

        # instantiate Bluemix Object Storage
        gv_objstore_conn = swiftclient.Connection(key=objstor_key,
                                                  authurl=objstor_authurl,
                                                  auth_version='3',
                                                  os_options={
                                                      "project_id":
                                                      objstor_projectid,
                                                      "user_id":
                                                      objstor_userid,
                                                      "region_name":
                                                      objstor_region_name
                                                  })
        # Instantiate Slack chatbot.
        slack_client = SlackClient(slack_bot_token)

        # Instantiate AIP.AI conversation agent
        gv_ai = apiai.ApiAI(apiai_token)

    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt and SystemExit are not swallowed.
        print("Connection to the Services could not be established !!!")
        return None, None, None, None, None

    # If BOT_ID wasn't set, we can get it using SlackClient and user ID.
    # NOTE(review): both credential checks above require a truthy bot_id, so
    # this branch cannot currently be reached.
    if not bot_id:
        api_call = slack_client.api_call("users.list")
        if api_call.get('ok'):
            # retrieve all users so we can find our bot
            users = api_call.get('members')
            for user in users:
                if bot_name in user and user.get(
                        'name') == conversation_username:
                    bot_id = user.get('id')
                    print("Found BOT_ID=" + bot_id)
                else:
                    print("could not find user with the name " +
                          conversation_username)
        else:
            print("could not find user because api_call did not return 'ok'")
            bot_id = None

        if not bot_id:
            print("Error: Missing BOT_ID or invalid SLACK_BOT_USER.")
            return None, None, None, None, None
    # get Channel ID for the default Channel
    # (gv_bot_deafault_channel_id is left untouched when no channel matches)
    vChannels = slack_client.api_call("channels.list", exclude_archived=1)
    for x in vChannels['channels']:
        if x["name"] == gv_bot_deafault_channel_name:
            gv_bot_deafault_channel_id = x["id"]

    # LOG.debug("Connection estabilished with both, Watson Bot and Slack !!!" )
    return (workspace_id, bot_id, bot_name, conversation_client, slack_client)
Пример #8
0
    xs, ys = np.array(xs), np.array(ys)
    x_train, x_test, y_train, y_test = train_test_split(xs,
                                                        ys,
                                                        test_size=0.33,
                                                        random_state=42)
    print('... done;')

    print('training classifier...')
    clf = LinearSVC()
    clf.fit(x_train, y_train)
    predicted = clf.predict(x_test)
    print(metrics.classification_report(y_test, predicted))
    print('done')

    with open(PICKLE_FILE_PATH, 'wb') as pickle_file:
        pickle.dump(clf, pickle_file)


if __name__ == "__main__":
    # Train and persist the classifier only when no pickled model exists yet.
    if not os.path.exists(PICKLE_FILE_PATH):
        train()
    # NOTE(review): pickle.load can execute arbitrary code; only load model
    # files from a trusted source.
    with open(PICKLE_FILE_PATH, 'rb') as pickle_file:
        clf = pickle.load(pickle_file)

    # The file is only needed for loading, so inference runs after it closes.
    config = Config("cortical")
    fullClient = retinasdk.FullClient(config["api_key"])
    test_txt = "Burning Down The House only sounds perfect when The Talking Heads perform it."
    positions = fullClient.getFingerprintForText(test_txt).positions
    # Binary feature vector of length 128 * 128 with the fingerprint
    # positions set to 1.
    test_sample = np.zeros(128 * 128, dtype=int)
    test_sample[positions] = 1
    # The training code pickles a LinearSVC, which provides predict() but no
    # predict_text() (assuming the scikit-learn class -- confirm the import).
    print(clf.predict([test_sample]))
Пример #9
0
import time
import json
import retinasdk
import wikipedia
import urllib.request

# Module-level Retina client (en_associative retina) shared by this script.
# NOTE(review): the API key is hard-coded in source; consider loading it from
# configuration instead.
fC = retinasdk.FullClient('37762630-a8ac-11e6-a057-97f4c970893c',
                          apiServer="http://api.cortical.io/rest",
                          retinaName="en_associative")
# Timestamp taken at import time; presumably used further down to report the
# elapsed run time -- confirm.
start_time = time.time()
# fC.compare(json.dumps([{"term": "apple"}, {"term": "oranges"}]))
# fC.compare(json.dumps([{"term": "math"}, {"term": "calculus"}]))
# print(fC.compare(json.dumps([{"term": "Donald Trump"}, {"term": "China"}])))
# print(fC.compareBulk(json.dumps([[{"term": "calculus"}, {"term": "math"}],
# 	[{"term": "trigonometry"}, {"term": "math"}], [{"term": "Donald Trump"}, {"term": "math"}]])))


def getSimilarity(metric):
    """Return the ``weightedScoring`` attribute of *metric*."""
    score = metric.weightedScoring
    return score


def getFiveLinks(links):
    """Return at most the first five entries of *links*.

    A sequence with fewer than five items is handed back unchanged (the very
    same object); otherwise a slice holding the first five items is returned.
    """
    return links if len(links) < 5 else links[:5]


def getComparison(src, dst):
    """Return the two-element ``[{"term": src}, {"term": dst}]`` list."""
    return [{"term": name} for name in (src, dst)]

Пример #10
0
def setupCio():
    """Create the Cortical.io clients.

    The API key is read from the CORTICAL_API_KEY environment variable (None
    when it is not set). Returns a ``(FullClient, LiteClient)`` tuple.
    """
    key = os.environ.get("CORTICAL_API_KEY")
    return retinasdk.FullClient(key), retinasdk.LiteClient(key)
Пример #11
0
def get_keywords(text):
    """Return what ``getKeywordsForText`` yields for *text*.

    A new FullClient bound to the en_associative retina is created per call.
    """
    retina_options = {
        "apiServer": "http://api.cortical.io/rest",
        "retinaName": "en_associative",
    }
    client = retinasdk.FullClient(config.cortical_api_key, **retina_options)
    return client.getKeywordsForText(text)
Пример #12
0
"""
Transformer and tools to convert texts into their fingerprints. A fingerprint is a list of indexes in a 256*256 space
"""
from typing import Iterable, List, Optional, Any, Dict, cast

import numpy
import retinasdk
from retinasdk.model.fingerprint import Fingerprint as CorticalFingerprint
from scipy.sparse import lil_matrix, spmatrix
from sklearn.base import TransformerMixin

from common.config import get_config

# Type alias for a fingerprint as handled in this module: a list of ints.
TFingerprint = List[int]

# Module-wide Retina client, created at import time with the API key obtained
# from get_config().get("CORTICAL", "api_key").
_RETINA = retinasdk.FullClient(get_config().get("CORTICAL", "api_key"))


def _to_fingerprint(fingerprint: CorticalFingerprint) -> TFingerprint:
    """
    Extract the position list from an SDK fingerprint object.
    :param fingerprint: object exposing a ``positions`` attribute
    :return: the same ``positions`` list, typed as a fingerprint
    """
    raw_positions = fingerprint.positions
    assert isinstance(raw_positions, list)
    for idx in raw_positions:
        assert isinstance(idx, int)
    return cast(TFingerprint, raw_positions)


def get_fingerprints(texts: Iterable[str]) -> Iterable[TFingerprint]:
    """
    Get fingerprints for a batch of texts.
    :param texts: text batch
    :return: list of fingerprints
    """
Пример #13
0
 def __init__(self, apiKey, cacheDir, verbosity=0):
     """Store the cache directory and create the Retina client.

     apiKey    -- key handed to retinasdk.FullClient
     cacheDir  -- kept on the instance as ``self.cacheDir``
     verbosity -- accepted but not used by this constructor
     """
     self.cacheDir = cacheDir
     retinaSettings = {
         "apiServer": "http://api.cortical.io/rest",
         "retinaName": "en_synonymous",
     }
     self.corticalClient = retinasdk.FullClient(apiKey, **retinaSettings)
Пример #14
0
import retinasdk, operator, json
import logging, nltk, numpy as np
from nltk.tokenize import RegexpTokenizer
from threading import Thread

# Tell NumPy to ignore divide-by-zero and invalid floating-point operations
# instead of warning about them.
np.seterr(divide='ignore', invalid='ignore')
# Module-level Retina client (en_associative retina).
# NOTE(review): the API key is hard-coded in source; consider loading it from
# configuration instead.
fullClient = retinasdk.FullClient("c3412e70-f345-11e5-8378-4dad29be0fab",
                                  apiServer="http://api.cortical.io/rest",
                                  retinaName="en_associative")

# Lower-case English stop words, listed alphabetically.
# NOTE(review): presumably filtered out of questions before they are
# compared -- confirm against the code that uses this list.
stopwords = [
    'a', 'able', 'about', 'across', 'after', 'all', 'almost', 'also', 'am',
    'among', 'an', 'and', 'any', 'are', 'as', 'at', 'be', 'because', 'been',
    'but', 'by', 'can', 'cannot', 'could', 'dear', 'did', 'do', 'does',
    'either', 'else', 'ever', 'every', 'for', 'from', 'get', 'got', 'had',
    'has', 'have', 'he', 'her', 'hers', 'him', 'his', 'how', 'however', 'i',
    'if', 'in', 'into', 'is', 'it', 'its', 'just', 'least', 'let', 'like',
    'likely', 'may', 'me', 'might', 'most', 'must', 'my', 'neither', 'no',
    'nor', 'not', 'of', 'off', 'often', 'on', 'only', 'or', 'other', 'our',
    'own', 'rather', 'said', 'say', 'says', 'she', 'should', 'since', 'so',
    'some', 'than', 'that', 'the', 'their', 'them', 'then', 'there', 'these',
    'they', 'this', 'tis', 'to', 'too', 'twas', 'us', 'wants', 'was', 'we',
    'were', 'what', 'when', 'where', 'which', 'while', 'who', 'whom', 'why',
    'will', 'with', 'would', 'yet', 'you', 'your'
]


class Question_Similarity:
    """
	Class for finding a set of all similar Questions to a given Query Question.
	"""