def rfmodel_pred(df, fname):
    """Predict with a saved model from the first row of ``df``.

    The ``x1`` and ``x2`` values of the first row are joined with a single
    space, embedded with Basilica's ``reddit`` sentence model, and handed to
    the model restored from ``fname``.

    Args:
        df: Table exposing ``df['x1'].iloc[0]`` and ``df['x2'].iloc[0]`` as
            strings (they are concatenated with ``+``).
        fname: Argument passed to ``load`` to restore the fitted model.

    Returns:
        Whatever ``model.predict`` returns for the single embedded sentence.
    """
    model = load(fname)
    text = df['x1'].iloc[0] + ' ' + df['x2'].iloc[0]
    # Use the connection as a context manager so it is released even if the
    # embedding request raises.
    with basilica.Connection(getenv('BASILICA_KEY')) as connection:
        embedding = connection.embed_sentence(text, model="reddit")
    # predict() receives a batch, hence the one-element list.
    return model.predict([embedding])
示例#2
0
 def prepare_string(self, string):
     """Embed ``string`` and return the vector as a one-row DataFrame.

     The sentence is embedded with the ``reddit`` model (120 second
     timeout); the resulting vector becomes a single row whose columns are
     the vector positions.
     """
     # NOTE(review): the API key is hard-coded here; consider reading it
     # from configuration instead.
     with basilica.Connection('370a60d1-2938-b1bf-d813-0cb6954f5a0e') as conn:
         vector = conn.embed_sentence(string, model='reddit', timeout=120)
     # Series -> one-column frame -> transpose gives one row per sentence.
     return pd.DataFrame(pd.Series(vector)).T
def basilica_api():
    """Return a Basilica connection keyed by ``BASILICA_API_KEY`` from the environment."""
    return basilica.Connection(os.getenv("BASILICA_API_KEY"))


# embeddings = connection.embed_sentences(["Hello world!", "How are you?"])
# print(list(embeddings)) # [[0.8556405305862427, ...], ...]
示例#4
0
def get_sentence_vector(sentence):
    """Return the ``twitter``-model embedding of ``sentence``.

    A connection is opened with ``BASILICA_KEY`` for the duration of the
    single request.
    """
    with basilica.Connection(BASILICA_KEY) as conn:
        vector = conn.embed_sentence(sentence, model='twitter')
    return vector
示例#5
0
    def calculate_user_text_embedding(input, user_input_embedding):
        """Embed the user's text and return the embeddings as a list.

        Args:
            input: Text entered by the user; embedded as a one-item batch.
            user_input_embedding: Accepted for interface compatibility; its
                incoming value is never read.

        Returns:
            A list holding the embedding of ``input``.
        """
        # NOTE(review): the API key is hard-coded here; consider reading it
        # from configuration instead.
        with basilica.Connection('36a370e3-becb-99f5-93a0-a92344e78eab') as conn:
            vectors = list(conn.embed_sentences([input]))

        return vectors
def get_upload_save_it_and_return_embedding() -> Tuple[array, str]:
    """Save the uploaded ``img`` file and embed it.

    The upload is read from ``request.files['img']``, written under
    ``STATIC/TMP`` using a sanitised file name, and embedded with
    ``embed_image_file`` (``dimensions`` option set to 2048).

    Returns:
        ``(embedding, saved_path)``.
    """
    upload = request.files['img']
    saved_path = os.path.join(STATIC, TMP, secure_filename(upload.filename))
    upload.save(saved_path)

    with basilica.Connection(BASILICA_KEY) as conn:
        image_vector = conn.embed_image_file(saved_path,
                                             opts={'dimensions': 2048})

    return image_vector, saved_path
示例#7
0
def test_sameconnection():
    """Check that nested embedding calls on one connection stay correct.

    Reference embeddings for the two distinct sentences are fetched first.
    Then, on a second connection, an inner ``embed_sentences`` stream is
    fully consumed for every item yielded by an outer stream, and every
    result is compared against its reference with ``sufficiently_equal``.
    """
    print("\nTest concurrent embedding calls")
    outer_batch = ["This is a sentence!"] * 39
    inner_batch = ["This sentence not so same?"] * 39

    # Ground truth: one embedding per distinct sentence.
    with basilica.Connection(test_key) as conn:
        truth = list(conn.embed_sentences(
            ["This is a sentence!", "This sentence not so same?"]))

    # Fresh connection for the interleaved requests.
    outer_seen = []
    inner_seen = []
    with basilica.Connection(test_key) as conn:
        for outer_vec in conn.embed_sentences(gen(outer_batch)):
            outer_seen.append(outer_vec)
            # Drain a complete inner stream while the outer one is mid-flight.
            inner_seen.extend(conn.embed_sentences(gen(inner_batch)))

    assert all([sufficiently_equal(truth[0], vec) for vec in outer_seen])
    assert all([sufficiently_equal(truth[1], vec) for vec in inner_seen])
    print("Test Passed!")
    def strain_advisor(self, input, neighbors=5):
        """Embed the user's text and return the indices of its nearest strains.

        The embedding is pushed through ``self.scaler``, ``self.pca`` and
        ``self.normalizer`` (in that order) before ``self.nn.kneighbors`` is
        queried for ``neighbors`` results.

        Returns:
            A plain list taken from the first row of the second element
            returned by ``kneighbors``.
        """
        with basilica.Connection(self.API_KEY) as conn:
            sentence_vec = conn.embed_sentence(input)

        # The transformers work on batches, so wrap the vector as one row.
        features = np.stack([sentence_vec], axis=0)
        features = self.scaler.transform(features)
        features = self.pca.transform(features)
        features = self.normalizer.transform(features)

        lookup = self.nn.kneighbors(features, neighbors)
        return lookup[1][0].tolist()
示例#9
0
def test_fakekey_large():
    """Embed ``sentences_large`` with ``fake_key`` and expect a failure.

    Any exception is reported as the expected outcome; receiving embeddings
    is reported as a test failure. The elapsed wall-clock time is printed
    either way.
    """
    print("\nTest embedding 768 sentences with fake_key")
    print("Expecting HTTPError")
    started = time.time()
    try:
        with basilica.Connection(fake_key) as conn:
            vectors = list(conn.embed_sentences(sentences_large))
    except Exception as exc:
        print(f"Exception rasied : {exc!s}")
    else:
        print("Test failed, embeddings returend with length of ",
              len(vectors))
    elapsed = time.time() - started
    print(f"time took : {elapsed!s}")
示例#10
0
def test_testkey_large():
    """Embed ``sentences_large`` with ``test_key`` and check the result shape.

    On success the result must contain 768 embeddings of length 512 each.
    An exception from the request is printed as a failure rather than
    re-raised. The elapsed wall-clock time is printed either way.
    """
    print("\nTest embedding 768 sentences with TEST_KEY")
    started = time.time()
    try:
        with basilica.Connection(test_key) as conn:
            vectors = list(conn.embed_sentences(sentences_large))
    except Exception as exc:
        print(f"Test failed, Exception raised : {exc!s}")
    else:
        assert len(vectors) == 768
        assert all([len(vec) == 512 for vec in vectors])
        print("No exception raised, test passed")
    elapsed = time.time() - started
    print(f"time took : {elapsed!s}")
示例#11
0
def test_demokey_small():
    """Embed ``sentences_small`` with the literal key ``'SLOW_DEMO_KEY'``.

    On success the result must contain 3 embeddings of length 512 each.
    An exception from the request is printed rather than re-raised. The
    elapsed wall-clock time is printed either way.
    """
    print("\nTest embedding 3 sentences with SLOW_DEMO_KEY")
    started = time.time()
    try:
        with basilica.Connection('SLOW_DEMO_KEY') as conn:
            vectors = list(conn.embed_sentences(sentences_small))
    except Exception as exc:
        print(exc)
    else:
        assert len(vectors) == 3
        assert all([len(vec) == 512 for vec in vectors])
        print("No exception raised, test passed")
    elapsed = time.time() - started
    print(f"time took : {elapsed!s}")
示例#12
0
def test_timeout(timeout=0.01):
    """Embed ``sentences_large`` with a very short timeout and expect a failure.

    Args:
        timeout: Timeout forwarded to ``embed_sentences``.

    Any exception is reported as the expected outcome; completing without
    one is reported as a test failure. The elapsed wall-clock time is
    printed either way.
    """
    print("\nTest timeout")
    print("Expecting Exeptions")
    started = time.time()
    try:
        with basilica.Connection(test_key) as conn:
            embeddings = list(
                conn.embed_sentences(sentences_large, timeout=timeout))
    except Exception as exc:
        print(f"Exception raised:{exc!s}")
    else:
        print("No exception raised, test failed")
    elapsed = time.time() - started
    print(f"time took : {elapsed!s}")
示例#13
0
    def strain_suggester(self, input, neighbors=5):
        """
        Embed the user's text with Basilica and return matching strain ids.

        The embedding is passed through ``self.scaler``, ``self.pca`` and
        ``self.normalizer`` in turn, then ``self.nn.kneighbors`` is asked
        for ``neighbors`` results (5 by default). The first row of the
        second element it returns is converted to a plain list.
        NOTE(review): the previous docstring said the PCA step reduces the
        vector to 75 dimensions; that cannot be confirmed from this method.
        """
        with basilica.Connection(self.API_KEY) as conn:
            sentence_vec = conn.embed_sentence(input)

        # Wrap the single vector as a one-row batch for the transformers.
        batch = np.stack([sentence_vec], axis=0)
        for stage in (self.scaler, self.pca, self.normalizer):
            batch = stage.transform(batch)

        return self.nn.kneighbors(batch, neighbors)[1][0].tolist()
示例#14
0
def get_user(screen_name=None):
    """Fetch a Twitter user, store the user and recent tweets, and render them.

    The user's profile fields are copied onto a found-or-created ``User``
    row and committed. Up to 50 timeline statuses (no replies, no retweets,
    extended text) are then fetched; each one is stored as a
    found-or-created ``Tweet`` row together with its ``twitter``-model
    Basilica embedding, and the batch is committed.

    Args:
        screen_name: Twitter screen name to look up.

    Returns:
        The rendered ``user.html`` template on success, otherwise a JSON
        response with the message ``"OOPS User Not Found!"``.
    """
    print(screen_name)

    try:
        twitter_user = twitter_api_client.get_user(screen_name)

        # Find or create the database user, then refresh the profile fields.
        db_user = User.query.get(twitter_user.id) or User(id=twitter_user.id)
        db_user.screen_name = twitter_user.screen_name
        db_user.name = twitter_user.name
        db_user.location = twitter_user.location
        db_user.followers_count = twitter_user.followers_count
        db.session.add(db_user)
        db.session.commit()

        statuses = twitter_api_client.user_timeline(
            screen_name, tweet_mode="extended", count=50,
            exclude_replies=True, include_rts=False)

        # One connection serves the whole timeline instead of opening a new
        # one for every tweet.
        with basilica.Connection(API_KEY) as c:
            for status in statuses:
                print(status.full_text)
                print("----")

                # Find or create the database tweet.
                db_tweet = Tweet.query.get(status.id) or Tweet(id=status.id)
                db_tweet.user_id = status.author.id  # or db_user.id
                db_tweet.full_text = status.full_text
                db_tweet.embedding = list(
                    c.embed_sentence(status.full_text, model="twitter"))
                db.session.add(db_tweet)

        db.session.commit()

        # The template receives the raw API statuses, not the database rows.
        return render_template("user.html", user=db_user, tweets=statuses)

    except Exception as err:
        # Catch Exception rather than everything, so SystemExit and
        # KeyboardInterrupt still propagate. The response is unchanged, but
        # the real cause is printed because it is not always a missing user.
        print("get_user failed:", repr(err))
        return jsonify({"message": "OOPS User Not Found!"})
示例#15
0
def predict(user_input_symptoms):
    '''Embed the user's symptom text and return the embeddings.

    Args:
        user_input_symptoms: Free-text description entered by the user.

    Returns:
        A list containing the Basilica embedding of the input text.

    NOTE(review): despite its name this function does not yet use the
    loaded data or model to pick strains; it only returns the embedding.
    '''
    # NOTE(review): both objects are loaded but not used below; kept so the
    # function still fails early if either file is missing.
    df = pd.read_csv('symptoms.csv')
    unpickled = pd.read_pickle('pickled_model')

    # embed_sentences expects a collection of sentences, so wrap the text.
    # The parameter is used here; the previous code read an undefined name.
    user_input_list = [user_input_symptoms]

    # NOTE(review): the API key is hard-coded here; consider reading it
    # from configuration instead.
    with basilica.Connection('72a5b6d3-09a2-974d-adb0-eee91584cfc7') as c:
        # Materialise inside the block so nothing is fetched lazily after
        # the connection has been closed.
        user_input_symptoms_embeddings = list(
            c.embed_sentences(user_input_list))

    return user_input_symptoms_embeddings
示例#16
0
 def embed_basilica_to_df(self):
     """
     Embed every value of ``self.df['joined_columns']`` one sentence at a time.

     ``self.make_one_column()`` is called first. Each value is embedded with
     the ``email`` model (5 second timeout per request) and the vectors are
     stored in a new ``embedded`` column. The elapsed time is printed.
     For faster performance pass an API key that is functional.
     Returns the df with a column named 'embedded'.
     """
     from timeit import default_timer as timer

     self.make_one_column()
     start = timer()
     # A single connection serves all rows; opening one per row only added
     # repeated setup and teardown.
     with basilica.Connection(self.API_KEY) as c:
         column_embedded = [
             list(
                 c.embed_sentence(sentence,
                                  model='email',
                                  version='default',
                                  opts={},
                                  timeout=5))
             for sentence in self.df['joined_columns']
         ]
     self.df['embedded'] = column_embedded
     end = timer()
     print(end - start)
     return self.df
示例#17
0
 def embed_basilica_to_df(self):
     """
     Embed ``self.df['joined_columns']`` in batches of 64 sentences.

     ``self.make_one_column()`` is called first. Each batch is sent with the
     ``email`` model (20 second timeout per request) and the vectors are
     stored in a new ``embedded`` column. The elapsed time is printed.
     For faster performance pass an API key that is functional.
     Returns the df with a column named 'embedded'.
     """
     from timeit import default_timer as timer

     self.make_one_column()
     started = timer()
     vectors = []
     total = len(self.df['joined_columns'])
     for offset in range(0, total, 64):
         batch = self.df['joined_columns'].iloc[offset:offset + 64]
         with basilica.Connection(self.API_KEY) as conn:
             # Consume the results while the connection is still open.
             vectors.extend(
                 conn.embed_sentences(batch,
                                      model='email',
                                      version='default',
                                      opts={},
                                      timeout=20))
     self.df['embedded'] = vectors
     print(timer() - started)
     return self.df
示例#18
0
from .models import DB, Tweet, User


# https://greatlist.com/happiness/must-follow-twitter-accounts
# Default Twitter accounts used by the application.
# 'conanobrien' was previously misspelled 'connanobrien', which would make
# the lookup for that account fail.
TWITTER_USERS = ['calebhicks', 'elonmusk', 'rrherr', 'SteveMartinToGO',
                 'alyankovic', 'nasa', 'sadserver', 'jkhowland', 'austen',
                 'common_squirrel', 'KenJennings', 'conanobrien',
                 'big_ben_clock', 'IAM_SHAKESPEARE']

# Twitter client: credentials are read through config().
TWITTER_AUTH = tweepy.OAuthHandler(config('TWITTER_CONSUMER_KEY'),
                                   config('TWITTER_CONSUMER_SECRET'))
TWITTER_AUTH.set_access_token(config('TWITTER_ACCESS_TOKEN'),
                              config('TWITTER_ACCESS_TOKEN_SECRET'))
TWITTER = tweepy.API(TWITTER_AUTH)

# Shared Basilica connection, created once at import time.
BASILICA = basilica.Connection(config('BASILICA_KEY'))


def add_or_update_user(username):
    """Add or update a user and their Tweets, error if not a Twitter user."""
    try:
        twitter_user = TWITTER.get_user(username)
        db_user = (User.query.get(twitter_user.id) or
                   User(id=twitter_user.id, name=username))
        DB.session.add(db_user)
        # We want as many recent non-retweet/reply statuses as we can get
        # 200 is a Twitter API limit, we'll usually see less due to exclusions
        tweets = twitter_user.timeline(
            count=250, exclude_replies=True, include_rts=False,
            tweet_mode='extended', since_id=db_user.newest_tweet_id)
        if tweets:
示例#19
0
from os import getenv
import basilica
import tweepy
from .models import DB, Tweet, User

# Default Twitter accounts used by the application.
TWITTER_USERS = [
    'calebhicks', 'elonmusk', 'rrherr', 'SteveMartinToGo', 'alyankovic',
    'nasa', 'sadserver', 'jkhowland', 'austen', 'common_squirrel',
    'KenJennings', 'conanobrien', 'big_ben_clock', 'IAM_SHAKESPEARE'
]

# Twitter credentials come from the environment; getenv returns None when a
# variable is unset.
TWITTER_API_KEY = getenv('TWITTER_API_KEY')
TWITTER_API_KEY_SECRET = getenv('TWITTER_API_KEY_SECRET')
TWITTER_AUTH = tweepy.OAuthHandler(TWITTER_API_KEY, TWITTER_API_KEY_SECRET)
TWITTER = tweepy.API(TWITTER_AUTH)
# Shared Basilica connection, created once at import time.
BASILICA = basilica.Connection(getenv('BASILICA_KEY'))


def add_or_update_user(username):
    """Add or update a user and their Tweets, error if not a Twitter user."""
    try:
        twitter_user = TWITTER.get_user(username)
        db_user = (User.query.get(twitter_user.id)
                   or User(id=twitter_user.id, name=username))
        DB.session.add(db_user)
        # Lets get the tweets - focusing on primary (not retweet/reply)
        tweets = twitter_user.timeline(count=200,
                                       exclude_replies=True,
                                       include_rts=False,
                                       tweet_mode='extended',
                                       since_id=db_user.newest_tweet_id)
import basilica
import os
from dotenv import load_dotenv

# Load variables from a local .env file into the environment.
load_dotenv()

basilica_apikey = os.getenv("BASILICA_API_KEY", default="BASILICA_api_key")

# Module-level connection; the print runs on every import of this module.
connection = basilica.Connection(basilica_apikey)
print('CONNECTION', type(connection))

if __name__ == "__main__":

    # A single string goes through embed_sentence. embed_sentences takes a
    # collection of sentences, so passing one string there would treat it
    # as a sequence of one-character sentences.
    embedding = connection.embed_sentence("hey this is great",
                                          model="twitter")
    print(type(embedding))

    # Several sentences are embedded together in one call.
    tweets = ["Hello World", " The quick brown fox", "such a great world"]
    embeddings = connection.embed_sentences(tweets, model="twitter")
    for embed in embeddings:
        print("___________________")
        print(len(embed))
import basilica
import os
from dotenv import load_dotenv

# Load variables from a local .env file into the environment.
load_dotenv()

BASILICA_API_KEY = os.getenv("BASILICA_API_KEY")

# Module-level connection; the print runs on every import of this module.
connection = basilica.Connection(BASILICA_API_KEY)
print(type(connection))  #> <class 'basilica.Connection'>

if __name__ == "__main__":
    # Demo 1: embed one sentence and show the raw vector.
    single_vector = connection.embed_sentence("hey this is a cool tweet",
                                              model="twitter")
    print(single_vector)

    # Demo 2: embed a batch and show the length of each vector.
    sample_tweets = [
        "Hello workd", "artificial intelligence", "another tweet here #cool"
    ]
    for vector in connection.embed_sentences(sample_tweets, model="twitter"):
        print("-----")
        print(len(vector))
示例#22
0
import basilica

import os
from dotenv import load_dotenv
load_dotenv()

# NOTE(review): "BASCILICA" looks like a misspelling of "BASILICA". It is
# left unchanged because the environment must define the same name; confirm.
BASCILICA_API_KEY = os.getenv("BASCILICA_API_KEY", default="oops")
sentences = [
    "This is a sentence!",
    "This is a similar sentence!",
    "I don't think this sentence is very similar at all...",
]
# Every request stays inside the with block: the connection is only
# guaranteed to be open there. The leftover breakpoint() was removed so the
# script runs unattended.
with basilica.Connection(BASCILICA_API_KEY) as connection:
    print(type(connection))

    embeddings = list(connection.embed_sentences(sentences))
    for embed in embeddings:
        print("---------")
        print(embed)  # one embedding vector (a list of floats)

    embedding = connection.embed_sentence("Hello World")
    print(embedding)
示例#23
0
import basilica
from .models import DB, Tweet, User

# Default Twitter accounts used by the application.
TWITTER_USERS = ['calebhicks', 'elonmusk', 'rrherr', 'SteveMartinToGo',
                 'alyankovic', 'nasa', 'sadserver', 'jkhowland', 'austen',
                 'common_squirrel', 'KenJennings', 'conanobrien',
                 'big_ben_clock', 'IAM_SHAKESPEARE']
                 
# NOTE(review): load_dotenv, os, tweepy and SQLAlchemy are not imported in
# the visible part of this file; confirm they are imported elsewhere.
load_dotenv()

# Credentials read from the environment; each is None when unset.
API_KEY = os.getenv("API_KEY")
API_SECRET_KEY = os.getenv("API_SECRET_KEY")
BEARER_TOKEN = os.getenv("BEARER_TOKEN")
BASILICA_KEY = os.getenv("BASILICA_KEY")

# Module-level Basilica connection.
b = basilica.Connection(BASILICA_KEY)

# Grants authorization
TWITTER_AUTH = tweepy.OAuthHandler(API_KEY, API_SECRET_KEY)
TWITTER = tweepy.API(TWITTER_AUTH)
# NOTE(review): this rebinds DB, replacing the DB imported from .models at
# the top of the file; confirm this is intended.
DB = SQLAlchemy()
# NOTE(review): the statements below run at import time and call the
# Twitter and Basilica clients; they look like leftover experimentation.
user = '******'
twitter_user = TWITTER.get_user(user)

# Up to 5 statuses, excluding replies and retweets, with extended text.
tweets = twitter_user.timeline(count = 5, exclude_replies=True,
                                    include_rts=False,
                                    tweet_mode = 'extended',)
# Raises IndexError when the timeline comes back empty.
tweet_text = tweets[0].full_text
embedding = b.embed_sentence(tweet_text, model = 'twitter')

def add_or_update_user(username):
示例#24
0
# web_app/services/basilica_service.py
from dotenv import load_dotenv
import os
import basilica

# Load variables from a local .env file into the environment.
load_dotenv()

API_KEY = os.getenv("BASILICA_API_KEY")

# Module-level connection shared by importers of this service module.
connection = basilica.Connection(API_KEY)

if __name__ == "__main__":
    divider = "---------"

    # Demo 1: a single sentence.
    print(divider)
    sentence = "Hello again"
    print(list(connection.embed_sentence(sentence)))

    # Demo 2: several sentences in one request, which is more efficient
    # than one request per sentence.
    print(divider)
    sentences = ["Hello world!", "How are you?"]
    print(sentences)
    embeddings = connection.embed_sentences(sentences)
    print("EMBEDDINGS...")
    print(type(embeddings))
    print(list(embeddings))  # [[0.8556405305862427, ...], ...]

    # Demo 3: a tweet embedded with the twitter model.
    print(divider)
    tweet_text = "I love #ArtificialIntelligence"
    tweet_embedding = connection.embed_sentence(tweet_text, model="twitter")
示例#25
0
def basilica_api_client():
    """Create a Basilica connection from ``API_KEY``, print its type, and return it."""
    client = basilica.Connection(API_KEY)
    print(type(client))
    return client
示例#26
0
def basilica_api_client():
    """Return a new Basilica connection created from ``API_KEY``."""
    return basilica.Connection(API_KEY)

    
示例#27
0
def authbasilica():
    """Return a Basilica connection using the ``BASILICA_KEY`` value from ``config``."""
    key = config('BASILICA_KEY')
    return basilica.Connection(key)
示例#28
0
# pylint: disable=import-error

import basilica
import os
from dotenv import load_dotenv

# Load variables from a local .env file into the environment.
load_dotenv()

API_KEY = os.getenv("BASILICA_API_KEY")

# Module-level connection available to importers of this module.
connection = basilica.Connection(API_KEY)

if __name__ == "__main__":

    sentences = [
        "This is a sentence!", "This is a similar sentence!",
        "I don't think this sentence is very similar at all..."
    ]

    # The demo uses its own short-lived connection and materialises the
    # results before the block closes it.
    with basilica.Connection(API_KEY) as c:
        embeddings = list(c.embed_sentences(sentences))
    print(embeddings)
    # Sample output (previously left as a live expression statement):
    # [[0.8556405305862427, ...], ...]
    # embed_sentences yields its results lazily, hence the list() above;
    # `embeddings` is therefore a list.

    for embedding in embeddings:
        print(len(embedding))
        print(list(embedding))
        print("-------")
示例#29
0
import os
from dotenv import load_dotenv
import basilica

# Load variables from a local .env file into the environment.
load_dotenv()

basilica_api_key = os.getenv('basilica_api_key', default='Oops')

sentences = [
    "This is a sentence.", "This is a similar sentence.",
    "I don't think this sentence is very similar at all..."
]

with basilica.Connection(basilica_api_key) as c:
    # Materialise once: the result was previously consumed by list() for
    # printing, which left nothing for the loop below to iterate over.
    embeddings = list(c.embed_sentences(sentences))
    print(embeddings)

for emb in embeddings:
    print(type(emb))
    print(emb)
    print('-----------------')

# No explicit close is needed: the with block releases the connection, and
# this script never defines the `connection` name the old close() call used.
示例#30
0
def basilica_connection():
    """Create a Basilica connection from ``BASILICA_API_KEY``, print it, and return it."""
    client = basilica.Connection(BASILICA_API_KEY)
    print(client)
    return client