def rfmodel_pred(df, fname):
    """Predict with a saved model from the embedded text of ``df``'s first row.

    The ``x1`` and ``x2`` values of the first row are joined with a single
    space, embedded with Basilica's ``reddit`` sentence model, and the
    embedding is passed to the model restored from ``fname``.

    Args:
        df: Frame with string columns ``x1`` and ``x2`` and at least one row.
        fname: Location handed to ``load`` to restore the fitted model.

    Returns:
        Whatever ``model.predict`` returns for the single embedded sample.
    """
    model = load(fname)
    text = df['x1'].iloc[0] + ' ' + df['x2'].iloc[0]
    # Use the connection as a context manager so it is released even when
    # the embedding request fails.
    with basilica.Connection(getenv('BASILICA_KEY')) as connection:
        embedding = connection.embed_sentence(text, model="reddit")
    return model.predict([embedding])
def prepare_string(self, string):
    """Embed ``string`` with Basilica and return it as a one-row DataFrame.

    The sentence is embedded with the ``reddit`` model (120 second timeout).
    The embedding values are laid out across the columns of a single row.
    """
    # NOTE(review): the API key is hard-coded; consider loading it from
    # configuration instead of keeping it in source.
    with basilica.Connection('370a60d1-2938-b1bf-d813-0cb6954f5a0e') as conn:
        vector = conn.embed_sentence(string, model='reddit', timeout=120)
    return pd.DataFrame(pd.Series(vector)).T
def basilica_api():
    """Return a Basilica connection keyed by ``BASILICA_API_KEY`` from the environment.

    Example use of the returned connection::

        embeddings = connection.embed_sentences(["Hello world!", "How are you?"])
        print(list(embeddings))  # [[0.8556405305862427, ...], ...]
    """
    return basilica.Connection(os.getenv("BASILICA_API_KEY"))
def get_sentence_vector(sentence):
    """Return the Basilica ``twitter``-model embedding of ``sentence``."""
    # A disabled snippet that used to live here dumped each embedding as JSON
    # to EMB_DIR + text_class + '-' + index + '.emb'; it is not executed.
    with basilica.Connection(BASILICA_KEY) as conn:
        return conn.embed_sentence(sentence, model='twitter')
def calculate_user_text_embedding(input, user_input_embedding):
    """Embed the user's text with Basilica.

    Args:
        input: Text to embed; it is sent as a one-element batch.
        user_input_embedding: Ignored. The value passed in is never read;
            the name is simply rebound to the result.

    Returns:
        A list holding the embedding(s) produced for the one-element batch.
    """
    # NOTE(review): the API key is hard-coded; consider loading it from
    # configuration instead of keeping it in source.
    with basilica.Connection('36a370e3-becb-99f5-93a0-a92344e78eab') as conn:
        return list(conn.embed_sentences([input]))
def get_upload_save_it_and_return_embedding() -> Tuple[array, str]:
    """Save the uploaded ``img`` file and embed it with Basilica.

    The upload is written under ``STATIC/TMP`` using a sanitised file name,
    then embedded with ``dimensions`` set to 2048.

    Returns:
        A pair of (embedding, path the upload was saved to).
    """
    upload = request.files['img']
    saved_path = os.path.join(STATIC, TMP, secure_filename(upload.filename))
    upload.save(saved_path)
    with basilica.Connection(BASILICA_KEY) as conn:
        image_embedding = conn.embed_image_file(
            saved_path, opts={'dimensions': 2048})
    return image_embedding, saved_path
def test_sameconnection():
    """Interleave two embedding streams on one connection and check the results.

    Reference embeddings for the two distinct sentences are fetched first.
    On a second connection, a full inner stream is then consumed for every
    item yielded by the outer stream. Every result must match its reference.
    """
    print("\nTest concurrent embedding calls")
    outer_batch = ["This is a sentence!"] * 39
    inner_batch = ["This sentence not so same?"] * 39

    # Reference embeddings, fetched on their own connection.
    with basilica.Connection(test_key) as ref_conn:
        truth = list(ref_conn.embed_sentences(
            ["This is a sentence!", "This sentence not so same?"]))

    # Second connection: both streams are driven through it at once.
    outer_results = []
    inner_results = []
    with basilica.Connection(test_key) as conn:
        for outer_vec in conn.embed_sentences(gen(outer_batch)):
            outer_results.append(outer_vec)
            inner_results.extend(conn.embed_sentences(gen(inner_batch)))

    assert all([sufficiently_equal(truth[0], vec) for vec in outer_results])
    assert all([sufficiently_equal(truth[1], vec) for vec in inner_results])
    print("Test Passed!")
def strain_advisor(self, input, neighbors=5):
    """Embed the user's text and return its nearest-neighbour results.

    The embedding is run through ``self.scaler``, ``self.pca`` and
    ``self.normalizer`` in that order before querying ``self.nn``.

    Args:
        input: Text supplied by the user.
        neighbors: Number of neighbours requested from ``self.nn``.

    Returns:
        The first row of the second item returned by ``kneighbors``, as a list.
    """
    with basilica.Connection(self.API_KEY) as conn:
        sentence_vector = conn.embed_sentence(input)

    # Shape the single embedding as a one-sample batch for the transformers.
    features = np.stack([sentence_vector], axis=0)
    for step in (self.scaler, self.pca, self.normalizer):
        features = step.transform(features)

    neighbor_info = self.nn.kneighbors(features, neighbors)
    return neighbor_info[1][0].tolist()
def test_fakekey_large():
    """Embed the large sentence set with an invalid key and report the outcome.

    An exception is the expected result; it is printed rather than raised.
    The elapsed wall-clock time is printed at the end either way.
    """
    print("\nTest embedding 768 sentences with fake_key")
    print("Expecting HTTPError")
    started = time.time()
    try:
        with basilica.Connection(fake_key) as conn:
            vectors = list(conn.embed_sentences(sentences_large))
    except Exception as err:
        print("Exception rasied : " + str(err))
    else:
        print("Test failed, embeddings returend with length of ", len(vectors))
    elapsed = time.time() - started
    print("time took : " + str(elapsed))
def test_testkey_large():
    """Embed the large sentence set with the test key and validate the shape.

    Success means 768 embeddings of length 512 each. An exception raised by
    the request is printed rather than propagated. The elapsed wall-clock
    time is printed at the end.
    """
    print("\nTest embedding 768 sentences with TEST_KEY")
    started = time.time()
    try:
        with basilica.Connection(test_key) as conn:
            vectors = list(conn.embed_sentences(sentences_large))
    except Exception as err:
        print("Test failed, Exception raised : " + str(err))
    else:
        assert len(vectors) == 768
        assert all([len(vec) == 512 for vec in vectors])
        print("No exception raised, test passed")
    elapsed = time.time() - started
    print("time took : " + str(elapsed))
def test_demokey_small():
    """Embed the small sentence set with the public demo key and validate the shape.

    Success means 3 embeddings of length 512 each. An exception raised by
    the request is printed rather than propagated. The elapsed wall-clock
    time is printed at the end.
    """
    print("\nTest embedding 3 sentences with SLOW_DEMO_KEY")
    started = time.time()
    try:
        with basilica.Connection('SLOW_DEMO_KEY') as conn:
            vectors = list(conn.embed_sentences(sentences_small))
    except Exception as err:
        print(err)
    else:
        assert len(vectors) == 3
        assert all([len(vec) == 512 for vec in vectors])
        print("No exception raised, test passed")
    elapsed = time.time() - started
    print("time took : " + str(elapsed))
def test_timeout(timeout=0.01):
    """Embed the large sentence set with a very short timeout and report the outcome.

    An exception is the expected result; it is printed rather than raised.
    The elapsed wall-clock time is printed at the end either way.

    Args:
        timeout: Timeout passed through to ``embed_sentences``.
    """
    print("\nTest timeout")
    print("Expecting Exeptions")
    started = time.time()
    try:
        with basilica.Connection(test_key) as conn:
            stream = conn.embed_sentences(sentences_large, timeout=timeout)
            embeddings = list(stream)
    except Exception as err:
        print("Exception raised:" + str(err))
    else:
        print("No exception raised, test failed")
    elapsed = time.time() - started
    print("time took : " + str(elapsed))
def strain_suggester(self, input, neighbors=5):
    """
    Embed the user's text with Basilica and return recommended strain ids.

    The embedding passes through the fitted standard scaler, the PCA
    reduction and the normalizer, in that order. The nearest-neighbours
    model is then queried for ``neighbors`` matches (5 by default).
    """
    with basilica.Connection(self.API_KEY) as conn:
        sentence_vector = conn.embed_sentence(input)

    # Shape the single embedding as a one-sample batch for the pipeline.
    batch = np.stack([sentence_vector], axis=0)
    reduced = self.pca.transform(self.scaler.transform(batch))
    unit_scaled = self.normalizer.transform(reduced)

    neighbor_info = self.nn.kneighbors(unit_scaled, neighbors)
    return neighbor_info[1][0].tolist()
def get_user(screen_name=None):
    """Fetch a Twitter user and recent tweets, store them with embeddings, and render them.

    The user record is created or updated and committed first. Up to 50
    recent statuses (no replies, no retweets) are then fetched, embedded
    with Basilica's ``twitter`` model, and stored as tweets.

    Args:
        screen_name: Twitter handle to look up.

    Returns:
        The rendered ``user.html`` template on success; otherwise a JSON
        message saying the user was not found.
    """
    print(screen_name)
    try:
        twitter_user = twitter_api_client.get_user(screen_name)

        # Find or create the database user.
        db_user = User.query.get(twitter_user.id) or User(id=twitter_user.id)
        db_user.screen_name = twitter_user.screen_name
        db_user.name = twitter_user.name
        db_user.location = twitter_user.location
        db_user.followers_count = twitter_user.followers_count
        db.session.add(db_user)
        db.session.commit()

        statuses = twitter_api_client.user_timeline(
            screen_name, tweet_mode="extended", count=50,
            exclude_replies=True, include_rts=False)

        db_tweets = []
        # One connection serves the whole timeline instead of opening and
        # closing a new one for every tweet.
        with basilica.Connection(API_KEY) as c:
            for status in statuses:
                print(status.full_text)
                print("----")
                # Find or create the database tweet.
                db_tweet = Tweet.query.get(status.id) or Tweet(id=status.id)
                db_tweet.user_id = status.author.id  # or db_user.id
                db_tweet.full_text = status.full_text
                db_tweet.embedding = list(
                    c.embed_sentence(status.full_text, model="twitter"))
                db.session.add(db_tweet)
                db_tweets.append(db_tweet)
        db.session.commit()

        return render_template("user.html", user=db_user, tweets=statuses)  # tweets=db_tweets
    except Exception as err:
        # Was a bare ``except:``, which also trapped SystemExit and
        # KeyboardInterrupt and hid the real cause. The response is the same,
        # but the underlying error is now visible in the output.
        print("get_user failed for {!r}: {!r}".format(screen_name, err))
        return jsonify({"message": "OOPS User Not Found!"})
def predict(user_input_symptoms):
    '''Embed the user's symptom text with Basilica.

    Args:
        user_input_symptoms: Free-text symptoms entered by the user.

    Returns:
        A list holding the embedding(s) for the one-element batch built from
        ``user_input_symptoms``.
    '''
    # NOTE(review): the data frame and the pickled model are loaded but not
    # used by the code below. They are kept because later matching logic
    # presumably needs them; confirm or drop.
    df = pd.read_csv('symptoms.csv')
    unpickled = pd.read_pickle('pickled_model')

    # The original referenced an undefined name ``user_input`` here, which
    # raised NameError on every call; the parameter is what was meant.
    user_input_list = [user_input_symptoms]

    # NOTE(review): the API key is hard-coded; consider loading it from
    # configuration instead of keeping it in source.
    with basilica.Connection('72a5b6d3-09a2-974d-adb0-eee91584cfc7') as c:
        # Materialise inside the ``with`` block: ``embed_sentences`` is lazy,
        # so returning it raw would run the request after the connection
        # had been closed.
        return list(c.embed_sentences(user_input_list))
    # The statements that followed the ``return`` were unreachable and
    # referenced undefined names; they have been removed.
def embed_basilica_to_df(self):
    """
    Embed every value of ``joined_columns`` with Basilica, one request per row.

    The elapsed time is printed, so the cost of the Basilica calls is
    visible. For faster performance pass an API key that is functional.

    Returns the df with a column named 'embedded'.
    """
    from timeit import default_timer as timer

    self.make_one_column()
    start = timer()
    # A single connection is shared by all rows; opening one per row only
    # added setup and teardown cost to every request.
    with basilica.Connection(self.API_KEY) as c:
        column_embedded = [
            list(c.embed_sentence(sentence,
                                  model='email',
                                  version='default',
                                  opts={},
                                  timeout=5))
            for sentence in self.df['joined_columns']
        ]
    self.df['embedded'] = column_embedded
    end = timer()
    print(end - start)
    return self.df
def embed_basilica_to_df(self):
    """
    Embed every value of ``joined_columns`` with Basilica in batches of 64.

    The elapsed time is printed, so the cost of the Basilica calls is
    visible. For faster performance pass an API key that is functional.

    Returns the df with a column named 'embedded'.
    """
    from timeit import default_timer as timer

    self.make_one_column()
    start = timer()
    joined = self.df['joined_columns']
    column_embedded = []
    # A single connection is shared by all batches; opening one per batch
    # only added setup and teardown cost to every request.
    with basilica.Connection(self.API_KEY) as c:
        for n in range(0, len(joined), 64):
            sentences = joined.iloc[n:n + 64]
            # ``extend`` drains the lazy result while the connection is open.
            column_embedded.extend(
                c.embed_sentences(sentences,
                                  model='email',
                                  version='default',
                                  opts={},
                                  timeout=20))
    self.df['embedded'] = column_embedded
    end = timer()
    print(end - start)
    return self.df
from .models import DB, Tweet, User # https://greatlist.com/happiness/must-follow-twitter-accounts TWITTER_USERS = ['calebhicks', 'elonmusk', 'rrherr', 'SteveMartinToGO', 'alyankovic', 'nasa', 'sadserver', 'jkhowland', 'austen', 'common_squirrel', 'KenJennings', 'connanobrien', 'big_ben_clock', 'IAM_SHAKESPEARE'] TWITTER_AUTH = tweepy.OAuthHandler(config('TWITTER_CONSUMER_KEY'), config('TWITTER_CONSUMER_SECRET')) TWITTER_AUTH.set_access_token(config('TWITTER_ACCESS_TOKEN'), config('TWITTER_ACCESS_TOKEN_SECRET')) TWITTER = tweepy.API(TWITTER_AUTH) BASILICA = basilica.Connection(config('BASILICA_KEY')) def add_or_update_user(username): """Add or update a user and their Tweets, error if not a Twitter user.""" try: twitter_user = TWITTER.get_user(username) db_user = (User.query.get(twitter_user.id) or User(id=twitter_user.id, name=username)) DB.session.add(db_user) # We want as many recent non-retweet/reply statuses as we can get # 200 is a Twitter API limit, we'll usually see less due to exclusions tweets = twitter_user.timeline( count=250, exclude_replies=True, include_rts=False, tweet_mode='extended', since_id=db_user.newest_tweet_id) if tweets:
from os import getenv import basilica import tweepy from .models import DB, Tweet, User TWITTER_USERS = [ 'calebhicks', 'elonmusk', 'rrherr', 'SteveMartinToGo', 'alyankovic', 'nasa', 'sadserver', 'jkhowland', 'austen', 'common_squirrel', 'KenJennings', 'conanobrien', 'big_ben_clock', 'IAM_SHAKESPEARE' ] TWITTER_API_KEY = getenv('TWITTER_API_KEY') TWITTER_API_KEY_SECRET = getenv('TWITTER_API_KEY_SECRET') TWITTER_AUTH = tweepy.OAuthHandler(TWITTER_API_KEY, TWITTER_API_KEY_SECRET) TWITTER = tweepy.API(TWITTER_AUTH) BASILICA = basilica.Connection(getenv('BASILICA_KEY')) def add_or_update_user(username): """Add or update a user and their Tweets, error if not a Twitter user.""" try: twitter_user = TWITTER.get_user(username) db_user = (User.query.get(twitter_user.id) or User(id=twitter_user.id, name=username)) DB.session.add(db_user) # Lets get the tweets - focusing on primary (not retweet/reply) tweets = twitter_user.timeline(count=200, exclude_replies=True, include_rts=False, tweet_mode='extended', since_id=db_user.newest_tweet_id)
import basilica
import os
from dotenv import load_dotenv

load_dotenv()

# Falls back to a placeholder string when the variable is not set.
basilica_apikey = os.getenv("BASILICA_API_KEY", default="BASILICA_api_key")

# Shared module-level connection.
connection = basilica.Connection(basilica_apikey)
print('CONNECTION', type(connection))

if __name__ == "__main__":
    # A single string goes through ``embed_sentence``. Passing it to
    # ``embed_sentences`` would treat the string as an iterable of
    # one-character "sentences".
    embedding = connection.embed_sentence("hey this is great", model="twitter")
    print(type(embedding))

    # Several sentences are embedded in one batched call.
    tweets = ["Hello World", " The quick brown fox", "such a great world"]
    embeddings = connection.embed_sentences(tweets, model="twitter")
    for embed in embeddings:
        print("___________________")
        print(len(embed))
import basilica
import os
from dotenv import load_dotenv

load_dotenv()

BASILICA_API_KEY = os.getenv("BASILICA_API_KEY")

# Shared module-level connection.
connection = basilica.Connection(BASILICA_API_KEY)
print(type(connection))  #> <class 'basilica.Connection'>

if __name__ == "__main__":
    # Embed one sentence.
    single_vector = connection.embed_sentence(
        "hey this is a cool tweet", model="twitter")
    print(single_vector)  # > a list of 768 numbers

    # Embed a small batch and report the length of each embedding.
    batch_vectors = connection.embed_sentences(
        ["Hello workd", "artificial intelligence", "another tweet here #cool"],
        model="twitter",
    )
    for vector in batch_vectors:
        print("-----")
        print(len(vector))
import basilica
import os
from dotenv import load_dotenv

load_dotenv()

# NOTE(review): the variable name is spelled "BASCILICA"; confirm that this
# matches the name used in the .env file.
BASCILICA_API_KEY = os.getenv("BASCILICA_API_KEY", default="oops")

sentences = [
    "This is a sentence!",
    "This is a similar sentence!",
    "I don't think this sentence is very similar at all...",
]

with basilica.Connection(BASCILICA_API_KEY) as connection:
    print(type(connection))

    # Embed the batch; materialise the lazy result while the connection is open.
    embeddings = list(connection.embed_sentences(sentences))
    for embed in embeddings:
        print("---------")
        print(embed)  # one sentence's embedding

    # The leftover ``breakpoint()`` call was removed so the script runs
    # unattended. The single-sentence request stays inside the ``with``
    # block so it does not use a connection that has already been closed.
    embedding = connection.embed_sentence("Hello World")
    print(embedding)
import basilica from .models import DB, Tweet, User TWITTER_USERS = ['calebhicks', 'elonmusk', 'rrherr', 'SteveMartinToGo', 'alyankovic', 'nasa', 'sadserver', 'jkhowland', 'austen', 'common_squirrel', 'KenJennings', 'conanobrien', 'big_ben_clock', 'IAM_SHAKESPEARE'] load_dotenv() API_KEY = os.getenv("API_KEY") API_SECRET_KEY = os.getenv("API_SECRET_KEY") BEARER_TOKEN = os.getenv("BEARER_TOKEN") BASILICA_KEY = os.getenv("BASILICA_KEY") b = basilica.Connection(BASILICA_KEY) # Grants authorization TWITTER_AUTH = tweepy.OAuthHandler(API_KEY, API_SECRET_KEY) TWITTER = tweepy.API(TWITTER_AUTH) DB = SQLAlchemy() user = '******' twitter_user = TWITTER.get_user(user) tweets = twitter_user.timeline(count = 5, exclude_replies=True, include_rts=False, tweet_mode = 'extended',) tweet_text = tweets[0].full_text embedding = b.embed_sentence(tweet_text, model = 'twitter') def add_or_update_user(username):
# web_app/services/basilica_service.py

from dotenv import load_dotenv
import os
import basilica

load_dotenv()

API_KEY = os.getenv("BASILICA_API_KEY")

# Shared module-level connection.
connection = basilica.Connection(API_KEY)
# basilica_api = basilica_api_client()

if __name__ == "__main__":
    # Demo 1: a single sentence.
    print("---------")
    single_vector = connection.embed_sentence("Hello again")
    print(list(single_vector))

    # Demo 2: a batch. One request for all sentences is more efficient than
    # one request per sentence.
    print("---------")
    batch = ["Hello world!", "How are you?"]
    print(batch)
    batch_vectors = connection.embed_sentences(batch)
    print("EMBEDDINGS...")
    print(type(batch_vectors))
    print(list(batch_vectors))  # [[0.8556405305862427, ...], ...]

    # Demo 3: a tweet embedded with the twitter-specific model.
    print("---------")
    tweet_embedding = connection.embed_sentence(
        "I love #ArtificialIntelligence", model="twitter")
def basilica_api_client():
    """Create a Basilica connection from ``API_KEY``, print its type, and return it."""
    client = basilica.Connection(API_KEY)
    print(type(client))
    return client
def basilica_api_client():
    """Return a new Basilica connection authenticated with ``API_KEY``."""
    return basilica.Connection(API_KEY)
def authbasilica():
    """Return a Basilica connection keyed by the ``BASILICA_KEY`` config value."""
    api_key = config('BASILICA_KEY')
    return basilica.Connection(api_key)
# pylint: disable=import-error
import basilica
import os
from dotenv import load_dotenv

load_dotenv()

API_KEY = os.getenv("BASILICA_API_KEY")

# Shared module-level connection. It is not used by the demo below, which
# opens its own.
connection = basilica.Connection(API_KEY)

if __name__ == "__main__":
    sentences = [
        "This is a sentence!",
        "This is a similar sentence!",
        "I don't think this sentence is very similar at all..."
    ]
    with basilica.Connection(API_KEY) as c:
        # ``embed_sentences`` is lazy; ``list`` materialises it while the
        # connection is still open.
        embeddings = list(c.embed_sentences(sentences))
    # The sample output below was a bare expression statement in the
    # original (a comment that had lost its ``#``).
    print(embeddings)  # [[0.8556405305862427, ...], ...]

    # ``embeddings`` is already a list; each item is one sentence's embedding.
    for embedding in embeddings:
        print(len(embedding))
        print(list(embedding))
        print("-------")
import os
from dotenv import load_dotenv
import basilica

load_dotenv()

# Falls back to a placeholder string when the variable is not set.
basilica_api_key = os.getenv('basilica_api_key', default='Oops')

sentences = [
    "This is a sentence.",
    "This is a similar sentence.",
    "I don't think this sentence is very similar at all..."
]

with basilica.Connection(basilica_api_key) as c:
    # Materialise once. The original printed ``list(embeddings)``, which
    # exhausted the lazy result, so the loop below never ran.
    embeddings = list(c.embed_sentences(sentences))
    print(embeddings)

    for emb in embeddings:
        print(type(emb))
        print(emb)
        print('-----------------')

# The ``with`` block releases the connection. The original trailing
# ``connection.close()`` referenced an undefined name and raised NameError.
def basilica_connection():
    """Create a Basilica connection from ``BASILICA_API_KEY``, print it, and return it."""
    conn = basilica.Connection(BASILICA_API_KEY)
    print(conn)
    return conn