def _request_data(self, command, tweet, orig=None):
    # same as execute_human_response
    reply_to = "@%s" % command.get('bot_name')
    if command.get('command') == "CONNECT":
        msg = '%s GETINFO' % reply_to
        status = 'data-req'
    elif command.get('command') == "PAY":
        if orig:
            amount, description = Parsers.extract_payment(orig)
        else:
            amount, description = Parsers.extract_payment(tweet)
        msg = '%s GETINVOICE %d %s' % (reply_to, amount, description)
        status = 'data-req'
    elif command.get('command') == "FUNDCHANNEL":
        # TODO: Extract channel amounts from orig
        # if orig:
        peer_id = command.get('pubkey')
        if peer_id:
            msg = self.lnrpc._fundchannel(peer_id)
            status = 'complete'
        else:
            msg = '%s GETINFO' % reply_to
            status = 'data-req'
    else:
        raise ValueError("Command not found: %s" % command.get('command'))
    sid = self._post(msg, command.get('last_sid'))
    # update status
    return self.db.commands.update_status(command.get('sid'), sid, status)
def _process_bot_response(self, command, tweet, orig=None):
    if command.get('command') == "CONNECT":
        # Connect to new peer
        uri = Parsers.extract_uri(tweet)
        pubkey, ip, port = Parsers.extract_info(uri)
        msg = self.lnrpc._connect(pubkey, ip, port)
        uid = command.get('peer_uid')
        self.db.peers.set_node(uid, pubkey, ip, port)
    elif command.get('command') == "PAY":
        # Pay invoice
        # TODO: Validate invoice amount with decodepay
        bolt11 = Parsers.extract_bolt11(tweet)
        pay_info = self.lnrpc.decodepay(bolt11)
        msg = self.lnrpc._pay(bolt11)
    elif command.get('command') == "FUNDCHANNEL":
        # Process response to GETINFO
        # TODO: Extract channel amounts from orig
        # if orig:
        uri = Parsers.extract_uri(tweet)
        pubkey, _, _ = Parsers.extract_info(uri)
        msg = self.lnrpc._fundchannel(pubkey)
    sid = self._post("%s\nDONE." % msg, command.get('last_sid'))
    # update status
    return self.db.commands.update_status(command.get('sid'), sid, 'complete')
def main():
    while True:
        print "\nPython Calculator: "
        line = list()
        while True:
            user_input = raw_input("Enter Expression: ")
            user_input = user_input.replace(' ', '')
            if user_input is not None and len(user_input.strip()) == 0:
                break
            user_input += " "
            line.append(user_input)
            break
        expr = line
        if len(expr) == 0:
            break
        try:
            scan = scanner.Scanner()
            tokens = list()
            tokens = scan.parseExpression(expr[0])
            printTokens(tokens)
            parsers = Parsers()
            expressionTree = parsers.parse(tokens)
            evaluator = evaluators.Evaluators()
            result = evaluator.evaluate(expressionTree)
            print '\nResult:',
            print result
        except CalcExceptions, x:
            print 'Error!:',
            print x.message
def __init__(self, data):
    # Store data
    self.data_Final = data
    # Read preprocessed data
    self.data = pd.read_csv("data/w2v_processed.csv")
    # Init NLP
    self.nlp = spacy.load("en_core_web_sm", disable=["ner", "parser"])
    self.nlp.max_length = 5000000
    # Load w2v model
    self.w2v_model = KeyedVectors.load("data/utils/w2v_model.kvmodel")
    # Load contractions
    self.contractions_dict = pickle.load(
        open("data/utils/contractions_dict.p", "rb"))
    # Init parser
    self.parsers = Parsers()
    # Load id_doc2vector
    self.id_doc2vector = pickle.load(open("data/utils/id_doc2vec.p", "rb"))
    # Get query
    self.get_query(self.id_doc2vector)
def __init__(self, data):
    # Get data
    self.data_Final = data
    # Processed data
    self.data = pd.read_csv("data/d2v_processed.csv")
    # Init NLP
    self.nlp = spacy.load("en_core_web_sm", disable=["ner", "parser"])
    self.nlp.max_length = 5000000
    # Load d2v model
    self.doc2vec_model = Doc2Vec.load("data/utils/d2v_model.kvmodel")
    # Load contractions
    self.contractions_dict = pickle.load(
        open("data/utils/contractions_dict.p", "rb"))
    # Init parsers
    self.parsers = Parsers()
    # Load tag id
    self.tag_id = pickle.load(open("data/utils/tag_id.p", "rb"))
    # Load id doc2vec
    self.id_doc2vec = pickle.load(open("data/utils/id_doc2vec.p", "rb"))
    # Get query - run program
    self.get_query(self.id_doc2vec)
def test_format_c(self):
    p = Parsers(self.LINE_TYPE_C)
    data = p.get_data()
    self.assertEqual(data['first_name'], self.FIRST_NAME)
    self.assertEqual(data['last_name'], self.LAST_NAME)
    self.assertEqual(data['phone_number'], self.CLEAN_NUMBER)
    self.assertEqual(data['color'], self.COLOR)
    self.assertEqual(data['zip_code'], self.ZIPCODE)
def test_format_a_bad_number(self, log):
    p = Parsers(self.BAD_NUMBER_TYPE_A)
    self.assertEqual(p.get_data(), {})
    self.assertTrue(p.invalid)
    self.assertEqual(
        log.call_args[0][1],
        'Phone number doesn\'t meet requirements: '
        'last_name, first_name, 1324152151352625, RED, 10013.')
def test_bad_zip_code_type_a(self, log):
    p = Parsers(self.BAD_ZIP_A)
    self.assertEqual(p.get_data(), {})
    self.assertTrue(p.invalid)
    self.assertEqual(
        log.call_args[0][1],
        'Zip code not found in data: '
        'last_name, first_name, (703)-742-0996, RED, 121311.')
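# A self-contained sketch (hypothetical validate() helper, not taken from the project)
# of the unittest.mock pattern the two tests above appear to rely on: the injected
# `log` argument is the patched logging call, and log.call_args exposes the positional
# arguments of the last call so a test can assert on the exact message that was logged.
import logging
from unittest import mock


def validate(number):
    # Hypothetical stand-in for the Parsers phone-number check.
    if len(number) != 10:
        logging.warning("Phone number doesn't meet requirements: %s", number)
        return {}
    return {'phone_number': number}


with mock.patch('logging.warning') as log:
    assert validate('1324152151352625') == {}
    # call_args[0] is the tuple of positional args; [0][1] is the second one,
    # mirroring the log.call_args[0][1] assertions in the tests above.
    assert log.call_args[0][1] == '1324152151352625'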
def parse(self, soup):
    """
    Determine the generation of the page's soup, and return a response code
    so that the page's soup can be parsed appropriately
    """
    # todo determine parser to use
    parse_helper = Parsers(soup)
    parse_helper.bon_apetit_2020()
    return 'response_code'
def __init__(self, data):
    # Load data
    self.data = data
    # Parser
    self.parsers = Parsers()
    # Init tf-idf
    click.echo("Creating: index and tf-idf")
    self.index, self.tf, self.df, self.idf = self.load_index_tfidf()
    click.echo("Done.\n")
    # Ask for query
    self.get_query()
def _execute_bot_response(self, command, tweet):
    if command.get('command') == "GETINFO":
        msg = self.lnrpc.get_uri()
    elif command.get('command') == "GETINVOICE":
        amount, description = Parsers.extract_payment(tweet)
        msg = self.lnrpc.get_invoice(amount, command.get('bot_name'), description)
    sid = self._post(msg, command.get('last_sid'))
    # update status
    return self.db.commands.update_status(command.get('sid'), sid, 'complete')
class word2vec(object):
    def __init__(self, data):
        # Store data
        self.data_Final = data
        # Read preprocessed data
        self.data = pd.read_csv("data/w2v_processed.csv")
        # Init NLP
        self.nlp = spacy.load("en_core_web_sm", disable=["ner", "parser"])
        self.nlp.max_length = 5000000
        # Load w2v model
        self.w2v_model = KeyedVectors.load("data/utils/w2v_model.kvmodel")
        # Load contractions
        self.contractions_dict = pickle.load(
            open("data/utils/contractions_dict.p", "rb"))
        # Init parser
        self.parsers = Parsers()
        # Load id_doc2vector
        self.id_doc2vector = pickle.load(open("data/utils/id_doc2vec.p", "rb"))
        # Get query
        self.get_query(self.id_doc2vector)

    def get_query(self, id_doc2vector):
        # Ask for query
        active = True
        while (active):
            click.echo(
                "######################################################")
            click.echo("TYPE 'X' TO EXIT.")
            click.echo("Insert query:")
            query = input()
            click.echo(
                "######################################################\n")
            if query == 'X' or query == 'x':
                click.echo("Exiting...")
                active = False
            else:
                self.search(query, self.id_doc2vector)
        return 0

    def expand_contractions(self, text, contractions_re):
        """
        Given a contraction, find the match and substitute it
        """
        def replace(match):
            return self.contractions_dict[match.group(0)]
        return contractions_re.sub(replace, text)

    def clean_text(self, text):
        """
        * Remove words with digits
        * Replace newline characters with space
        * Remove URLs
        * Replace non-English chars with space
        """
        # Remove digits
        text = re.sub('\w*\d\w*', '', text)
        # Remove newline chars
        text = re.sub('\n', ' ', text)
        # Remove links
        text = re.sub(r"http\S+", "", text)
        # Replace non-English chars
        text = re.sub('[^a-z]', ' ', text)
        return text

    def preprocessing(self, text):
        """
        Given a text apply preprocessing techniques:
        * Lowercase the text
        * Expand contractions
        * Clean the text
        * Remove stopwords
        * Lemmatize words
        """
        # Lower case
        text = text.lower()
        # Regular expression for finding contractions
        contractions_re = re.compile('(%s)' % '|'.join(self.contractions_dict.keys()))
        # Expand contractions
        text = self.expand_contractions(text, contractions_re)
        text = self.clean_text(text)
        # Remove added spaces
        text = re.sub(" +", " ", text)
        text = text.strip()
        # Stop words and lemmatizing
        text = ' '.join([
            token.lemma_ for token in list(self.nlp(text))
            if (token.is_stop == False)
        ])
        return text

    def embedding_w2v(self, doc_tokens):
        """
        Returns the vector representation of a string
        """
        embeddings = []
        if len(doc_tokens) < 1:
            return np.zeros(100)
        else:
            for t in doc_tokens:
                if t in self.w2v_model.wv.vocab:
                    embeddings.append(self.w2v_model.wv.word_vec(t))
                else:
                    embeddings.append(np.random.rand(100))
            return np.mean(embeddings, axis=0)

    def w2v_collection(self, data):
        """
        Given a collection of documents returns the pair id:vector where the
        vector is the embedding representation of the doc.
        """
        id_doc2v = {}
        for id, text in zip(data["id"].values, data["full_text"]):
            id_doc2v[id] = self.embedding_w2v(text)
        return id_doc2v

    def rank(self, query, id_doc2vec):
        """
        Given a query, preprocesses it, embeds it and returns an ordered
        dictionary of id:similarity_score pairs.
        """
        # Pre-process query
        query = self.preprocessing(query)
        # Query vector
        q_vector = self.embedding_w2v(query.split())
        # Doc query similarity
        doc_query_sim = {
            k: cosine_similarity(
                np.array(v).reshape(1, -1), np.array(q_vector).reshape(1, -1))
            for k, v in id_doc2vec.items()
        }
        # Sort
        doc_query_sim = {
            k: v
            for k, v in sorted(
                doc_query_sim.items(), key=lambda item: item[1], reverse=True)
        }
        return doc_query_sim

    def search(self, query, id_doc2vector, topn=20):
        """
        Search for tweets by inputting a query and see displayed results.

        Arguments:
        id_doc2vector -- dict containing id:vec2doc pairs - dict
        topn -- default: 20 - Top N results to display - int.
        """
        # Get ranked docs
        doc_query_sim = self.rank(query, id_doc2vector)
        ids = list(doc_query_sim.keys())[:topn]
        click.echo("Results\n")
        for index, id in enumerate(ids):
            doc = self.data_Final[self.data_Final["id"] == id]
            tweet, date, author, retweets, favorites, url, hashtags = self.parsers.parser_tweet_results(
                doc)
            print("______________________________________________________")
            print(f"Tweet {index}")
            print(f"\t·Author: {author}")
            print(f"\t·Date: {date}")
            print(f"\t·Tweet: {tweet}")
            print(f"\t·Retweets: {retweets}")
            print(f"\t·Favorites: {favorites}")
            print(f"\t·Hashtags: {hashtags}")
            print(f"\t·URL: {url}")
            print("______________________________________________________\n")
def __init__(self, auth_cookie):
    Session.login(auth_cookie)
    self.portfolio = Parsers.get_portfolio()
    self.open_orders = self.portfolio.open_orders
def refresh_portfolio(self):
    self.portfolio = Parsers.get_portfolio()
    self.open_orders = self.portfolio.open_orders
def __init__(self, credentials):
    Session.login(credentials)
    self.portfolio = Parsers.get_portfolio()
    self.open_orders = self.portfolio.open_orders
class tf_idf(object):
    def __init__(self, data):
        # Load data
        self.data = data
        # Parser
        self.parsers = Parsers()
        # Init tf-idf
        click.echo("Creating: index and tf-idf")
        self.index, self.tf, self.df, self.idf = self.load_index_tfidf()
        click.echo("Done.\n")
        # Ask for query
        self.get_query()

    def get_query(self):
        # Ask for query
        active = True
        while (active):
            click.echo("######################################################")
            click.echo("TYPE 'X' TO EXIT.")
            click.echo("Insert query:")
            query = input()
            click.echo("######################################################\n")
            if query == 'X' or query == 'x':
                click.echo("Exiting...")
                active = False
            else:
                self.search(query, self.index, self.idf, self.tf)
        return 0

    def load_index_tfidf(self):
        """
        Loads the preprocessed index structures.

        Returns:
        index -- inverted list "term": [["id", [pos1, pos2, ...]]]
        tf -- normalized term frequency per doc
        df -- document frequency per term
        idf -- inverse document frequency
        """
        index = pickle.load(open("data/utils/index.p", "rb"))
        # Term freq of terms in tweets
        tf = pickle.load(open("data/utils/tf.p", "rb"))
        # Tweet freq of term in corpus
        df = pickle.load(open("data/utils/df.p", "rb"))
        # Inverse df
        idf = pickle.load(open("data/utils/idf.p", "rb"))
        return index, tf, df, idf

    def rankDocuments(self, terms, docs, index, idf, tf):
        """
        Computes ranking given query and collection of tweets.

        Arguments:
        terms -- query - str.
        docs -- ID list of docs - list.
        index -- inverted index - dict
        idf -- inverse document frequency - dict
        tf -- term frequency - dict

        Returns:
        resultDocs -- Ordered list of matching docs based on cosine-sim - list
        """
        # Dict with vector per docID
        docVectors = defaultdict(lambda: [0] * len(terms))
        # Vector per query
        queryVector = [0] * len(terms)
        # TF of query
        query_terms_count = collections.Counter(terms)
        # Norm query
        query_norm = np.linalg.norm(list(query_terms_count.values()))
        for termIndex, term in enumerate(terms):
            # Check if term exists in collection
            if term not in index:
                continue
            # Score per term-query
            queryVector[termIndex] = query_terms_count[term] / query_norm * idf[term]
            for docIndex, (doc, postings) in enumerate(index[term]):
                # Check if doc ID is in the list of doc IDs containing the term
                if doc in docs:
                    # Score per term-doc
                    docVectors[doc][termIndex] = tf[term][docIndex] * idf[term]
        # Cosine similarity query-doc
        docScores = [[np.dot(curDocVec, queryVector), doc]
                     for doc, curDocVec in docVectors.items()]
        # Sort by descending similarity
        docScores.sort(reverse=True)
        # Get IDs
        resultDocs = [x[1] for x in docScores]
        return resultDocs

    def search_tf_idf(self, query, index, idf, tf, topn):
        """
        Preprocess the query and find docs with words in the query.

        Arguments:
        query -- query - str.
        index -- inverted index - dict
        idf -- inverse document frequency - dict
        tf -- term frequency - dict
        topn -- N top ranked docs to be returned - int

        Returns:
        ranked_docs -- list of topn docs ranked by cosine-sim - list
        """
        # Preprocess query
        query = self.parsers.getTerms(query)
        # Init set of docs with terms in query
        docs = set()
        for term in query:
            try:
                # Get IDs of docs with term
                termDocs = [posting[0] for posting in index[term]]
                # Add new doc IDs
                docs = docs.union(termDocs)
            except:
                pass
        docs = list(docs)
        # Rank docs with rankDocuments
        ranked_docs = self.rankDocuments(query, docs, index, idf, tf)
        ranked_docs = ranked_docs[:topn]
        return ranked_docs

    def search(self, query, index, idf, tf, topn=20):
        """
        Search for tweets by inputting a query and see displayed results.

        Arguments:
        index -- inverted index - dict
        idf -- inverse document frequency - dict.
        tf -- term frequency - dict.
        topn -- default: 20 - Top N results to display - int.
        """
        # Get topn docs
        ranked_docs = self.search_tf_idf(query, index, idf, tf, topn)
        if len(ranked_docs) == 0:
            click.echo("No results found !\n")
            return -1
        click.echo("Results\n")
        for index, id in enumerate(ranked_docs):
            # Get tweet corresponding to id
            doc = self.data[self.data['id'] == id]
            tweet, date, author, retweets, favorites, url, hashtags = self.parsers.parser_tweet_results(doc)
            click.echo("______________________________________________________")
            click.echo(f"Tweet {index}")
            click.echo(f"\t·Author: {author}")
            click.echo(f"\t·Date: {date}")
            click.echo(f"\t·Tweet: {tweet}")
            click.echo(f"\t·Retweets: {retweets}")
            click.echo(f"\t·Favorites: {favorites}")
            click.echo(f"\t·Hashtags: {hashtags}")
            click.echo(f"\t·URL: {url}")
            click.echo("______________________________________________________\n")
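# A self-contained sketch (toy data, not the project's pickled index) of the scoring done
# by tf_idf.rankDocuments(): weight the query vector by query term frequency and idf,
# weight each document vector by tf * idf, then rank documents by the dot product.
import numpy as np
from collections import Counter, defaultdict

# Toy inverted index: term -> list of (doc_id, positions), with tf aligned to the postings.
index = {"lightning": [("d1", [0]), ("d2", [3])], "invoice": [("d1", [1])]}
tf = {"lightning": [0.5, 0.2], "invoice": [0.5]}
idf = {"lightning": 0.3, "invoice": 1.1}


def rank(terms, docs):
    doc_vectors = defaultdict(lambda: [0.0] * len(terms))
    query_vector = [0.0] * len(terms)
    counts = Counter(terms)
    query_norm = np.linalg.norm(list(counts.values()))
    for i, term in enumerate(terms):
        if term not in index:
            continue
        query_vector[i] = counts[term] / query_norm * idf[term]
        for j, (doc, _positions) in enumerate(index[term]):
            if doc in docs:
                doc_vectors[doc][i] = tf[term][j] * idf[term]
    scores = sorted(((np.dot(vec, query_vector), doc) for doc, vec in doc_vectors.items()),
                    reverse=True)
    return [doc for _score, doc in scores]


print(rank(["lightning", "invoice"], {"d1", "d2"}))  # ['d1', 'd2']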
class doc2vec(object):
    def __init__(self, data):
        # Get data
        self.data_Final = data
        # Processed data
        self.data = pd.read_csv("data/d2v_processed.csv")
        # Init NLP
        self.nlp = spacy.load("en_core_web_sm", disable=["ner", "parser"])
        self.nlp.max_length = 5000000
        # Load d2v model
        self.doc2vec_model = Doc2Vec.load("data/utils/d2v_model.kvmodel")
        # Load contractions
        self.contractions_dict = pickle.load(
            open("data/utils/contractions_dict.p", "rb"))
        # Init parsers
        self.parsers = Parsers()
        # Load tag id
        self.tag_id = pickle.load(open("data/utils/tag_id.p", "rb"))
        # Load id doc2vec
        self.id_doc2vec = pickle.load(open("data/utils/id_doc2vec.p", "rb"))
        # Get query - run program
        self.get_query(self.id_doc2vec)

    def get_query(self, id_doc2vector):
        # Ask for query
        active = True
        while (active):
            click.echo(
                "\n######################################################")
            click.echo("TYPE 'X' TO EXIT.")
            click.echo("Insert query:")
            query = input()
            click.echo(
                "######################################################\n")
            if query == 'X' or query == 'x':
                click.echo("Exiting...")
                active = False
            else:
                self.search(query, self.tag_id, self.id_doc2vec)
        return 0

    def search(self, query, tag_id, id_doc2vector, topn=20):
        """
        Search for tweets by inputting a query and see displayed results.

        Arguments:
        id_doc2vector -- dict containing id:vec2doc pairs - dict
        topn -- default: 20 - Top N results to display - int.
        """
        # Get ranked docs
        doc_query_sim = self.rank(query, self.tag_id)
        ids = doc_query_sim[:topn]
        click.echo("Results\n")
        for index, id in enumerate(ids):
            doc = self.data_Final[self.data_Final["id"] == id]
            tweet, date, author, retweets, favorites, url, hashtags = self.parsers.parser_tweet_results(
                doc)
            click.echo(
                "______________________________________________________")
            click.echo(f"Tweet {index}")
            click.echo(f"\t·Author: {author}")
            click.echo(f"\t·Date: {date}")
            click.echo(f"\t·Tweet: {tweet}")
            click.echo(f"\t·Retweets: {retweets}")
            click.echo(f"\t·Favorites: {favorites}")
            click.echo(f"\t·Hashtags: {hashtags}")
            click.echo(f"\t·URL: {url}")
            click.echo(
                "______________________________________________________\n")

    def rank(self, query, tag_id):
        """
        Given a query, preprocesses it, embeds it and returns an ordered
        list of matching tweet ids (most similar first).
        """
        # Pre-process query
        query = self.preprocessing(query)
        # Query vector
        q_vector = self.doc2vec_model.infer_vector(query.split())
        # Doc query similarity
        tag_sim = self.doc2vec_model.docvecs.most_similar([q_vector], topn=20)
        # Get ids
        ids = [tag_id[id_[0]] for id_ in tag_sim]
        return ids

    def expand_contractions(self, text, contractions_dict, contractions_re):
        """
        Given a contraction, find the match and substitute it
        """
        def replace(match):
            return contractions_dict[match.group(0)]
        return contractions_re.sub(replace, text)

    def clean_text(self, text):
        """
        * Remove words with digits
        * Replace newline characters with space
        * Remove URLs
        * Replace non-English chars with space
        """
        # Remove digits
        text = re.sub('\w*\d\w*', '', text)
        # Remove newline chars
        text = re.sub('\n', ' ', text)
        # Remove links
        text = re.sub(r"http\S+", "", text)
        # Replace non-English chars
        text = re.sub('[^a-z]', ' ', text)
        return text

    def preprocessing(self, text):
        """
        Given a text apply preprocessing techniques:
        * Lowercase the text
        * Expand contractions
        * Clean the text
        * Remove stopwords
        * Lemmatize words
        """
        # Lower case
        text = text.lower()
        # Regular expression for finding contractions
        contractions_re = re.compile('(%s)' % '|'.join(self.contractions_dict.keys()))
        # Expand contractions
        text = self.expand_contractions(text, self.contractions_dict, contractions_re)
        text = self.clean_text(text)
        # Remove added spaces
        text = re.sub(" +", " ", text)
        text = text.strip()
        # Stop words and lemmatizing
        text = ' '.join([
            token.lemma_ for token in list(self.nlp(text))
            if (token.is_stop == False)
        ])
        return text