def download_recent_USGS_TIFFS():
    """Download the last week of USGS TIFFs from Google Cloud.

    Queries Google for the bucket URLs of TIFFs uploaded in the last
    week, then downloads each bucket to the local file system under
    ``/data``, following the path laid out by Google.
    """
    # Fetch locations of TIFFs uploaded in the last week.
    q = Query()
    urls = q.find_week_old()
    print("\nFiles to be downloaded: ", len(urls), '\n')

    # Cloud client used to download each bucket to the local path.
    # NOTE(review): an old comment referenced dummy web-scraper
    # credentials; use an encrypted config if the scraper returns.
    C = Cloud(path='/data')

    filecount = 0
    # For each bucket, get the relevant urls and download them.
    for filecount, url in enumerate(urls, start=1):
        # BUG FIX: the progress line previously used len(urls) + 1,
        # overstating the total number of buckets by one.
        print('\n Working on %s of %s Buckets.' % (filecount, len(urls)))
        print('\n Fetching Bucket: ', url)
        bucket = C.DownloadBucket(url)
        print(bucket)
    print(filecount, " files added.")
def test_find_unique_number_between_dates_with_diameter_and_hazardous_and_distance(self):
    """Date-range query filtered by diameter, hazard flag, and miss
    distance returns 10 NEOs with 10 unique qualifying orbits."""
    self.db.load_data()
    query_selectors = Query(
        number=10, start_date=self.start_date, end_date=self.end_date,
        return_object='NEO',
        filter=["diameter:>:0.042", "is_hazardous:=:True", "distance:>:234989"]
    ).build_query()
    results = NEOSearcher(self.db).get_objects(query_selectors)

    # Confirm 10 results
    self.assertEqual(len(results), 10)

    # Filter NEOs by NEO attributes
    neo_ids = list(filter(
        lambda neo: neo.diameter_min_km > 0.042
        and neo.is_potentially_hazardous_asteroid,
        results))

    # Collect all orbit paths of the matching NEOs
    all_orbits = []
    for neo in neo_ids:
        all_orbits += neo.orbits

    # Filter to NEO Orbit Paths with matching distance, de-duplicated
    # by (close approach date, NEO name).
    unique_orbits = set()
    filtered_orbits = []
    for orbit in all_orbits:
        date_name = f'{orbit.close_approach_date}.{orbit.neo_name}'
        if date_name not in unique_orbits:
            # BUG FIX: the original never added date_name to
            # unique_orbits, so the de-duplication check was a no-op.
            unique_orbits.add(date_name)
            if orbit.miss_distance_kilometers > 234989.0:
                filtered_orbits.append(orbit)

    # Grab the requested number
    orbits = filtered_orbits[0:10]
    self.assertEqual(len(orbits), 10)
def test_find_unique_number_neos_on_date(self):
    """Requesting 10 NEOs on a single date yields 10 uniquely named NEOs."""
    self.db.load_data()

    selectors = Query(
        number=10, date=self.start_date, return_object='NEO'
    ).build_query()
    found = NEOSearcher(self.db).get_objects(selectors)

    # Expect exactly 10 results, all with distinct names.
    self.assertEqual(len(found), 10)
    distinct_names = {neo.name for neo in found}
    self.assertEqual(len(distinct_names), 10)
def __init__(self):
    """Set up lookup helpers and load the pickled question classifiers."""
    # Web description lookup and location-question solver used downstream.
    self.www_lookup = DescLookup()
    self.loc_solver = locationSolve()
    # Search-engine fallback.
    self.find = Query()
    # Paths to the pre-trained classifier pickles.
    self.coarse_classifier_obj = 'data/q_coarse_classifier.pickle'
    self.fine_classifier_obj = 'data/q_fine_classifier.pickle'
    # NOTE(review): the file objects handed to pickle.load are never
    # closed — consider a `with open(...)` block.
    self.qstn_coarse_classifier = pickle.load(open(self.coarse_classifier_obj, 'rb'))
    self.qstn_fine_classifier = pickle.load(open(self.fine_classifier_obj, 'rb'))
    # TREC-style coarse question classes and their fine-grained subclasses.
    self.qstn_coarse_classes = ['LOC', 'DESC', 'ENTY', 'ABBR', 'NUM', 'HUM']
    self.qstn_fine_classes = ['def', 'abb', 'event', 'dist', 'mount', 'word',
                              'color', 'gr', 'dismed', 'product', 'file',
                              'period', 'temp', 'animal', 'desc', 'sport',
                              'currency', 'volsize', 'letter', 'directory',
                              'money', 'code', 'symbol', 'instru', 'title',
                              'techmeth', 'count', 'date', 'reason', 'manner',
                              'state', 'city', 'perc', 'ord', 'religion',
                              'lang', 'weight', 'country', 'plant', 'cremat',
                              'food', 'ind', 'exp', 'veh', 'substance',
                              'body', 'speed', 'termeq', 'other']
def test_find_unique_number_neos_on_date_with_diameter(self):
    """Diameter-filtered single-date query returns 4 unique NEOs."""
    # CONSISTENCY FIX: every sibling test loads the database explicitly;
    # this one relied on it implicitly.
    self.db.load_data()
    query_selectors = Query(
        number=10, date=self.start_date, return_object='NEO',
        filter=["diameter:>:0.042"]
    ).build_query()
    results = NEOSearcher(self.db).get_objects(query_selectors)

    # Confirm 4 results and 4 unique results
    self.assertEqual(len(results), 4)
    # BUG FIX: the original built a filtered list and immediately
    # overwrote it with a set over the unfiltered results; check
    # uniqueness over the NEOs that actually satisfy the filter.
    filtered = [neo for neo in results if neo.diameter_min_km > 0.042]
    neo_ids = set(neo.name for neo in filtered)
    self.assertEqual(len(neo_ids), 4)
def __init__(self):
    """Wire up the rule processor, chat DB, search engine, and the
    pickled dialogue-act classifier."""
    # Rule processor answers wh-questions; ChatDB stores labelled data.
    self.qry = RuleProcessor()
    self.db_con = ChatDB()
    self.find = Query()
    # Maps classifier labels to the suffix of the reply-handler method
    # ('stmt' + suffix), dispatched via getattr.
    self.statement_types = {
        'Emotion': '_emotion',
        'whQuestion': '_whquestion',
        'yAnswer': '_yanswer',
        'Reject': '_reject',
        'Emphasis': '_emphasis',
        'Greet': '_greet',
        'Statement': '_statement',
        'Other': '_other',
        'Clarify': '_clarify',
        'Bye': '_bye',
        'Accept': '_accept',
        'ynQuestion': '_ynquestion',
        'nAnswer': '_nanswer',
        'Continuer': '_continuer'
    }
    # Pre-trained statement classifier pickle.
    # NOTE(review): the file handle passed to pickle.load is never closed.
    self.classifier_obj = 'data/stmnt_classifier.pickle'
    self.classifier = pickle.load(open(self.classifier_obj, 'rb'))
def __init__(self):
    """Set up lookup helpers and load the pickled question classifiers."""
    # Web description lookup and location-question solver used downstream.
    self.www_lookup = DescLookup()
    self.loc_solver = locationSolve()
    # Search-engine fallback.
    self.find = Query()
    # Paths to the pre-trained classifier pickles.
    self.coarse_classifier_obj = 'data/q_coarse_classifier.pickle'
    self.fine_classifier_obj = 'data/q_fine_classifier.pickle'
    # NOTE(review): the file objects handed to pickle.load are never
    # closed — consider a `with open(...)` block.
    self.qstn_coarse_classifier = pickle.load(
        open(self.coarse_classifier_obj, 'rb'))
    self.qstn_fine_classifier = pickle.load(
        open(self.fine_classifier_obj, 'rb'))
    # TREC-style coarse question classes and their fine-grained subclasses.
    self.qstn_coarse_classes = [
        'LOC', 'DESC', 'ENTY', 'ABBR', 'NUM', 'HUM'
    ]
    self.qstn_fine_classes = [
        'def', 'abb', 'event', 'dist', 'mount', 'word', 'color', 'gr',
        'dismed', 'product', 'file', 'period', 'temp', 'animal', 'desc',
        'sport', 'currency', 'volsize', 'letter', 'directory', 'money',
        'code', 'symbol', 'instru', 'title', 'techmeth', 'count', 'date',
        'reason', 'manner', 'state', 'city', 'perc', 'ord', 'religion',
        'lang', 'weight', 'country', 'plant', 'cremat', 'food', 'ind',
        'exp', 'veh', 'substance', 'body', 'speed', 'termeq', 'other'
    ]
def test_find_unique_number_between_dates_with_diameter(self):
    """Diameter-filtered date-range query returns 10 unique NEOs."""
    self.db.load_data()
    query_selectors = Query(
        number=10, start_date=self.start_date, end_date=self.end_date,
        return_object='NEO', filter=["diameter:>:0.042"]
    ).build_query()
    results = NEOSearcher(self.db).get_objects(query_selectors)

    # Confirm 10 results and 10 unique results
    self.assertEqual(len(results), 10)
    # BUG FIX: the original shadowed the filtered list with a set over
    # the unfiltered results and kept an unused `diameter` set; check
    # uniqueness over the NEOs that actually satisfy the filter.
    filtered = [neo for neo in results if neo.diameter_min_km > 0.042]
    neo_ids = set(neo.name for neo in filtered)
    self.assertEqual(len(neo_ids), 10)
def test_find_unique_number_neos_on_date_with_diameter_and_hazardous(self):
    """Diameter + hazardous filters on this date match no NEOs."""
    self.db.load_data()
    query_selectors = Query(
        number=10, date=self.start_date, return_object='NEO',
        filter=["diameter:>:0.042", "is_hazardous:=:True"]
    ).build_query()
    results = NEOSearcher(self.db).get_objects(query_selectors)

    # Confirm 0 results and 0 unique results
    self.assertEqual(len(results), 0)
    # BUG FIX: the original built a filtered list and immediately
    # overwrote it; verify uniqueness over the filtered NEOs.
    filtered = [
        neo for neo in results
        if neo.diameter_min_km > 0.042
        and neo.is_potentially_hazardous_asteroid
    ]
    neo_ids = set(neo.name for neo in filtered)
    self.assertEqual(len(neo_ids), 0)
def __init__(self):
    """Wire up the rule processor, chat DB, search engine, and the
    pickled dialogue-act classifier."""
    # Rule processor answers wh-questions; ChatDB stores labelled data.
    self.qry = RuleProcessor()
    self.db_con = ChatDB()
    self.find = Query()
    # Maps classifier labels to the suffix of the reply-handler method
    # ('stmt' + suffix), dispatched via getattr.
    self.statement_types = {'Emotion' : '_emotion',
                            'whQuestion' : '_whquestion',
                            'yAnswer' : '_yanswer',
                            'Reject' : '_reject',
                            'Emphasis' : '_emphasis',
                            'Greet' : '_greet',
                            'Statement' : '_statement',
                            'Other' : '_other',
                            'Clarify' : '_clarify',
                            'Bye' : '_bye',
                            'Accept' :'_accept',
                            'ynQuestion' : '_ynquestion',
                            'nAnswer' : '_nanswer',
                            'Continuer' : '_continuer' }
    # Pre-trained statement classifier pickle.
    # NOTE(review): the file handle passed to pickle.load is never closed.
    self.classifier_obj = 'data/stmnt_classifier.pickle'
    #self.classifier_obj = 'data/q_fine_classifier.pickle'
    self.classifier = pickle.load(open(self.classifier_obj, 'rb'))
def test_find_unique_number_neos_on_date_with_diameter(self):
    """Diameter-filtered single-date query returns 4 unique NEOs.

    NOTE(review): get_objects returns 8 rows here — the expected 4
    followed by exact duplicates of them (observable via printing the
    ids in the CLI). The result is truncated to the first 4 until the
    duplication in the searcher/test fixture is diagnosed upstream.
    """
    self.db.load_data()
    query_selectors = Query(
        number=10, date=self.start_date, return_object='NEO',
        filter=["diameter:>:0.042"]
    ).build_query()
    # Truncate to the first 4 results; see the duplication note above.
    results = NEOSearcher(self.db).get_objects(query_selectors)[:4]

    # Confirm 4 results and 4 unique results
    self.assertEqual(len(results), 4)
    # BUG FIX: an unresolved merge-conflict marker (<<<<<<< HEAD) was
    # left in this test and the body was cut off. Resolved to the HEAD
    # side, which uses the renamed `min_diam` attribute, and completed
    # the uniqueness assertion to match the sibling tests.
    neo_ids = list(filter(lambda neo: neo.min_diam > 0.042, results))
    neo_names = set(neo.name for neo in neo_ids)
    self.assertEqual(len(neo_names), 4)
# Build the NEO database from the given file; exit on any load failure.
db = NEODatabase(filename=filename)
try:
    db.load_data()
except FileNotFoundError as e:
    print(
        f'File {var_args.get("filename")} not found, please try another file name.'
    )
    sys.exit()
except Exception as e:
    # NOTE(review): this prints the Exception *class*, not the caught
    # instance `e` — likely intended to be print(e); confirm.
    print(Exception)
    sys.exit()

# Build the query selectors from the parsed CLI arguments.
query_selectors = Query(**var_args).build_query()
#print("Query selectors",query_selectors)

# Run the search; UnsupportedFeature signals an unimplemented selector.
try:
    results = NEOSearcher(db).get_objects(query_selectors)
except UnsupportedFeature as e:
    print('Unsupported Feature; Write unsuccessful')
    sys.exit()

# Output the results in the requested format.
# NOTE(review): this chunk is truncated here — the write(...) call
# continues beyond the visible source.
try:
    result = NEOWriter().write(
        data=results,
        format=args.output,
def parse(s):
    '''Parse a search string.

    What is a search string, you ask? A search string consists of terms.
    Here are some terms:

        >30 +round
        0 +round
        <4 +syllabic
        /m/
        no +round
        no /m/
        3 +,-sonorant

    A *term* consists of a *qualifier* and a *qualificand*. A *qualifier*
    consists of a non-negative integer, optionally preceded by a < or >.
    The word 'no' is treated as a synonym for '0'. If the qualificand is
    a phoneme, no qualifier is necessary.

    A *qualificand* consists of a phoneme or a feature. Phonemes are
    wrapped in /slashes/. Features are preceded by values, which consist
    of the characters + and -, optionally joined by commas. (For example,
    +,-sonorant is treated identically to +-sonorant.) To search for
    multiple features in the same qualificand, separate them with a
    semicolon.

    There are two *conjunctions*, 'and' and 'or'. These use postfix
    notation!
    '''
    # Tokenize on single spaces; filter(None, ...) drops the empty
    # strings produced by runs of spaces.
    tokens = Stream(list(filter(None, s.split(' '))))
    # Postfix evaluation: terms push Query nodes; a conjunction pops two
    # nodes and pushes a QueryTree combining them.
    query_stack = []
    while not tokens.eof():
        curr = tokens.peek()
        if is_qualifier(curr):
            gtlt, num = parse_qualifier(tokens.next())
            if is_qualificand(tokens.peek()):
                term = parse_qualificand(tokens.next())
                # A bare count with no </> means an exact match ('=').
                # num > 0 distinguishes "contains" from "does not contain"
                # (0 / 'no').
                query_stack.append(
                    Query(contains=num > 0, term=term, num=num,
                          gtlt=gtlt or '='))
            elif is_phoneme(tokens.peek()):
                phoneme = parse_phoneme(tokens.next())
                query_stack.append(Query(contains=num > 0, term=phoneme))
            else:
                raise ParserError(
                    f'Qualifier ({curr}) followed by non-qualificand/phoneme ({tokens.peek()}))'
                )
        elif is_phoneme(curr):
            # A phoneme without a qualifier means "contains at least one".
            query_stack.append(Query(contains=True, term=parse_phoneme(curr)))
            tokens.next()
        elif is_conjunction(curr):
            # Postfix: the right operand was pushed most recently.
            r = query_stack.pop()
            l = query_stack.pop()
            relation = {
                'AND': 'AND',
                '&': 'AND',
                'OR': 'OR',
                '|': 'OR'
            }[curr.upper()]
            query_stack.append(QueryTree(l, relation, r))
            tokens.next()
        else:
            raise ParserError(f'Invalid token {curr}')
    # NOTE(review): a well-formed input leaves exactly one node on the
    # stack; extra terms without a joining conjunction are silently
    # dropped here — confirm this is intended.
    return query_stack[0]
class RuleProcessor:
    """Question classifier and solver.

    Assigns TREC-style coarse and fine classes to a POS-tagged question
    using two pickled classifiers, routes the question to a matching
    solver, and falls back to web lookup and then the search engine.
    """

    def __init__(self):
        """Set up lookup helpers and load the pickled classifiers."""
        self.www_lookup = DescLookup()
        self.loc_solver = locationSolve()
        self.find = Query()
        self.coarse_classifier_obj = 'data/q_coarse_classifier.pickle'
        self.fine_classifier_obj = 'data/q_fine_classifier.pickle'
        # Load pre-trained classifiers, closing the files deterministically.
        with open(self.coarse_classifier_obj, 'rb') as f:
            self.qstn_coarse_classifier = pickle.load(f)
        with open(self.fine_classifier_obj, 'rb') as f:
            self.qstn_fine_classifier = pickle.load(f)
        self.qstn_coarse_classes = [
            'LOC', 'DESC', 'ENTY', 'ABBR', 'NUM', 'HUM'
        ]
        self.qstn_fine_classes = [
            'def', 'abb', 'event', 'dist', 'mount', 'word', 'color', 'gr',
            'dismed', 'product', 'file', 'period', 'temp', 'animal', 'desc',
            'sport', 'currency', 'volsize', 'letter', 'directory', 'money',
            'code', 'symbol', 'instru', 'title', 'techmeth', 'count', 'date',
            'reason', 'manner', 'state', 'city', 'perc', 'ord', 'religion',
            'lang', 'weight', 'country', 'plant', 'cremat', 'food', 'ind',
            'exp', 'veh', 'substance', 'body', 'speed', 'termeq', 'other'
        ]

    def query_analyzer(self, q, ip):
        """Return an answer string for the tagged question `q`, or a
        'Nothing found' fallback when every strategy comes up empty."""
        res = self.classify_query(q, ip)
        if not res:
            return 'Nothing found'
        return res

    def classify_query(self, postgq, ip):
        """Classify the question, try the class-specific solver, then
        fall back to web lookup and finally the search engine."""
        res = ''
        cc, fc = self.classify_qstn_type(postgq)
        res = self.qstn_solve_chooser(cc, fc, postgq, ip)
        if not res:
            # Fallback 1: web lookup on the question-focus chunk.
            srch_trm = find_chunk(postgq)
            logger.debug('www search_term %s' % srch_trm)
            if (srch_trm) and (len(srch_trm) >= 3):
                res += self.www_lookup.get_data(srch_trm)
        if not res:
            # Fallback 2: search engine on a broader declarative chunk.
            srch_trm = find_chunk(postgq, 'DCHUNK: <W.*>?<V.*>*?{<.*>*?}<\.>')
            logger.info('SEngine:' + srch_trm)
            res, res_list = self.find.search(srch_trm)
        return res

    def classify_qstn_type(self, pos_sent):
        """Return (coarse_class, fine_class) for a POS-tagged question."""
        qstn_c_prob = {}
        qstn_f_prob = {}
        features = self.qstn_feature_extractor_v2(pos_sent)
        # Coarse class classifier.
        qstn_c_prob_dist = self.qstn_coarse_classifier.prob_classify(features)
        # Log the top-3 label log-probs, for information only.
        for label in self.qstn_coarse_classifier.labels():
            qstn_c_prob[label] = qstn_c_prob_dist.logprob(label)
        logger.info(
            sorted(qstn_c_prob.items(), key=itemgetter(1), reverse=True)[:3])
        qstn_c_class = qstn_c_prob_dist.max()
        # The fine classifier conditions on the predicted coarse class.
        features['coarse'] = qstn_c_class
        # Fine class classifier.
        qstn_f_prob_dist = self.qstn_fine_classifier.prob_classify(features)
        for label in self.qstn_fine_classifier.labels():
            qstn_f_prob[label] = qstn_f_prob_dist.logprob(label)
        logger.info(
            sorted(qstn_f_prob.items(), key=itemgetter(1), reverse=True)[:3])
        qstn_f_class = qstn_f_prob_dist.max()
        return qstn_c_class, qstn_f_class

    def qstn_feature_extractor_v2(self, pos_sent):
        """Extract features: wh-word, question-focus chunk, POS-tag
        string, and the first verb chunk."""
        features = {}
        for (w, t) in pos_sent:
            if t.startswith('W'):  # or w == 'EX':
                features['qstn_word'] = w.lower()
        features['question_focus'] = find_chunk(pos_sent)
        features['pos_tags'] = ' '.join([a[1] for a in pos_sent])
        features['1st verb'] = find_chunk(pos_sent,
                                          'VCHUNK: <.*>*?{<V.*>+}<.*>*')
        return features

    def fine_feature_extractor(self, pos_sent):
        """Extract features for the fine classifier, including a
        predicted 'coarse' label."""
        features = self.qstn_feature_extractor_v2(pos_sent)
        coarse = self.qstn_fine_classifier.classify(features)
        # BUG FIX: was `self.features['coarse'] = coarse`; no
        # `self.features` attribute exists, so this raised
        # AttributeError when called.
        # NOTE(review): classifying with the *fine* classifier to get a
        # 'coarse' feature looks suspicious — possibly should use
        # qstn_coarse_classifier; confirm before changing.
        features['coarse'] = coarse
        return features

    def qstn_solve_chooser(self, cc, fc, postgq, ip):
        """Route DESC/def questions to web lookup and LOC questions to
        the location solver; return '' for everything else."""
        res = ''
        if cc == 'DESC' and fc == 'def':
            srch_trm = find_chunk(
                postgq, 'DCHUNK: <W.*><V.*><DT>*{<V.*>*<RB.*>*<JJ>*<N.*>*}')
            if srch_trm:
                res = self.www_lookup.get_data(srch_trm, fc)
        elif cc == 'LOC':
            res = self.loc_solver.loc_solve_chooser(fc, postgq, ip)
        return res
class StmtClassify:
    """Dialogue-act classifier.

    Labels a POS-tagged chat statement with a pickled classifier,
    archives the labelled statement for retraining, and dispatches to a
    per-label reply handler (`stmt_<suffix>`).
    """

    def __init__(self):
        """Wire up helpers and load the pickled statement classifier."""
        self.qry = RuleProcessor()
        self.db_con = ChatDB()
        self.find = Query()
        # Maps classifier labels to reply-handler method name suffixes.
        self.statement_types = {
            'Emotion': '_emotion',
            'whQuestion': '_whquestion',
            'yAnswer': '_yanswer',
            'Reject': '_reject',
            'Emphasis': '_emphasis',
            'Greet': '_greet',
            'Statement': '_statement',
            'Other': '_other',
            'Clarify': '_clarify',
            'Bye': '_bye',
            'Accept': '_accept',
            'ynQuestion': '_ynquestion',
            'nAnswer': '_nanswer',
            'Continuer': '_continuer'
        }
        self.classifier_obj = 'data/stmnt_classifier.pickle'
        # Load the pre-trained classifier, closing the file deterministically.
        with open(self.classifier_obj, 'rb') as f:
            self.classifier = pickle.load(f)

    def classify(self, q_obj):
        """Label the text in q_obj['q'] and return a reply string."""
        pos_tagged_q = pos_tag_sent(q_obj['q'])
        logger.debug(pos_tagged_q)
        features = self.stmt_features_extract(pos_tagged_q)
        logger.debug(features)
        statement_prob = {}
        prob_dist = self.classifier.prob_classify(features)
        # Log the top-3 label log-probs, for information only.
        for label in self.classifier.labels():
            statement_prob[label] = prob_dist.logprob(label)
        logger.info(
            sorted(statement_prob.items(), key=itemgetter(1),
                   reverse=True)[:3])
        stmt_class = prob_dist.max()
        # Insert the labelled statement to the db for training.
        self.add_to_db(pos_tagged_q, stmt_class)
        res = self.process_classified_stmts(pos_tagged_q, stmt_class,
                                            q_obj['ip'])
        return res

    def stmt_features_extract(self, tagged_stmt):
        """Features: first word, bag-of-words 'contains(w)' flags for the
        remaining words, and the full POS-tag string."""
        features = {}
        pos_l = []
        first = True
        for (w, t) in tagged_stmt:
            pos_l.append(t)
            if first:
                features['starts_with'] = w.lower()
                first = False
                continue
            features['contains(%s)' % w.lower()] = True
        features['pos'] = ' '.join(pos_l)
        return features

    def stmt_features_extract_old(self, tagged_stmt):
        """Older extractor kept for reference: bag-of-words over all
        words (no starts_with) plus the POS-tag string."""
        features = {}
        pos_l = []
        for (w, t) in tagged_stmt:
            features['contains(%s)' % w.lower()] = True
            pos_l.append(t)
        features['pos'] = ' '.join(pos_l)
        return features

    def add_to_db(self, tagged_stmt, label):
        """Persist the labelled tokens/POS tags for future retraining."""
        stmt_doc = {}
        pos_l = []
        word_l = []
        for (w, t) in tagged_stmt:
            word_l.append(w)
            pos_l.append(t)
        stmt_doc['tokens'] = word_l
        stmt_doc['pos_tags'] = pos_l
        stmt_doc['class'] = label
        # Insert to db.
        stmt_id = self.db_con.insert_stmt(stmt_doc)
        if stmt_id:
            logger.info(stmt_id)

    def process_classified_stmts(self, tagged_stmt, label, ip):
        """Dispatch to the stmt_<suffix> handler for `label`; return ''
        if no matching handler exists."""
        res = ''
        func_name = 'stmt' + self.statement_types[label]
        try:
            # FIX: bind the handler to a local instead of `self.func`;
            # the attribute was never read elsewhere and made the
            # dispatch needlessly stateful.
            func = getattr(self, func_name)
        except AttributeError:
            logger.exception("Function not found: " + func_name)
        else:
            # Only wh-questions need the caller's ip (for location answers).
            if label == 'whQuestion':
                res = func(tagged_stmt, ip)
            else:
                res = func(tagged_stmt)
        return res

    def _search_reply(self, tagged_stmt):
        """Shared fallback: chunk the statement and run a search query.

        Consolidates the identical bodies of stmt_statement, stmt_other,
        stmt_clarify, and stmt_ynquestion.
        """
        srch_trm = find_chunk(tagged_stmt, 'DCHUNK: {<.*>*}<\.>?')
        logger.info('SEngine:' + srch_trm)
        res, junk = self.find.search(srch_trm)
        return res

    # Handlers for each identifiable dialogue act.
    def stmt_emotion(self, tagged_stmt):
        # TODO: understand +/-ve emotions and reply accordingly.
        return 'I wish I could understand your feelings'

    def stmt_whquestion(self, tagged_stmt, ip):
        return self.qry.query_analyzer(tagged_stmt, ip)

    def stmt_continuer(self, tagged_stmt):
        return 'Then whats next?'

    def stmt_emphasis(self, tagged_stmt):
        return 'ok ok I get it'

    def stmt_greet(self, tagged_stmt):
        return 'Hey Hi'

    def stmt_bye(self, tagged_stmt):
        return 'Bye Catch you later'

    def stmt_statement(self, tagged_stmt):
        return self._search_reply(tagged_stmt)

    def stmt_other(self, tagged_stmt):
        return self._search_reply(tagged_stmt)

    def stmt_clarify(self, tagged_stmt):
        return self._search_reply(tagged_stmt)

    def stmt_ynquestion(self, tagged_stmt):
        return self._search_reply(tagged_stmt)

    def stmt_yanswer(self, tagged_stmt):
        return 'Acknowledgement accepted'

    def stmt_nanswer(self, tagged_stmt):
        return 'Ok thats fine with me'

    def stmt_accept(self, tagged_stmt):
        return 'Thank you for Acknowledging'

    def stmt_reject(self, tagged_stmt):
        return 'Why not?'
class RuleProcessor:
    """Question classifier and solver.

    Assigns TREC-style coarse and fine classes to a POS-tagged question
    using two pickled classifiers, routes the question to a matching
    solver, and falls back to web lookup and then the search engine.
    """

    def __init__(self):
        """Set up lookup helpers and load the pickled classifiers."""
        self.www_lookup = DescLookup()
        self.loc_solver = locationSolve()
        self.find = Query()
        self.coarse_classifier_obj = 'data/q_coarse_classifier.pickle'
        self.fine_classifier_obj = 'data/q_fine_classifier.pickle'
        # Load pre-trained classifiers, closing the files deterministically.
        with open(self.coarse_classifier_obj, 'rb') as f:
            self.qstn_coarse_classifier = pickle.load(f)
        with open(self.fine_classifier_obj, 'rb') as f:
            self.qstn_fine_classifier = pickle.load(f)
        self.qstn_coarse_classes = ['LOC', 'DESC', 'ENTY', 'ABBR', 'NUM', 'HUM']
        self.qstn_fine_classes = [
            'def', 'abb', 'event', 'dist', 'mount', 'word', 'color', 'gr',
            'dismed', 'product', 'file', 'period', 'temp', 'animal', 'desc',
            'sport', 'currency', 'volsize', 'letter', 'directory', 'money',
            'code', 'symbol', 'instru', 'title', 'techmeth', 'count', 'date',
            'reason', 'manner', 'state', 'city', 'perc', 'ord', 'religion',
            'lang', 'weight', 'country', 'plant', 'cremat', 'food', 'ind',
            'exp', 'veh', 'substance', 'body', 'speed', 'termeq', 'other'
        ]

    def query_analyzer(self, q, ip):
        """Return an answer string for the tagged question `q`, or a
        'Nothing found' fallback when every strategy comes up empty."""
        res = self.classify_query(q, ip)
        if not res:
            return 'Nothing found'
        return res

    def classify_query(self, postgq, ip):
        """Classify the question, try the class-specific solver, then
        fall back to web lookup and finally the search engine."""
        res = ''
        cc, fc = self.classify_qstn_type(postgq)
        res = self.qstn_solve_chooser(cc, fc, postgq, ip)
        if not res:
            # Fallback 1: web lookup on the question-focus chunk.
            srch_trm = find_chunk(postgq)
            logger.debug('www search_term %s' % srch_trm)
            if (srch_trm) and (len(srch_trm) >= 3):
                res += self.www_lookup.get_data(srch_trm)
        if not res:
            # Fallback 2: search engine on a broader declarative chunk.
            srch_trm = find_chunk(postgq, 'DCHUNK: <W.*>?<V.*>*?{<.*>*?}<\.>')
            logger.info('SEngine:' + srch_trm)
            res, res_list = self.find.search(srch_trm)
        return res

    def classify_qstn_type(self, pos_sent):
        """Return (coarse_class, fine_class) for a POS-tagged question."""
        qstn_c_prob = {}
        qstn_f_prob = {}
        features = self.qstn_feature_extractor_v2(pos_sent)
        # Coarse class classifier.
        qstn_c_prob_dist = self.qstn_coarse_classifier.prob_classify(features)
        # Log the top-3 label log-probs, for information only.
        for label in self.qstn_coarse_classifier.labels():
            qstn_c_prob[label] = qstn_c_prob_dist.logprob(label)
        logger.info(sorted(qstn_c_prob.items(), key=itemgetter(1),
                           reverse=True)[:3])
        qstn_c_class = qstn_c_prob_dist.max()
        # The fine classifier conditions on the predicted coarse class.
        features['coarse'] = qstn_c_class
        # Fine class classifier.
        qstn_f_prob_dist = self.qstn_fine_classifier.prob_classify(features)
        for label in self.qstn_fine_classifier.labels():
            qstn_f_prob[label] = qstn_f_prob_dist.logprob(label)
        logger.info(sorted(qstn_f_prob.items(), key=itemgetter(1),
                           reverse=True)[:3])
        qstn_f_class = qstn_f_prob_dist.max()
        return qstn_c_class, qstn_f_class

    def qstn_feature_extractor_v2(self, pos_sent):
        """Extract features: wh-word, question-focus chunk, POS-tag
        string, and the first verb chunk."""
        features = {}
        for (w, t) in pos_sent:
            if t.startswith('W'):  # or w == 'EX':
                features['qstn_word'] = w.lower()
        features['question_focus'] = find_chunk(pos_sent)
        features['pos_tags'] = ' '.join([a[1] for a in pos_sent])
        features['1st verb'] = find_chunk(pos_sent,
                                          'VCHUNK: <.*>*?{<V.*>+}<.*>*')
        return features

    def fine_feature_extractor(self, pos_sent):
        """Extract features for the fine classifier, including a
        predicted 'coarse' label."""
        features = self.qstn_feature_extractor_v2(pos_sent)
        coarse = self.qstn_fine_classifier.classify(features)
        # BUG FIX: was `self.features['coarse'] = coarse`; no
        # `self.features` attribute exists, so this raised
        # AttributeError when called.
        # NOTE(review): classifying with the *fine* classifier to get a
        # 'coarse' feature looks suspicious — possibly should use
        # qstn_coarse_classifier; confirm before changing.
        features['coarse'] = coarse
        return features

    def qstn_solve_chooser(self, cc, fc, postgq, ip):
        """Route DESC/def questions to web lookup and LOC questions to
        the location solver; return '' for everything else."""
        res = ''
        if cc == 'DESC' and fc == 'def':
            srch_trm = find_chunk(
                postgq, 'DCHUNK: <W.*><V.*><DT>*{<V.*>*<RB.*>*<JJ>*<N.*>*}')
            if srch_trm:
                res = self.www_lookup.get_data(srch_trm, fc)
        elif cc == 'LOC':
            res = self.loc_solver.loc_solve_chooser(fc, postgq, ip)
        return res
class StmtClassify:
    """Dialogue-act classifier: labels a POS-tagged chat statement with a
    pickled classifier, archives it for retraining, and dispatches to a
    per-label reply handler (`stmt_<suffix>`)."""

    def __init__(self):
        """Wire up helpers and load the pickled statement classifier."""
        # Rule processor answers wh-questions; ChatDB stores labelled data.
        self.qry = RuleProcessor()
        self.db_con = ChatDB()
        self.find = Query()
        # Maps classifier labels to reply-handler method name suffixes.
        self.statement_types = {'Emotion' : '_emotion',
                                'whQuestion' : '_whquestion',
                                'yAnswer' : '_yanswer',
                                'Reject' : '_reject',
                                'Emphasis' : '_emphasis',
                                'Greet' : '_greet',
                                'Statement' : '_statement',
                                'Other' : '_other',
                                'Clarify' : '_clarify',
                                'Bye' : '_bye',
                                'Accept' :'_accept',
                                'ynQuestion' : '_ynquestion',
                                'nAnswer' : '_nanswer',
                                'Continuer' : '_continuer' }
        # NOTE(review): the file handle passed to pickle.load is never closed.
        self.classifier_obj = 'data/stmnt_classifier.pickle'
        #self.classifier_obj = 'data/q_fine_classifier.pickle'
        self.classifier = pickle.load(open(self.classifier_obj, 'rb'))

    def classify(self, q_obj):
        """Label the text in q_obj['q'] and return a reply string."""
        pos_tagged_q = pos_tag_sent(q_obj['q'])
        logger.debug(pos_tagged_q)
        features = self.stmt_features_extract(pos_tagged_q)
        logger.debug(features)
        statement_prob = {}
        prob_dist = self.classifier.prob_classify(features)
        # Log the top-3 label log-probs, for information only.
        for label in self.classifier.labels():
            statement_prob[label] = prob_dist.logprob(label)
        logger.info(sorted(statement_prob.items(), key=itemgetter(1),
                           reverse=True)[:3])
        stmt_class = prob_dist.max()
        # Insert labelled statement to db for training.
        self.add_to_db(pos_tagged_q, stmt_class)
        res = self.process_classified_stmts(pos_tagged_q, stmt_class,
                                            q_obj['ip'])
        return res

    def stmt_features_extract(self, tagged_stmt):
        """Features: first word, bag-of-words 'contains(w)' flags for the
        remaining words, and the full POS-tag string."""
        features = {}
        pos_l = []
        first = True
        for (w, t) in tagged_stmt:
            pos_l.append(t)
            if first:
                features['starts_with'] = w.lower()
                first = False
                continue
            features['contains(%s)' % w.lower()] = True
        features['pos'] = ' '.join(pos_l)
        return features

    def stmt_features_extract_old(self, tagged_stmt):
        """Older extractor kept for reference: bag-of-words over all
        words (no starts_with) plus the POS-tag string."""
        features = {}
        pos_l = []
        for (w, t) in tagged_stmt:
            features['contains(%s)' % w.lower()] = True
            pos_l.append(t)
        features['pos'] = ' '.join(pos_l)
        return features

    def add_to_db(self, tagged_stmt, label):
        """Persist the labelled tokens/POS tags for future retraining."""
        stmt_doc = {}
        pos_l = []
        word_l = []
        for (w, t) in tagged_stmt:
            word_l.append(w)
            pos_l.append(t)
        stmt_doc['tokens'] = word_l
        stmt_doc['pos_tags'] = pos_l
        stmt_doc['class'] = label
        # Insert to db.
        stmt_id = self.db_con.insert_stmt(stmt_doc)
        if stmt_id:
            logger.info(stmt_id)

    def process_classified_stmts(self, tagged_stmt, label, ip):
        """Dispatch to the stmt_<suffix> handler for `label`; return ''
        if no matching handler exists.

        NOTE(review): the handler is stored on `self.func` but never
        read elsewhere — a local variable would suffice.
        """
        res = ''
        func_name = 'stmt' + self.statement_types[label]
        try:
            self.func = getattr(self, func_name)
        except AttributeError:
            logger.exception("Function not found: " + func_name)
        else:
            # Only wh-questions need the caller's ip.
            if label == 'whQuestion':
                res = self.func(tagged_stmt, ip)
            else:
                res = self.func(tagged_stmt)
        return res

    # Handlers for each of the identifiable dialogue acts.
    def stmt_emotion(self, tagged_stmt):
        # TODO: understand +/-ve emotions and reply accordingly.
        return 'I wish I could understand your feelings'

    def stmt_whquestion(self, tagged_stmt, ip):
        return self.qry.query_analyzer(tagged_stmt, ip)

    def stmt_continuer(self, tagged_stmt):
        return 'Then whats next?'

    def stmt_emphasis(self, tagged_stmt):
        return 'ok ok I get it'

    def stmt_greet(self, tagged_stmt):
        return 'Hey Hi'

    def stmt_bye(self, tagged_stmt):
        return 'Bye Catch you later'

    def stmt_statement(self, tagged_stmt):
        # Fallback: chunk the statement and run a search query.
        srch_trm = find_chunk(tagged_stmt, 'DCHUNK: {<.*>*}<\.>?')
        logger.info('SEngine:' + srch_trm)
        res, junk = self.find.search(srch_trm)
        return res

    def stmt_other(self, tagged_stmt):
        # Same search fallback as stmt_statement.
        srch_trm = find_chunk(tagged_stmt, 'DCHUNK: {<.*>*}<\.>?')
        logger.info('SEngine:' + srch_trm)
        res, junk = self.find.search(srch_trm)
        return res

    def stmt_clarify(self, tagged_stmt):
        # Same search fallback as stmt_statement.
        srch_trm = find_chunk(tagged_stmt, 'DCHUNK: {<.*>*}<\.>?')
        logger.info('SEngine:' + srch_trm)
        res, junk = self.find.search(srch_trm)
        return res

    def stmt_ynquestion(self, tagged_stmt):
        # Same search fallback as stmt_statement.
        srch_trm = find_chunk(tagged_stmt, 'DCHUNK: {<.*>*}<\.>?')
        logger.info('SEngine:' + srch_trm)
        res, junk = self.find.search(srch_trm)
        return res

    def stmt_yanswer(self, tagged_stmt):
        return 'Acknowledgement accepted'

    def stmt_nanswer(self, tagged_stmt):
        return 'Ok thats fine with me'

    def stmt_accept(self, tagged_stmt):
        return 'Thank you for Acknowledging'

    def stmt_reject(self, tagged_stmt):
        return 'Why not?'