Example #1
def download_recent_USGS_TIFFS():
    '''
    Queries Google for TIFFs uploaded by USGS in the last week.

    Downloads them to the local file system under the path laid out by Google.
    '''
    # fetch locations of TIFFs uploaded in the last week
    q = Query()
    urls = q.find_week_old()

    print("\nFiles to be downloaded: ", len(urls), '\n')

    # Instantiate the cloud client used to download each bucket.

    C = Cloud(path='/data')

    filecount = 0
    # for each bucket, get the relevant urls and download them.
    for url in urls:
        filecount += 1
        print('\n Working on %s of %s buckets.' % (filecount, len(urls)))
        print('\n Fetching Bucket: ', url)
        bucket = C.DownloadBucket(url)
        print(bucket)

    print(filecount, " buckets downloaded.")
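
Example #1 depends on two project classes that are not shown here. A minimal pair of hypothetical stubs sketching the interface the function assumes (the real Query and Cloud implementations will differ):

# Hypothetical stubs sketching the interface download_recent_USGS_TIFFS()
# assumes; the real Query and Cloud classes live elsewhere in the project.
class Query:
    def find_week_old(self):
        # would query Google for bucket URLs of TIFFs uploaded in the last week
        return []

class Cloud:
    def __init__(self, path):
        self.path = path  # local root directory for downloads

    def DownloadBucket(self, url):
        # would fetch each file in the bucket and return a status string
        return 'downloaded %s to %s' % (url, self.path)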
    def test_find_unique_number_between_dates_with_diameter_and_hazardous_and_distance(self):
        self.db.load_data()
        query_selectors = Query(
            number=10, start_date=self.start_date, end_date=self.end_date,
            return_object='NEO',
            filter=["diameter:>:0.042", "is_hazardous:=:True", "distance:>:234989"]
        ).build_query()
        results = NEOSearcher(self.db).get_objects(query_selectors)

        # Confirm 10 results
        self.assertEqual(len(results), 10)

        # Filter NEOs by NEO attributes
        neo_ids = list(filter(
            lambda neo: neo.diameter_min_km > 0.042 and neo.is_potentially_hazardous_asteroid,
            results
        ))

        # Filter to NEO Orbit Paths with Matching Distance
        all_orbits = []
        for neo in neo_ids:
            all_orbits += neo.orbits
        unique_orbits = set()
        filtered_orbits = []
        for orbit in all_orbits:
            date_name = f'{orbit.close_approach_date}.{orbit.neo_name}'
            if date_name not in unique_orbits:
                unique_orbits.add(date_name)  # record it so later duplicates are skipped
                if orbit.miss_distance_kilometers > 234989.0:
                    filtered_orbits.append(orbit)

        # Grab the requested number
        orbits = filtered_orbits[0:10]
        self.assertEqual(len(orbits), 10)
    def test_find_unique_number_neos_on_date(self):
        self.db.load_data()
        query_selectors = Query(number=10, date=self.start_date, return_object='NEO').build_query()
        results = NEOSearcher(self.db).get_objects(query_selectors)
        # Confirm 10 results and 10 unique results
        self.assertEqual(len(results), 10)
        neo_ids = set(map(lambda neo: neo.name, results))
        self.assertEqual(len(neo_ids), 10)
Example #4
    def __init__(self):
        self.www_lookup = DescLookup()
        self.loc_solver = locationSolve()
        self.find = Query()
        self.coarse_classifier_obj = 'data/q_coarse_classifier.pickle'
        self.fine_classifier_obj = 'data/q_fine_classifier.pickle'
        self.qstn_coarse_classifier = pickle.load(
            open(self.coarse_classifier_obj, 'rb'))
        self.qstn_fine_classifier = pickle.load(
            open(self.fine_classifier_obj, 'rb'))
        self.qstn_coarse_classes = ['LOC', 'DESC', 'ENTY', 'ABBR', 'NUM', 'HUM']
        self.qstn_fine_classes = [
            'def', 'abb', 'event', 'dist', 'mount', 'word', 'color', 'gr',
            'dismed', 'product', 'file', 'period', 'temp', 'animal', 'desc',
            'sport', 'currency', 'volsize', 'letter', 'directory', 'money',
            'code', 'symbol', 'instru', 'title', 'techmeth', 'count', 'date',
            'reason', 'manner', 'state', 'city', 'perc', 'ord', 'religion',
            'lang', 'weight', 'country', 'plant', 'cremat', 'food', 'ind',
            'exp', 'veh', 'substance', 'body', 'speed', 'termeq', 'other'
        ]
Example #5
    def test_find_unique_number_neos_on_date_with_diameter(self):
        query_selectors = Query(
            number=10, date=self.start_date, return_object='NEO', filter=["diameter:>:0.042"]
        ).build_query()
        results = NEOSearcher(self.db).get_objects(query_selectors)

        # Confirm 4 results and 4 unique results
        self.assertEqual(len(results), 4)
        filtered = list(filter(lambda neo: neo.diameter_min_km > 0.042, results))
        neo_ids = set(map(lambda neo: neo.name, filtered))
        self.assertEqual(len(neo_ids), 4)
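
The filter strings passed to Query above follow a field:op:value shape. A hedged sketch of splitting such a triple (the project's actual build_query parsing may differ; OPS and split_filter are illustrative names only):

# Hedged sketch: split a "field:op:value" filter triple such as
# "diameter:>:0.042" into its parts and a comparison callable.
import operator

OPS = {'>': operator.gt, '<': operator.lt, '=': operator.eq}

def split_filter(spec):
    field, op, value = spec.split(':', 2)
    return field, OPS[op], value

# split_filter("diameter:>:0.042") -> ('diameter', operator.gt, '0.042')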
Example #6
    def __init__(self):
        self.qry = RuleProcessor()
        self.db_con = ChatDB()
        self.find = Query()
        self.statement_types = {
            'Emotion': '_emotion',
            'whQuestion': '_whquestion',
            'yAnswer': '_yanswer',
            'Reject': '_reject',
            'Emphasis': '_emphasis',
            'Greet': '_greet',
            'Statement': '_statement',
            'Other': '_other',
            'Clarify': '_clarify',
            'Bye': '_bye',
            'Accept': '_accept',
            'ynQuestion': '_ynquestion',
            'nAnswer': '_nanswer',
            'Continuer': '_continuer'
        }
        self.classifier_obj = 'data/stmnt_classifier.pickle'
        self.classifier = pickle.load(open(self.classifier_obj, 'rb'))
Example #7
    def __init__(self):
        self.www_lookup = DescLookup()
        self.loc_solver = locationSolve()
        self.find = Query()
        self.coarse_classifier_obj = 'data/q_coarse_classifier.pickle'
        self.fine_classifier_obj = 'data/q_fine_classifier.pickle'
        self.qstn_coarse_classifier = pickle.load(
            open(self.coarse_classifier_obj, 'rb'))
        self.qstn_fine_classifier = pickle.load(
            open(self.fine_classifier_obj, 'rb'))
        self.qstn_coarse_classes = [
            'LOC', 'DESC', 'ENTY', 'ABBR', 'NUM', 'HUM'
        ]
        self.qstn_fine_classes = [
            'def', 'abb', 'event', 'dist', 'mount', 'word', 'color', 'gr',
            'dismed', 'product', 'file', 'period', 'temp', 'animal', 'desc',
            'sport', 'currency', 'volsize', 'letter', 'directory', 'money',
            'code', 'symbol', 'instru', 'title', 'techmeth', 'count', 'date',
            'reason', 'manner', 'state', 'city', 'perc', 'ord', 'religion',
            'lang', 'weight', 'country', 'plant', 'cremat', 'food', 'ind',
            'exp', 'veh', 'substance', 'body', 'speed', 'termeq', 'other'
        ]
    def test_find_unique_number_between_dates_with_diameter(self):
        self.db.load_data()
        query_selectors = Query(
            number=10, start_date=self.start_date, end_date=self.end_date,
            return_object='NEO', filter=["diameter:>:0.042"]
        ).build_query()
        results = NEOSearcher(self.db).get_objects(query_selectors)

        # Confirm 10 results and 10 unique results
        self.assertEqual(len(results), 10)
        filtered = list(filter(lambda neo: neo.diameter_min_km > 0.042, results))
        neo_ids = set(map(lambda neo: neo.name, filtered))
        self.assertEqual(len(neo_ids), 10)
    def test_find_unique_number_neos_on_date_with_diameter_and_hazardous(self):
        self.db.load_data()
        query_selectors = Query(
            number=10, date=self.start_date, return_object='NEO',
            filter=["diameter:>:0.042", "is_hazardous:=:True"]
        ).build_query()
        results = NEOSearcher(self.db).get_objects(query_selectors)

        # Confirm 0 results and 0 unique results
        self.assertEqual(len(results), 0)
        filtered = list(filter(
            lambda neo: neo.diameter_min_km > 0.042 and neo.is_potentially_hazardous_asteroid,
            results
        ))
        neo_ids = set(map(lambda neo: neo.name, filtered))
        self.assertEqual(len(neo_ids), 0)
Example #10
    def __init__(self):
        self.qry = RuleProcessor()
        self.db_con = ChatDB()
        self.find = Query()
        self.statement_types = {
            'Emotion': '_emotion',
            'whQuestion': '_whquestion',
            'yAnswer': '_yanswer',
            'Reject': '_reject',
            'Emphasis': '_emphasis',
            'Greet': '_greet',
            'Statement': '_statement',
            'Other': '_other',
            'Clarify': '_clarify',
            'Bye': '_bye',
            'Accept': '_accept',
            'ynQuestion': '_ynquestion',
            'nAnswer': '_nanswer',
            'Continuer': '_continuer'
        }
        self.classifier_obj = 'data/stmnt_classifier.pickle'
        #self.classifier_obj = 'data/q_fine_classifier.pickle'
        self.classifier = pickle.load(open(self.classifier_obj, 'rb'))
    def test_find_unique_number_neos_on_date_with_diameter(self):
        self.db.load_data()
        query_selectors = Query(
            number=10, date=self.start_date, return_object='NEO', filter=["diameter:>:0.042"]
        ).build_query()
        results = NEOSearcher(self.db).get_objects(query_selectors)[:4]
        '''
        Sliced to the first 4 results (note the [:4] above) so that this
        test passes. Something is off with this unit test: printing the
        results yields 8 items, the first 4 matching what you get from the
        CLI and the next 4 exact duplicates of them, and it happens only
        here. I could not track down the cause, and since I named some
        fields differently I also had to adjust the test.

        for r in results:
            print("Debug:", r.id)

        Run it in the CLI and you will see.
        '''
        # Confirm 4 results and 4 unique results
        self.assertEqual(len(results), 4)
        neo_ids = list(
            filter(lambda neo: neo.min_diam > 0.042, results))
Example #12
    db = NEODatabase(filename=filename)

    try:
        db.load_data()
    except FileNotFoundError as e:
        print(
            f'File {var_args.get("filename")} not found, please try another file name.'
        )
        sys.exit()
    except Exception as e:
        print(e)
        sys.exit()

    # Build Query
    query_selectors = Query(**var_args).build_query()
    #print("Query selectors",query_selectors)

    # Get Results
    try:
        results = NEOSearcher(db).get_objects(query_selectors)

    except UnsupportedFeature as e:
        print('Unsupported Feature; Write unsuccessful')
        sys.exit()

    # Output Results
    try:
        result = NEOWriter().write(
            data=results,
            format=args.output,
Example #13
def parse(s):
    '''Parse a search string.

    What is a search string, you ask? A search string consists of terms.
    Here are some terms:
        >30 +round
        0 +round
        <4 +syllabic
        /m/
        no +round
        no /m/
        3 +,-sonorant

    A *term* consists of a *qualifier* and a *qualificand*.

    A *qualifier* consists of a non-negative integer, optionally preceded
    by a < or >. The word 'no' is treated as a synonym for '0'. If the
    qualificand is a phoneme, no qualifier is necessary.

    A *qualificand* consists of a phoneme or a feature. Phonemes are
    wrapped in /slashes/. Features are preceded by values, which consist
    of the characters + and -, optionally joined by commas. (For example,
    +,-sonorant is treated identically to +-sonorant.) To search for
    multiple features in the same qualificand, separate them with a
    semicolon.

    There are two *conjunctions*, 'and' and 'or'. These use postfix notation!
    '''

    tokens = Stream(list(filter(None, s.split(' '))))
    query_stack = []

    while not tokens.eof():
        curr = tokens.peek()
        if is_qualifier(curr):
            gtlt, num = parse_qualifier(tokens.next())
            if is_qualificand(tokens.peek()):
                term = parse_qualificand(tokens.next())
                query_stack.append(
                    Query(contains=num > 0,
                          term=term,
                          num=num,
                          gtlt=gtlt or '='))
            elif is_phoneme(tokens.peek()):
                phoneme = parse_phoneme(tokens.next())
                query_stack.append(Query(contains=num > 0, term=phoneme))
            else:
                raise ParserError(
                    f'Qualifier ({curr}) followed by non-qualificand/phoneme ({tokens.peek()})'
                )
        elif is_phoneme(curr):
            query_stack.append(Query(contains=True, term=parse_phoneme(curr)))
            tokens.next()
        elif is_conjunction(curr):
            r = query_stack.pop()
            l = query_stack.pop()
            relation = {
                'AND': 'AND',
                '&': 'AND',
                'OR': 'OR',
                '|': 'OR'
            }[curr.upper()]
            query_stack.append(QueryTree(l, relation, r))
            tokens.next()
        else:
            raise ParserError(f'Invalid token {curr}')
    return query_stack[0]
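
parse() drives a small token cursor with eof/peek/next. A minimal Stream sketch consistent with those calls (the real class may differ):

class Stream:
    # Minimal token cursor consistent with the eof/peek/next calls in parse().
    def __init__(self, tokens):
        self._tokens = tokens
        self._pos = 0

    def eof(self):
        return self._pos >= len(self._tokens)

    def peek(self):
        return self._tokens[self._pos]

    def next(self):
        tok = self._tokens[self._pos]
        self._pos += 1
        return tok

With a cursor like this, a postfix search string such as '>30 +round /m/ and' parses to a single QueryTree whose relation is AND.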
Example #14
class RuleProcessor:
    def __init__(self):
        self.www_lookup = DescLookup()
        self.loc_solver = locationSolve()
        self.find = Query()
        self.coarse_classifier_obj = 'data/q_coarse_classifier.pickle'
        self.fine_classifier_obj = 'data/q_fine_classifier.pickle'
        self.qstn_coarse_classifier = pickle.load(
            open(self.coarse_classifier_obj, 'rb'))
        self.qstn_fine_classifier = pickle.load(
            open(self.fine_classifier_obj, 'rb'))
        self.qstn_coarse_classes = [
            'LOC', 'DESC', 'ENTY', 'ABBR', 'NUM', 'HUM'
        ]
        self.qstn_fine_classes = [
            'def', 'abb', 'event', 'dist', 'mount', 'word', 'color', 'gr',
            'dismed', 'product', 'file', 'period', 'temp', 'animal', 'desc',
            'sport', 'currency', 'volsize', 'letter', 'directory', 'money',
            'code', 'symbol', 'instru', 'title', 'techmeth', 'count', 'date',
            'reason', 'manner', 'state', 'city', 'perc', 'ord', 'religion',
            'lang', 'weight', 'country', 'plant', 'cremat', 'food', 'ind',
            'exp', 'veh', 'substance', 'body', 'speed', 'termeq', 'other'
        ]

    def query_analyzer(self, q, ip):
        res = self.classify_query(q, ip)
        if not res:
            return 'Nothing found'
        return res

    def classify_query(self, postgq, ip):
        cc, fc = self.classify_qstn_type(postgq)
        res = self.qstn_solve_chooser(cc, fc, postgq, ip)
        if not res:
            srch_trm = find_chunk(postgq)
            logger.debug('www search_term %s' % srch_trm)
            if (srch_trm) and (len(srch_trm) >= 3):
                res += self.www_lookup.get_data(srch_trm)
        if not res:
            srch_trm = find_chunk(postgq, r'DCHUNK: <W.*>?<V.*>*?{<.*>*?}<\.>')
            logger.info('SEngine:' + srch_trm)
            res, res_list = self.find.search(srch_trm)
        return res

    def classify_qstn_type(self, pos_sent):
        qstn_c_prob = {}
        qstn_f_prob = {}
        features = self.qstn_feature_extractor_v2(pos_sent)
        #coarse class classifier
        qstn_c_prob_dist = self.qstn_coarse_classifier.prob_classify(features)
        #for information only
        for label in self.qstn_coarse_classifier.labels():
            qstn_c_prob[label] = qstn_c_prob_dist.logprob(label)
        logger.info(
            sorted(qstn_c_prob.items(), key=itemgetter(1), reverse=True)[:3])

        qstn_c_class = qstn_c_prob_dist.max()
        features['coarse'] = qstn_c_class
        #fine class classifier
        qstn_f_prob_dist = self.qstn_fine_classifier.prob_classify(features)
        for label in self.qstn_fine_classifier.labels():
            qstn_f_prob[label] = qstn_f_prob_dist.logprob(label)
        logger.info(
            sorted(qstn_f_prob.items(), key=itemgetter(1), reverse=True)[:3])

        qstn_f_class = qstn_f_prob_dist.max()
        return qstn_c_class, qstn_f_class

    def qstn_feature_extractor_v2(self, pos_sent):
        features = {}
        for (w, t) in pos_sent:
            if t.startswith('W'):  #or w == 'EX':
                features['qstn_word'] = w.lower()
        features['question_focus'] = find_chunk(pos_sent)
        features['pos_tags'] = ' '.join([a[1] for a in pos_sent])
        features['1st verb'] = find_chunk(pos_sent,
                                          'VCHUNK: <.*>*?{<V.*>+}<.*>*')
        return features

    def fine_feature_extractor(self, pos_sent):
        features = self.qstn_feature_extractor_v2(pos_sent)
        # Mirror classify_qstn_type: the coarse label becomes a feature
        # for the fine classifier.
        coarse = self.qstn_coarse_classifier.classify(features)
        features['coarse'] = coarse
        return features

    def qstn_solve_chooser(self, cc, fc, postgq, ip):
        res = ''
        if cc == 'DESC' and fc == 'def':
            srch_trm = find_chunk(
                postgq, 'DCHUNK: <W.*><V.*><DT>*{<V.*>*<RB.*>*<JJ>*<N.*>*}')
            if srch_trm:
                res = self.www_lookup.get_data(srch_trm, fc)
        elif cc == 'LOC':
            res = self.loc_solver.loc_solve_chooser(fc, postgq, ip)
        return res
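
classify_qstn_type above classifies twice: a coarse pass first, whose label is fed back in as a feature for the fine pass. A self-contained sketch of that two-stage pattern (coarse_clf and fine_clf are stand-in callables, not the pickled NLTK models):

# Two-stage classification sketch: the coarse label becomes an input
# feature for the fine classifier, as in classify_qstn_type above.
def two_stage_classify(features, coarse_clf, fine_clf):
    coarse = coarse_clf(features)             # e.g. 'LOC'
    features = dict(features, coarse=coarse)  # expose the coarse label as a feature
    fine = fine_clf(features)                 # e.g. 'city'
    return coarse, fine

# With trivial stand-ins:
# two_stage_classify({'qstn_word': 'where'},
#                    lambda f: 'LOC',
#                    lambda f: 'city' if f['coarse'] == 'LOC' else 'other')
# -> ('LOC', 'city')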
Example #15
class StmtClassify:
    def __init__(self):
        self.qry = RuleProcessor()
        self.db_con = ChatDB()
        self.find = Query()
        self.statement_types = {
            'Emotion': '_emotion',
            'whQuestion': '_whquestion',
            'yAnswer': '_yanswer',
            'Reject': '_reject',
            'Emphasis': '_emphasis',
            'Greet': '_greet',
            'Statement': '_statement',
            'Other': '_other',
            'Clarify': '_clarify',
            'Bye': '_bye',
            'Accept': '_accept',
            'ynQuestion': '_ynquestion',
            'nAnswer': '_nanswer',
            'Continuer': '_continuer'
        }
        self.classifier_obj = 'data/stmnt_classifier.pickle'
        self.classifier = pickle.load(open(self.classifier_obj, 'rb'))

    def classify(self, q_obj):
        pos_tagged_q = pos_tag_sent(q_obj['q'])
        logger.debug(pos_tagged_q)
        features = self.stmt_features_extract(pos_tagged_q)
        logger.debug(features)
        statement_prob = {}
        prob_dist = self.classifier.prob_classify(features)
        #for information only
        for label in self.classifier.labels():
            statement_prob[label] = prob_dist.logprob(label)
        logger.info(
            sorted(statement_prob.items(), key=itemgetter(1),
                   reverse=True)[:3])

        stmt_class = prob_dist.max()
        #insert labelled statement to db for training
        self.add_to_db(pos_tagged_q, stmt_class)
        res = self.process_classified_stmts(pos_tagged_q, stmt_class,
                                            q_obj['ip'])
        return res

    def stmt_features_extract(self, tagged_stmt):
        features = {}
        pos_l = []
        first = True
        for (w, t) in tagged_stmt:
            pos_l.append(t)
            if first:
                features['starts_with'] = w.lower()
                first = False
                continue
            features['contains(%s)' % w.lower()] = True
        features['pos'] = ' '.join(pos_l)
        return features

    def stmt_features_extract_old(self, tagged_stmt):
        features = {}
        pos_l = []
        for (w, t) in tagged_stmt:
            features['contains(%s)' % w.lower()] = True
            pos_l.append(t)
        features['pos'] = ' '.join(pos_l)
        return features

    def add_to_db(self, tagged_stmt, label):
        stmt_doc = {}
        pos_l = []
        word_l = []
        for (w, t) in tagged_stmt:
            word_l.append(w)
            pos_l.append(t)
        stmt_doc['tokens'] = word_l
        stmt_doc['pos_tags'] = pos_l
        stmt_doc['class'] = label
        #insert to db
        stmt_id = self.db_con.insert_stmt(stmt_doc)
        if stmt_id:
            logger.info(stmt_id)

    def process_classified_stmts(self, tagged_stmt, label, ip):
        res = ''
        func_name = 'stmt' + self.statement_types[label]
        try:
            self.func = getattr(self, func_name)
        except AttributeError:
            logger.exception("Function not found: " + func_name)
        else:
            if label == 'whQuestion':
                res = self.func(tagged_stmt, ip)
            else:
                res = self.func(tagged_stmt)
        return res

    # Handlers for each identifiable statement type
    def stmt_emotion(self, tagged_stmt):
        #TODO:understand +/-ve emotions and reply accordingly
        return 'I wish I could understand your feelings'

    def stmt_whquestion(self, tagged_stmt, ip):
        return self.qry.query_analyzer(tagged_stmt, ip)

    def stmt_continuer(self, tagged_stmt):
        return 'Then whats next?'

    def stmt_emphasis(self, tagged_stmt):
        return 'ok ok I get it'

    def stmt_greet(self, tagged_stmt):
        return 'Hey Hi'

    def stmt_bye(self, tagged_stmt):
        return 'Bye Catch you later'

    def stmt_statement(self, tagged_stmt):
        srch_trm = find_chunk(tagged_stmt, r'DCHUNK: {<.*>*}<\.>?')
        logger.info('SEngine:' + srch_trm)
        res, junk = self.find.search(srch_trm)
        return res

    def stmt_other(self, tagged_stmt):
        srch_trm = find_chunk(tagged_stmt, r'DCHUNK: {<.*>*}<\.>?')
        logger.info('SEngine:' + srch_trm)
        res, junk = self.find.search(srch_trm)
        return res

    def stmt_clarify(self, tagged_stmt):
        srch_trm = find_chunk(tagged_stmt, r'DCHUNK: {<.*>*}<\.>?')
        logger.info('SEngine:' + srch_trm)
        res, junk = self.find.search(srch_trm)
        return res

    def stmt_ynquestion(self, tagged_stmt):
        srch_trm = find_chunk(tagged_stmt, r'DCHUNK: {<.*>*}<\.>?')
        logger.info('SEngine:' + srch_trm)
        res, junk = self.find.search(srch_trm)
        return res

    def stmt_yanswer(self, tagged_stmt):
        return 'Acknowledgement accepted'

    def stmt_nanswer(self, tagged_stmt):
        return 'Ok thats fine with me'

    def stmt_accept(self, tagged_stmt):
        return 'Thank you for Acknowledging'

    def stmt_reject(self, tagged_stmt):
        return 'Why not?'
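
process_classified_stmts above resolves each handler by name with getattr. A self-contained sketch of that dispatch pattern (MiniDispatcher and its labels are illustrative, not the real class):

# getattr-based dispatch sketch mirroring process_classified_stmts above.
class MiniDispatcher:
    statement_types = {'Greet': '_greet', 'Bye': '_bye'}

    def handle(self, label, text):
        func = getattr(self, 'stmt' + self.statement_types[label], None)
        if func is None:
            return ''  # no handler registered for this label
        return func(text)

    def stmt_greet(self, text):
        return 'Hey Hi'

    def stmt_bye(self, text):
        return 'Bye Catch you later'

# MiniDispatcher().handle('Greet', 'hello there') -> 'Hey Hi'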
Example #16
class RuleProcessor:
    def __init__(self):
        self.www_lookup = DescLookup()
        self.loc_solver = locationSolve()
        self.find = Query()
        self.coarse_classifier_obj = 'data/q_coarse_classifier.pickle'
        self.fine_classifier_obj = 'data/q_fine_classifier.pickle'
        self.qstn_coarse_classifier = pickle.load(
            open(self.coarse_classifier_obj, 'rb'))
        self.qstn_fine_classifier = pickle.load(
            open(self.fine_classifier_obj, 'rb'))
        self.qstn_coarse_classes = [
            'LOC', 'DESC', 'ENTY', 'ABBR', 'NUM', 'HUM'
        ]
        self.qstn_fine_classes = [
            'def', 'abb', 'event', 'dist', 'mount', 'word', 'color', 'gr',
            'dismed', 'product', 'file', 'period', 'temp', 'animal', 'desc',
            'sport', 'currency', 'volsize', 'letter', 'directory', 'money',
            'code', 'symbol', 'instru', 'title', 'techmeth', 'count', 'date',
            'reason', 'manner', 'state', 'city', 'perc', 'ord', 'religion',
            'lang', 'weight', 'country', 'plant', 'cremat', 'food', 'ind',
            'exp', 'veh', 'substance', 'body', 'speed', 'termeq', 'other'
        ]

    def query_analyzer(self, q, ip):
        res = self.classify_query(q, ip)
        if not res:
            return 'Nothing found'
        return res

    def classify_query(self, postgq, ip):
        cc, fc = self.classify_qstn_type(postgq)
        res = self.qstn_solve_chooser(cc, fc, postgq, ip)
        if not res:
            srch_trm = find_chunk(postgq)
            logger.debug('www search_term %s' % srch_trm)
            if srch_trm and len(srch_trm) >= 3:
                res += self.www_lookup.get_data(srch_trm)
        if not res:
            srch_trm = find_chunk(postgq, r'DCHUNK: <W.*>?<V.*>*?{<.*>*?}<\.>')
            logger.info('SEngine:' + srch_trm)
            res, res_list = self.find.search(srch_trm)
        return res

    def classify_qstn_type(self, pos_sent):
        qstn_c_prob = {}
        qstn_f_prob = {}
        features = self.qstn_feature_extractor_v2(pos_sent)
        # coarse class classifier
        qstn_c_prob_dist = self.qstn_coarse_classifier.prob_classify(features)
        # for information only
        for label in self.qstn_coarse_classifier.labels():
            qstn_c_prob[label] = qstn_c_prob_dist.logprob(label)
        logger.info(
            sorted(qstn_c_prob.items(), key=itemgetter(1), reverse=True)[:3])

        qstn_c_class = qstn_c_prob_dist.max()
        features['coarse'] = qstn_c_class
        # fine class classifier
        qstn_f_prob_dist = self.qstn_fine_classifier.prob_classify(features)
        for label in self.qstn_fine_classifier.labels():
            qstn_f_prob[label] = qstn_f_prob_dist.logprob(label)
        logger.info(
            sorted(qstn_f_prob.items(), key=itemgetter(1), reverse=True)[:3])

        qstn_f_class = qstn_f_prob_dist.max()
        return qstn_c_class, qstn_f_class

    def qstn_feature_extractor_v2(self, pos_sent):
        features = {}
        for (w, t) in pos_sent:
            if t.startswith('W'):  # or w == 'EX':
                features['qstn_word'] = w.lower()
        features['question_focus'] = find_chunk(pos_sent)
        features['pos_tags'] = ' '.join([a[1] for a in pos_sent])
        features['1st verb'] = find_chunk(pos_sent,
                                          'VCHUNK: <.*>*?{<V.*>+}<.*>*')
        return features

    def fine_feature_extractor(self, pos_sent):
        features = self.qstn_feature_extractor_v2(pos_sent)
        # Mirror classify_qstn_type: the coarse label becomes a feature
        # for the fine classifier.
        coarse = self.qstn_coarse_classifier.classify(features)
        features['coarse'] = coarse
        return features

    def qstn_solve_chooser(self, cc, fc, postgq, ip):
        res = ''
        if cc == 'DESC' and fc == 'def':
            srch_trm = find_chunk(
                postgq, 'DCHUNK: <W.*><V.*><DT>*{<V.*>*<RB.*>*<JJ>*<N.*>*}')
            if srch_trm:
                res = self.www_lookup.get_data(srch_trm, fc)
        elif cc == 'LOC':
            res = self.loc_solver.loc_solve_chooser(fc, postgq, ip)
        return res
Example #17
class StmtClassify:
    def __init__(self):
        self.qry = RuleProcessor()
        self.db_con = ChatDB()
        self.find = Query()
        self.statement_types = {
            'Emotion': '_emotion',
            'whQuestion': '_whquestion',
            'yAnswer': '_yanswer',
            'Reject': '_reject',
            'Emphasis': '_emphasis',
            'Greet': '_greet',
            'Statement': '_statement',
            'Other': '_other',
            'Clarify': '_clarify',
            'Bye': '_bye',
            'Accept': '_accept',
            'ynQuestion': '_ynquestion',
            'nAnswer': '_nanswer',
            'Continuer': '_continuer'
        }
        self.classifier_obj = 'data/stmnt_classifier.pickle'
        #self.classifier_obj = 'data/q_fine_classifier.pickle'
        self.classifier = pickle.load(open(self.classifier_obj, 'rb'))

    def classify(self, q_obj):
        pos_tagged_q = pos_tag_sent(q_obj['q'])
        logger.debug(pos_tagged_q)
        features = self.stmt_features_extract(pos_tagged_q)
        logger.debug(features)
        statement_prob = {}
        prob_dist = self.classifier.prob_classify(features)
        # for information only
        for label in self.classifier.labels():
            statement_prob[label] = prob_dist.logprob(label)
        logger.info(
            sorted(statement_prob.items(), key=itemgetter(1),
                   reverse=True)[:3])

        stmt_class = prob_dist.max()
        # insert labelled statement to db for training
        self.add_to_db(pos_tagged_q, stmt_class)
        res = self.process_classified_stmts(pos_tagged_q, stmt_class,
                                            q_obj['ip'])
        return res

    def stmt_features_extract(self, tagged_stmt):
        features = {}
        pos_l = []
        first = True
        for (w, t) in tagged_stmt:
            pos_l.append(t)
            if first:
                features['starts_with'] = w.lower()
                first = False
                continue
            features['contains(%s)' % w.lower()] = True
        features['pos'] = ' '.join(pos_l)
        return features

    def stmt_features_extract_old(self, tagged_stmt):
        features = {}
        pos_l = []
        for (w, t) in tagged_stmt:
            features['contains(%s)' % w.lower()] = True
            pos_l.append(t)
        features['pos'] = ' '.join(pos_l)
        return features

    def add_to_db(self, tagged_stmt, label):
        stmt_doc = {}
        pos_l = []
        word_l = []
        for (w, t) in tagged_stmt:
            word_l.append(w)
            pos_l.append(t)
        stmt_doc['tokens'] = word_l
        stmt_doc['pos_tags'] = pos_l
        stmt_doc['class'] = label
        # insert to db
        stmt_id = self.db_con.insert_stmt(stmt_doc)
        if stmt_id:
            logger.info(stmt_id)

    def process_classified_stmts(self, tagged_stmt, label, ip):
        res = ''
        func_name = 'stmt' + self.statement_types[label]
        try:
            self.func = getattr(self, func_name)
        except AttributeError:
            logger.exception("Function not found: " + func_name)
        else:
            if label == 'whQuestion':
                res = self.func(tagged_stmt, ip)
            else:
                res = self.func(tagged_stmt)
        return res

    # Handlers for each identifiable statement type
    def stmt_emotion(self, tagged_stmt):
        # TODO: understand +/-ve emotions and reply accordingly
        return 'I wish I could understand your feelings'

    def stmt_whquestion(self, tagged_stmt, ip):
        return self.qry.query_analyzer(tagged_stmt, ip)

    def stmt_continuer(self, tagged_stmt):
        return 'Then whats next?'

    def stmt_emphasis(self, tagged_stmt):
        return 'ok ok I get it'

    def stmt_greet(self, tagged_stmt):
        return 'Hey Hi'

    def stmt_bye(self, tagged_stmt):
        return 'Bye Catch you later'

    def stmt_statement(self, tagged_stmt):
        srch_trm = find_chunk(tagged_stmt, r'DCHUNK: {<.*>*}<\.>?')
        logger.info('SEngine:' + srch_trm)
        res, junk = self.find.search(srch_trm)
        return res

    def stmt_other(self, tagged_stmt):
        srch_trm = find_chunk(tagged_stmt, r'DCHUNK: {<.*>*}<\.>?')
        logger.info('SEngine:' + srch_trm)
        res, junk = self.find.search(srch_trm)
        return res

    def stmt_clarify(self, tagged_stmt):
        srch_trm = find_chunk(tagged_stmt, r'DCHUNK: {<.*>*}<\.>?')
        logger.info('SEngine:' + srch_trm)
        res, junk = self.find.search(srch_trm)
        return res

    def stmt_ynquestion(self, tagged_stmt):
        srch_trm = find_chunk(tagged_stmt, r'DCHUNK: {<.*>*}<\.>?')
        logger.info('SEngine:' + srch_trm)
        res, junk = self.find.search(srch_trm)
        return res

    def stmt_yanswer(self, tagged_stmt):
        return 'Acknowledgement accepted'

    def stmt_nanswer(self, tagged_stmt):
        return 'Ok thats fine with me'

    def stmt_accept(self, tagged_stmt):
        return 'Thank you for Acknowledging'

    def stmt_reject(self, tagged_stmt):
        return 'Why not?'