Пример #1
0
 def _validate(self, criteria, toValidate):
     """Check that the field named by ``criteria['name']`` is usable in ``toValidate``.

     Returns True when ``criteria`` is None, when the field is found and no
     ``'expected'`` entry is given, when ``'expected'`` is a list (nested
     criteria are not evaluated here), or when ``'expected'`` is a ``str``
     equal to the looked-up value. Every other case returns False.

     The value is fetched with ``DictUtils.defaultIfNone(toValidate, None, name)``;
     a None result is treated as "field not present".
     """
     log = PresentValidator._LOGGER
     if criteria is None:
         log.debug("None criteria, so returning True")
         return True
     if toValidate is None:
         log.debug("None toValidate, so returning False")
         return False
     if 'name' not in criteria:
         log.debug("'name' not in criteria, so returning False")
         return False

     name = criteria['name']
     # Looked up once and reused for both the presence check and the comparison.
     toValidateVal = DictUtils.defaultIfNone(toValidate, None, name)
     if toValidateVal is None:
         log.debug("criteria['name']: " + name + " not in toValidate, so returning False")
         return False
     if 'expected' not in criteria:
         log.debug("no expected found in criteria, so returning True")
         return True

     expected = criteria['expected']
     # Exact type checks (not isinstance) are kept on purpose so that
     # subclasses of list/str keep falling through to the branches below.
     if type(expected) is list:
         log.debug("will match nested criteria later")
         return True
     if type(expected) is str:
         log.debug("matching string")
         return expected == toValidateVal
     if type(expected) != type(toValidateVal):
         log.debug("type mismatch criteria[expected]:" + str(type(expected)) + ", toValidate['" + name + "']: " + str(type(toValidateVal)) + ", so returning False")
         return False
     # NOTE(review): matching non-str, non-list types are still rejected here;
     # this mirrors the original behaviour - confirm it is intended.
     log.debug("oops! validation failed")
     return False
Пример #2
0
def create_dicts(sentences):
    """Build the two count dictionaries from ``sentences``.

    Each sentence is fed to ``DictUtils.insert_sentence_to_dicts``; the second
    dictionary is then passed through ``DictUtils.convert_rare_words`` and the
    sum of its values is stored in the first dictionary under ``'ALL'``.

    Returns the pair ``(dict_q, dict_e)``.
    """
    q_counts, e_counts = {}, {}
    for current in sentences:
        DictUtils.insert_sentence_to_dicts(q_counts, e_counts, current)

    e_counts = DictUtils.convert_rare_words(e_counts)
    total = sum(e_counts.values())
    DictUtils.update_dict(q_counts, 'ALL', total)
    return q_counts, e_counts
Пример #3
0
def main(input_file_name, q_mle, e_mle, greedy_hmm_output, extra_file_name):
    """Tag the input file with ``greedy`` and write the result.

    Reads the sentences and the two count files, converts the count lines to
    dictionaries, tags with ``greedy`` and writes the tagged text to
    ``greedy_hmm_output``. The elapsed time is printed at the end.
    ``extra_file_name`` is accepted but not used here.
    """
    started_at = datetime.now()

    sentences = FileUtils.read_lines(input_file_name)
    q_lines = FileUtils.read_lines(q_mle)
    dict_q = DictUtils.convert_line_to_dict(q_lines)
    e_lines = FileUtils.read_lines(e_mle)
    dict_e = DictUtils.convert_line_to_dict(e_lines)

    unknown_tags = DictUtils.possible_tags(UNK, dict_e)
    FileUtils.write_tagged_text(
        greedy_hmm_output,
        greedy(sentences, dict_q, dict_e, unknown_tags))

    elapsed = datetime.now() - started_at
    print('Running Time: {0}'.format(elapsed))
Пример #4
0
def main(input_file_name, q_mle, e_mle, hmm_viterbi_predictions, extra_file_name):
    """Tag the input file with ``viterbi`` and write the result.

    Reads the sentences and the two count files, converts the count lines to
    dictionaries, tags with ``viterbi`` and writes the tagged text to
    ``hmm_viterbi_predictions``. The elapsed time is printed at the end.
    ``extra_file_name`` is accepted but not used here.
    """
    started_at = datetime.now()

    sentences = FileUtils.read_lines(input_file_name)
    q_lines = FileUtils.read_lines(q_mle)
    dict_q = DictUtils.convert_line_to_dict(q_lines)
    e_lines = FileUtils.read_lines(e_mle)
    dict_e = DictUtils.convert_line_to_dict(e_lines)

    unknown_tags = DictUtils.possible_tags('*UNK*', dict_e)
    FileUtils.write_tagged_text(
        hmm_viterbi_predictions,
        viterbi(sentences, dict_q, dict_e, unknown_tags))

    elapsed = datetime.now() - started_at
    print('Running Time: {0}'.format(elapsed))
Пример #5
0
def memm_greedy(sentences, max_sentence_len, features_map, counters_dict, clf):
    """Predict one label per word position, processing all sentences in lockstep.

    For every position below ``max_sentence_len`` a feature dict is built for
    each sentence (an empty dict when the sentence is shorter than the
    position), the whole batch is handed to ``get_prediction_of_all_words``
    and each returned prediction is appended to its sentence's list. The time
    spent per position is printed.

    Returns a list with one prediction list per sentence.
    """
    predictions = [[] for _ in range(len(sentences))]

    for position in range(max_sentence_len):
        tick = datetime.now()

        # A fresh batch per position; shorter sentences contribute a placeholder.
        batch = []
        for idx, sentence in enumerate(sentences):
            if position >= len(sentence):
                batch.append({})
                continue
            batch.append(FeaturesUtils.get_word_features(
                position, sentence, predictions[idx],
                DictUtils.is_rare(counters_dict, sentence[position])))

        predicted = get_prediction_of_all_words(batch, clf, features_map)

        tock = datetime.now()
        print('word i={1} Running Time: {0}'.format(tock - tick, position))

        for idx, label in enumerate(predicted):
            predictions[idx].append(label)

    return predictions
Пример #6
0
 def validate(criteria, toValidate):
     """Validate ``toValidate`` against ``criteria`` using the validator named by ``criteria['check']``.

     Returns True when ``criteria`` is None or has no ``'check'`` entry.
     Returns False when no validator is found for the check or the validator
     rejects the input. When the validator is a ``PresentValidator`` and
     ``criteria['expected']`` is a list, each entry that has a ``'check'`` is
     validated recursively against the value found under ``criteria['name']``;
     an entry with a ``'check'`` but no ``'name'`` makes the whole validation
     fail.
     """
     log = ValidatorFactory._LOGGER
     log.debug("criteria: " + str(criteria))
     log.debug("toValidate: " + str(toValidate))
     if criteria is None:
         log.debug("None criteria, so returning True")
         return True
     if 'check' not in criteria:
         log.debug("'check' not in criteria, so returning True, criteria: " + str(criteria))
         return True

     validator = ValidatorFactory.getValidator(criteria['check'])
     if validator is None:
         log.error("None validator found for, so returning False, check: " + criteria['check'])
         return False

     isValid = validator.validate(criteria, toValidate)
     log.debug("isValid: " + str(isValid))
     if not isValid:
         return False

     # Only a PresentValidator with a list of expectations carries nested criteria.
     hasNested = (isinstance(validator, PresentValidator)
                  and 'expected' in criteria
                  and type(criteria['expected']) is list)
     if not hasNested:
         return True

     log.debug("will match nested criteria now ...")
     for nextCriteria in criteria['expected']:
         log.debug("nextCriteria:" + str(nextCriteria))
         if 'check' not in nextCriteria:
             # Entries without a check are informational and are skipped.
             continue
         if 'name' not in nextCriteria:
             log.error("no name property found for check: " + str(nextCriteria))
             return False
         # Nested criteria are evaluated against the value of the parent field.
         nextToValidate = DictUtils.defaultIfNone(toValidate, None, criteria['name'])
         log.debug("nextToValidate:" + str(nextToValidate))
         if not ValidatorFactory.validate(nextCriteria, nextToValidate):
             return False
     return True
Пример #7
0
def create_features(words_features_list, words, tags, dict_e):
    """Append one feature dict per word to ``words_features_list``.

    For each position the feature dict comes from
    ``FeaturesUtils.get_word_features`` (with the rarity flag taken from
    ``DictUtils.is_rare`` on the ``(word, tag)`` pair), gets the word's tag
    stored under ``'tag'``, is appended to the output list and printed.
    """
    for position, word in enumerate(words):
        tag = tags[position]
        rare = DictUtils.is_rare(dict_e, (word, tag))
        features = FeaturesUtils.get_word_features(position, words, tags, rare)
        features['tag'] = tag
        words_features_list.append(features)
        print(features)
Пример #8
0
def get_word_signatures_tag(word, dict_e, unk_tag_list):
    """Map each signature of ``word`` to its possible tags.

    When the only signature returned by ``WordSignatures.get_word_signatures``
    is the lower-cased word itself, the result is ``{UNK: unk_tag_list}``.
    Otherwise every signature is mapped to ``DictUtils.possible_tags`` for it.
    """
    signatures = WordSignatures.get_word_signatures(word)
    if signatures != [word.lower()]:
        return {
            signature: DictUtils.possible_tags(signature, dict_e)
            for signature in signatures
        }
    return {UNK: unk_tag_list}
Пример #9
0
def convert_line_to_lists(line):
    """Split a space-separated line into parallel word and tag lists.

    Every space-separated token is split with ``DictUtils.split_tuple`` into a
    ``(word, tag)`` pair. Returns ``(words, tags)``.
    """
    pairs = [DictUtils.split_tuple(token) for token in line.split(' ')]
    words = [word for word, _ in pairs]
    tags = [tag for _, tag in pairs]
    return words, tags
Пример #10
0
def main(features_file, model_file, feature_map_file):
    """Build the model from a features file and write it with its feature map.

    Reads features and labels, extracts the counters with
    ``DictUtils.extract_features``, builds the feature map and model with
    ``create_features_format``, writes both output files and prints the
    elapsed time.
    """
    started_at = datetime.now()

    all_features, labels = FileUtils.read_features(features_file)
    # Only the counters are needed below; the other two results are unused.
    counters_dict, _word_tag_dict, _unk_tag_dict = DictUtils.extract_features(
        all_features, labels)
    _transformed, features_map, model = create_features_format(
        all_features, labels)

    FileUtils.write_feature_map(feature_map_file, features_map, counters_dict)
    FileUtils.write_logistic_regression_model(model_file, model)

    elapsed = datetime.now() - started_at
    print('Running Time: {0}'.format(elapsed))
Пример #11
0
def get_word_features_list(i, words, prev_predictions, prev_prev_predictions,
                           counters_dict):
    """Build features for word ``i`` under every pair of previous predictions.

    Iterates ``prev_predictions`` in the outer loop and
    ``prev_prev_predictions`` in the inner loop. Returns two parallel lists:
    the feature dicts and the ``(prev, prev_prev)`` pairs that produced them.
    """
    features, tag_pairs = [], []
    for prev_tag in prev_predictions:
        for prev_prev_tag in prev_prev_predictions:
            tag_history = get_tag_list(i, prev_tag, prev_prev_tag)
            rare = DictUtils.is_rare(counters_dict, words[i])
            features.append(
                FeaturesUtils.get_word_features(i, words, tag_history, rare))
            tag_pairs.append((prev_tag, prev_prev_tag))
    return features, tag_pairs
Пример #12
0
 def run(self):
     """Execute every configured step and log a summary of the results.

     Returns early (after logging) when the configuration has no 'steps'
     entry. Otherwise each valid step is executed through the executor
     returned by ``ExecutorFactory.getExecutor`` and the accumulated
     ``control['result']`` data is logged, first as a raw dict and then as a
     formatted table.
     """
     Tester.__LOGGER.info("in run")
     if not 'steps' in self.__config:
         Tester.__LOGGER.info("no test steps to execute")
         return
     # Defaults passed to every executor alongside the step itself.
     default = DictUtils.defaultIfNone(self.__config, None, 'default')
     # Shared mutable state handed to every executor: loop bookkeeping,
     # session data and the result counters that are reported at the end.
     control = {'loop':{'running': False, 'count': 0, 'steps': []}, 
                'session':{'running': False, 'steps': {}},
                'result':{'total':{'count':0, 'time':0},
                          'passed':{'count':0, 'time':0},
                          'failed':{'count':0, 'time':0},
                          'steps':{}
                          }
                }
     for step in self.__config['steps']:
         # Only an explicit False from the validity check skips the step.
         if False == self.__isValidStep(step):
             continue
         executor = ExecutorFactory.getExecutor(step['construct'])
         if None == executor:
             Tester.__LOGGER.error("no executor found for construct: " + step['construct'])
             continue
         executor.execute(default, step, control)
         if isinstance(executor, EndLoopExecutor):
             # Replay the recorded loop steps, then re-run the end-loop
             # executor, until the loop is no longer flagged as running.
             # NOTE(review): presumably the executors update control['loop'];
             # that is not visible in this method - confirm.
             while control['loop']['running']:
                 for tStep in control['loop']['steps']:
                     tStep['executor'].execute(default, tStep['step'], control)
                 executor.execute(default, step, control)
     
     Tester.__LOGGER.info("================================")
     Tester.__LOGGER.info("[SUMMARY JSON]")
     Tester.__LOGGER.info(str(control['result']))
     Tester.__LOGGER.info("================================")
     
     Tester.__LOGGER.info("================================")
     Tester.__LOGGER.info("[SUMMARY]")
     Tester.__LOGGER.info(self.__formatResultSeparator())
     Tester.__LOGGER.info(self.__formatResultHead1())
     Tester.__LOGGER.info(self.__formatResultSeparator())
     Tester.__LOGGER.info(self.__formatResultHead2())
     Tester.__LOGGER.info(self.__formatResultSeparator())
     # One table row per configured step that carries a 'sid'.
     for step in self.__config['steps']:
         if not 'sid' in step:
             continue
         sid = step['sid']
         # NOTE(review): this lookup raises KeyError if no result was recorded
         # for the sid (e.g. a step skipped above) - confirm that cannot happen.
         sidData = control['result']['steps'][sid]
         Tester.__LOGGER.info(self.__formatResultStr(sid, sidData))
     Tester.__LOGGER.info(self.__formatResultSeparator())
     Tester.__LOGGER.info(self.__formatResultStr('OVERALL', control['result']))
     Tester.__LOGGER.info(self.__formatResultSeparator())
     Tester.__LOGGER.info("================================")    
Пример #13
0
def main(input_file_name, model_file_name, feature_map_file, output_file_name):
    """Tag the input file with ``viterbi`` using a stored model and write the result.

    Loads the model, reads the sentences and the feature-map lines, builds the
    feature dictionaries, tags with ``viterbi`` and writes the tagged text to
    ``output_file_name``. The elapsed time is printed at the end.
    """
    started_at = datetime.now()

    # The second element returned with the model is not used here.
    clf, vec = FileUtils.read_logistic_regression_model(model_file_name)
    classes = clf.classes_.tolist()
    sentences = FileUtils.read_lines(input_file_name)
    features_map, counters_dict = DictUtils.create_features_dicts(
        FileUtils.read_lines(feature_map_file))

    FileUtils.write_tagged_text(
        output_file_name,
        viterbi(sentences, features_map, counters_dict, clf, classes))

    elapsed = datetime.now() - started_at
    print('Running Time: {0}'.format(elapsed))
Пример #14
0
 def _validate(self, criteria, toValidate):
     """Check that the field named by ``criteria['name']`` equals ``criteria['expected']``.

     Returns True when ``criteria`` is None, or when the field is found and no
     ``'expected'`` entry is given. Returns False when ``toValidate`` is None,
     when ``criteria`` has no ``'name'``, when the looked-up value is None, or
     when the value's type differs from the expected value's type. Otherwise
     returns the result of comparing the two values with ``==``.

     The value is fetched with ``DictUtils.defaultIfNone(toValidate, None, name)``;
     a None result is treated as "field not present".
     """
     log = ExactValidator._LOGGER
     if criteria is None:
         log.debug("None criteria, so returning True")
         return True
     if toValidate is None:
         log.debug("None toValidate, so returning False")
         return False
     if 'name' not in criteria:
         log.debug("'name' not in criteria, so returning False")
         return False

     name = criteria['name']
     # Looked up once and reused for both the presence check and the comparison.
     toValidateVal = DictUtils.defaultIfNone(toValidate, None, name)
     if toValidateVal is None:
         log.debug("criteria['name']: " + name + " not in toValidate, so returning False")
         return False
     if 'expected' not in criteria:
         log.debug("no expected found in criteria, so returning True")
         return True

     expected = criteria['expected']
     # Exact type comparison is intentional: values of different types never match.
     if type(expected) != type(toValidateVal):
         # The message now states the value that is actually returned.
         log.debug("type mismatch, so returning False - type(criteria['expected']): " + str(type(expected)) + ", type(toValidate['" + name + "']): " + str(type(toValidateVal)))
         return False
     log.debug("matching string criteria['expected']: " + str(expected) + ", toValidate['" + name + "']: " + str(toValidateVal))
     return expected == toValidateVal
Пример #15
0
def main(input_file_name, model_file_name, feature_map_file, output_file_name):
    """Predict labels with ``memm_greedy`` using a stored model and write them.

    Loads the model, reads the sentences (together with the maximum sentence
    length) and the feature-map lines, builds the feature dictionaries, runs
    ``memm_greedy`` and writes the predictions to ``output_file_name``. The
    elapsed time is printed at the end.
    """
    started_at = datetime.now()

    # The second element returned with the model is not used here.
    clf, vec = FileUtils.read_logistic_regression_model(model_file_name)
    sentences, max_sentence_len = FileUtils.read_sentences(input_file_name)
    features_map, counters_dict = DictUtils.create_features_dicts(
        FileUtils.read_lines(feature_map_file))

    predictions = memm_greedy(
        sentences, max_sentence_len, features_map, counters_dict, clf)
    FileUtils.write_prediction(output_file_name, sentences, predictions)

    elapsed = datetime.now() - started_at
    print('Running Time: {0}'.format(elapsed))
Пример #16
0
def compute_q(dict_q, tag, prev_tag, prev_prev_tag, lr1=0.90, lr2=0.09, lr3=0.01):
    """Return a weighted sum of three count ratios looked up in ``dict_q``.

    The three ratios are, in order:
      * count(tag, prev_tag, prev_prev_tag) / count(prev_tag, prev_prev_tag)
      * count(tag, prev_tag) / count(prev_tag)
      * count(tag) / count('ALL')
    Each is computed with ``MathUtils.calc_fraction`` on values read through
    ``DictUtils.get_value`` and weighted by ``lr1``, ``lr2`` and ``lr3``.

    Raises:
        ValueError: if the three weights do not sum to 1 (within a small
            floating-point tolerance).
    """
    # Compare with a tolerance: an exact float test rejects valid weights such
    # as 0.7 + 0.2 + 0.1, which evaluates to 0.9999999999999999.
    if abs((lr1 + lr2 + lr3) - 1.0) > 1e-9:
        raise ValueError('summing factors should be 1 !!!')

    prob1 = MathUtils.calc_fraction(DictUtils.get_value(dict_q, (tag, prev_tag, prev_prev_tag)),
                                    DictUtils.get_value(dict_q, (prev_tag, prev_prev_tag)))
    prob2 = MathUtils.calc_fraction(DictUtils.get_value(dict_q, (tag, prev_tag)),
                                    DictUtils.get_value(dict_q, prev_tag))
    prob3 = MathUtils.calc_fraction(DictUtils.get_value(dict_q, tag),
                                    DictUtils.get_value(dict_q, 'ALL'))

    return lr1 * prob1 + lr2 * prob2 + lr3 * prob3
Пример #17
0
         response = ""
 else:
     isSuccess = True
     statusCode = res.getcode()
     response = res.read()
 
 endTime = time.time()
 timeTaken = (endTime - startTime)
     
 TestExecutor._LOGGER.info("statusCode: " + str(statusCode))
 TestExecutor._LOGGER.info("response: " + str(response))
 
 try:
     jsonRes = json.loads(response)
     TestExecutor._LOGGER.debug("jsonRes: " + str(jsonRes))
     responseDict = DictUtils.convert(jsonRes)
     TestExecutor._LOGGER.debug("responseDict: " + str(responseDict))
 except ValueError, e:
     TestExecutor._LOGGER.debug("caught exception e:" + str(e))
     responseDict = None
 except TypeError, e:
     TestExecutor._LOGGER.debug("caught exception e:" + str(e))
     responseDict = None
 
 if control['session']['running']:
     control['session']['steps'].update({sid: {'IN':inputData, 'SC':statusCode, 'OUT':responseDict}})
     TestExecutor._LOGGER.debug("updated session: " + str(control['session'])) 
 
 if not isSuccess:
     self.__recordHit(control, sid, timeTaken, False)
     return
Пример #18
0
def possible_tags(word, dict_e, unk_tag_list):
    """Return a mapping from ``word`` (or its signatures) to candidate tags.

    When ``DictUtils.possible_tags`` yields at least one tag for the word, the
    result is ``{word: tags}``. Otherwise the lookup is delegated to
    ``get_word_signatures_tag``.
    """
    known_tags = DictUtils.possible_tags(word, dict_e)
    if len(known_tags) != 0:
        return {word: known_tags}
    return get_word_signatures_tag(word, dict_e, unk_tag_list)
Пример #19
0
def compute_e(word, tag, dict_q, dict_e):
    """Return the ratio of the ``(word, tag)`` count to the ``tag`` count.

    The numerator is read from ``dict_e`` and the denominator from ``dict_q``,
    both through ``DictUtils.get_value``; the division is delegated to
    ``MathUtils.calc_fraction``.
    """
    return MathUtils.calc_fraction(
        DictUtils.get_value(dict_e, (word, tag)),
        DictUtils.get_value(dict_q, tag))
Пример #20
0
 def __init__(self, configFilePath):
     """Load the JSON configuration at ``configFilePath`` into this tester.

     The parsed JSON is passed through ``DictUtils.convert`` before being
     stored as the tester's configuration.
     """
     Tester.__LOGGER.debug("created Tester")
     with open(configFilePath, 'r') as configFile:
         parsedConfig = json.load(configFile)
         self.__config = DictUtils.convert(parsedConfig)
     Tester.__LOGGER.debug("loaded config from: " + configFilePath)
Пример #21
0
 def _execute(self, default, step, control):
     if control['loop']['running'] and 0 == control['loop']['count']:
         control['loop']['steps'].append({'step':step, 'executor':self})
     
     sid = DictUtils.defaultIfNone(step, None, 'sid')    
     if None == sid:
         TestExecutor._LOGGER.error("missing id for step: " + str(step))
         sys.exit(1)
     host = DictUtils.defaultIfNone(step, default, 'host')
     path = DictUtils.defaultIfNone(step, default, 'path')
     method = DictUtils.defaultIfNone(step, default, 'method')
     commonInputData = DictUtils.defaultIfNone(None, default, 'input')
     inputData = DictUtils.defaultIfNone(step, default, 'input')
     
     if None != path:
         if path.startswith('/'):
             url = host + path
         else:
             url = host + '/' + path
     else:
         url = host
     
     url = self.__detemplatizeStr(url, control)
     TestExecutor._LOGGER.debug("url: " + url)
     
     if None == inputData:
         inputData = commonInputData
     elif None != commonInputData:
         inputData.update(commonInputData)
     
     if None != inputData:
         inputData = self.__detemplatize(inputData, control, boolToStr=True)
         # data = DictUtils.recursiveUrlencode(inputData)
     else:
         inputData = dict()
     
     TestExecutor._LOGGER.debug("request inputData: " + str(inputData))
     
     startTime = time.time()
     
     try:
         if 'POST' == method:
             #res = urllib2.urlopen(url, data)
             files = self.__extractFiles(inputData)
             data, headers = DictUtils.encode_multipart(inputData, files)
             if None == files or 0 == len(files):
                 TestExecutor._LOGGER.debug("request data: " + str(data))
             else:
                 TestExecutor._LOGGER.debug("request data: SOME POST DATA with files (won't log)")
             req = urllib2.Request(url, data=data, headers=headers)
             res = urllib2.urlopen(req)
         else:
             data = DictUtils.recursiveUrlencode(inputData)
             TestExecutor._LOGGER.debug("request data: " + data)
             url += "?" + data
             res = urllib2.urlopen(url)
     except IOError, e:
         TestExecutor._LOGGER.debug("caught exception e:" + str(e))
         isSuccess = False
         if hasattr(e, 'code'):
             statusCode = e.code
         else:
             statusCode = 500
         if hasattr(e, 'reason'):
             response = e.reason
         else:
             response = ""