示例#1
0
    def test(self):
        """Classify every tweet in ``self.test_file`` and write a trace file.

        Each non-blank input line is split on whitespace into at most four
        fields: field 0 is used as the tweet id, field 2 as the actual
        language and field 3 (the rest of the line) as the tweet text.  The
        tweet is lower-cased when ``self.vocabulary`` is a
        ``CaseInsensitiveAlphabetChars``.  Every language in
        ``self.training_model.language_data`` is scored with
        ``get_language_score_of_tweet`` and the first language reaching the
        highest score wins.  One line per tweet is written to
        ``self.trace_file``: id, predicted language, score, actual language
        and ``correct``/``wrong``, separated by two spaces.

        After both files are closed, ``Eval(self.trace_file, self.eval_file)``
        is created and its ``write_to_file()`` is called.

        Raises:
            IndexError: if a non-blank line has fewer than four fields.
        """
        # Context managers close both files even if parsing or scoring raises
        # part-way through the input.
        with open(self.test_file, 'r', encoding="utf-8") as input_file, \
                open(self.trace_file, 'w', encoding="utf-8") as output_file:
            for line in input_file:
                # Skip blank lines.  Equality/emptiness is tested by value
                # (never with `is`), and whitespace-only lines are skipped
                # too because they would yield no fields at all.
                if not line.strip():
                    continue

                fields = line.split(maxsplit=3)
                tweet_id = fields[0]
                actual_language = fields[2]
                tweet = fields[3]

                if isinstance(self.vocabulary, CaseInsensitiveAlphabetChars):
                    tweet = tweet.lower()

                highest_score = None
                language_with_highest_score = None
                for language in self.training_model.language_data.keys():
                    score = self.training_model.get_language_score_of_tweet(language, tweet)
                    # Strict comparison: on ties the earlier language is kept.
                    if highest_score is None or highest_score < score:
                        highest_score = score
                        language_with_highest_score = language

                if language_with_highest_score == actual_language:
                    languages_match = 'correct'
                else:
                    languages_match = 'wrong'
                output_file.write('  '.join([tweet_id, language_with_highest_score, str(highest_score),
                                             actual_language, languages_match]) + '\n')

        # The trace file must be fully written and closed before it is evaluated.
        evaluation = Eval(self.trace_file, self.eval_file)
        evaluation.write_to_file()
示例#2
0
def result(arrayExpected, arrayTest):
    """
    Tally agreement between expected and tested labels and pass it to Eval.

    Positions where either value is None are ignored.  For the remaining
    positions four counters are kept, keyed on whether the two values are
    equal and on whether the expected value is 'yes'.  The counters are
    handed to ``Eval(match_yes, match_no, fail_yes, fail_no)``; nothing is
    returned.

    :param arrayExpected: indexable collection of expected labels
    :param arrayTest: indexable collection of labels to compare, addressed
        with the same indices as ``arrayExpected``
    """
    match_yes = 0
    match_no = 0
    fail_no = 0
    fail_yes = 0
    # Index-based access is kept on purpose: it addresses both collections
    # with the same index and still raises if arrayTest has no such entry.
    for idx in range(len(arrayExpected)):
        expected = arrayExpected[idx]
        if expected is None:
            continue
        tested = arrayTest[idx]
        if tested is None:
            continue

        expected_yes = expected == 'yes'
        if expected == tested:
            if expected_yes:
                match_yes += 1
            else:
                match_no += 1
        else:
            # NOTE(review): fail_yes counts mismatches whose *expected* label
            # is 'yes' -- confirm this is the order Eval expects.
            if expected_yes:
                fail_yes += 1
            else:
                fail_no += 1
    Eval(match_yes, match_no, fail_yes, fail_no)
示例#3
0
def main():
    """Segment a test corpus with greedy MaxMatch and score the result.

    Prompts on stdin for the train and test corpus file names, builds the
    dictionary with ``Dict.make_dict``, reads the gold segmentation with
    ``Gold_standart.extract_seg_sent`` and the unsegmented sentences with
    ``Test_sent.extact``, segments every sentence and passes gold and
    predicted segmentations to ``Eval.score``.
    """
    print('Insert file name of train corpus')
    train = input()
    print('Insert file name of test corpus')
    test = input()

    word_dict = Dict.make_dict(train)
    gold_standard = Gold_standart.extract_seg_sent(test)
    test_sent = Test_sent.extact(test)

    def maxmatch(sentence, dictionary):
        """Return the greedy longest-match segmentation of ``sentence``.

        At each position the longest prefix found in ``dictionary`` becomes
        the next word.  When no prefix matches, the first character is
        emitted as a one-character word and segmentation continues with the
        rest, so the output always covers the sentence in order.  An empty
        sentence yields an empty list.
        """
        words = []
        # Iterative rather than recursive so long sentences cannot hit the
        # recursion limit.
        while sentence:
            # Try prefixes from longest to shortest.  Length 1 is the
            # fallback, and the empty prefix is never tested, so the
            # remaining sentence always shrinks.
            for end in range(len(sentence), 1, -1):
                if sentence[:end] in dictionary:
                    break
            else:
                end = 1
            words.append(sentence[:end])
            sentence = sentence[end:]
        return words

    def parser(used_dict, sentences):
        """Segment each sentence and return the words joined by spaces."""
        res = [' '.join(maxmatch(sent, used_dict)) for sent in sentences]
        print('All sentences were parsed')
        return res

    segmented = parser(word_dict, test_sent)
    Eval.score(gold_standard, segmented)
示例#4
0
def eval_test(method):
    """Evaluate ``method`` with Eval and print the resulting metrics.

    An ``Eval`` is created for ``method`` with the table file name
    'precison_recall.txt' and ``max_result=10``.  Its ``MAP()``, ``MRR()``
    and ``avg_PatK(5)`` results are computed in that order and then printed,
    after which ``precision_recall(20)`` is called.
    """
    evaluator = Eval(method, table_fname='precison_recall.txt', max_result=10)

    # All three metrics are computed (left to right) before anything is
    # printed, so output from the metric calls themselves stays ahead of
    # the summary lines.
    metrics = (
        ('map:', evaluator.MAP()),
        ('mrr:', evaluator.MRR()),
        ('avg_pak:', evaluator.avg_PatK(5)),
    )
    for label, value in metrics:
        print(label, value)

    evaluator.precision_recall(20)
示例#5
0
def naiveBayes(test, train, structFile):
    """
    Classify every row of ``test`` as 'yes'/'no' and pass the tallies to Eval.

    The per-feature tables are built from ``train`` with
    ``allArraysOfFetures(train, 'class')`` and dumped to
    'naiveBayes_model.sav' with joblib before any row is classified.  For
    each test row the 'yes' and 'no' products of the table entries selected
    by the row's feature values are compared; a strictly larger 'yes'
    product predicts 'yes', anything else predicts 'no'.  The four counters
    are handed to ``Eval(match_yes, match_no, fail_yes, fail_no)``; nothing
    is returned.

    :param test: table of rows to classify; read through ``.shape`` and
        ``.iloc`` (presumably a pandas DataFrame -- confirm with caller)
    :param train: training table the model is built from
    :param structFile: not used by this function
    """

    thisDict = allArraysOfFetures(train, 'class')
    rows = test.shape[0]
    match_yes = 0
    match_no = 0
    fail_no = 0
    fail_yes = 0
    # save model to file
    filename = 'naiveBayes_model.sav'
    joblib.dump(thisDict, filename)

    column = getColumnTitles(test)[:-1]  # clean 'class' column
    for row_idx in range(rows):
        row = test.iloc[row_idx]

        noPar = 1
        yesPar = 1
        for col in column:
            # Best effort: a feature whose value or table entry cannot be
            # looked up is skipped.  Only lookup failures are caught, so
            # unrelated errors (and KeyboardInterrupt) are no longer hidden.
            try:
                index = valuesType(train, col).index(row[col])
                yes_factor = thisDict[(col, 'yes')][index]
                no_factor = thisDict[(col, 'no')][index]
            except (ValueError, KeyError, IndexError):
                continue
            # Both factors are applied only after both lookups succeeded, so
            # a skipped feature never affects just one of the two products.
            yesPar *= yes_factor
            noPar *= no_factor

        actual = row['class']
        if yesPar > noPar:
            if actual == 'yes':
                match_yes += 1
            else:
                fail_yes += 1
        else:
            if actual == 'no':
                match_no += 1
            else:
                fail_no += 1
    Eval(match_yes, match_no, fail_yes, fail_no)
示例#6
0
def K_MeansClass(test, train, struct):
    """
    Label per-column cluster centres from ``train`` and evaluate on ``test``.

    For every column returned by ``numericCol(train, struct)`` a set of five
    centres is computed with ``single_kMean``.  Each training row then votes
    'yes' or 'no' (by its 'class' value) for the centre closest to its value
    in each column, and every centre is labelled 'yes' only when it has
    strictly more 'yes' than 'no' votes.  Test rows are classified with
    ``getClass`` and the confusion counts are passed to
    ``Eval(tp, tn, fp, fn)``.  Finally the centres are dumped to
    'K-means_model.sav' with joblib.

    @param train:  training table (presumably a DataFrame loaded from the
                   CSV file -- confirm with caller); needs a 'class' column
    @param test:  test table with the same layout
    @param struct: structure description forwarded to ``numericCol``
    @return: None
    """
    numOfCluster = 5
    column = numericCol(train, struct)  # get column names
    # Rows containing NaN are not dropped; only the index is renumbered so
    # positional labels 0..n-1 can be used below.
    train = train.reset_index(drop=True)
    numOfRow = len(train)
    numericColList = getColList(train, column)  # list of numeric value
    kMeanDict = {}
    for pos, col in enumerate(column):
        kMeanDict[col] = single_kMean(numericColList[pos], numOfCluster)

    yesNoDict = makeColDict(column, kMeanDict)  # init YesNo class counter

    # count the class values seen at each centre
    for i in range(numOfRow):
        is_yes = train['class'][i] == 'yes'
        for col in column:
            closest = takeClosest(train[col][i], kMeanDict[col])
            if is_yes:
                incYes(yesNoDict, col, closest)
            else:
                incNo(yesNoDict, col, closest)

    # classification dict
    classDict = {}
    for col in column:
        # A fresh mapping per column: sharing one dict across columns made
        # every column alias the same labels, so centres with equal values
        # in different columns overwrote each other.
        centerLabels = {}
        for center in kMeanDict[col]:
            votes = yesNoDict[col][center]
            centerLabels[center] = 'yes' if votes['yes'] > votes['no'] else 'no'
        classDict[col] = centerLabels

    # test file
    test = test.reset_index(drop=True)
    tp = 0
    tn = 0
    fp = 0
    fn = 0
    for i in range(len(test)):
        row = test.loc[i, :]  # getRow
        predicted_yes = getClass(classDict, row, column, kMeanDict) == 'yes'
        actual_yes = test['class'][i] == 'yes'
        if predicted_yes:
            if actual_yes:
                tp += 1
            else:
                fp += 1
        else:
            if actual_yes:
                fn += 1
            else:
                tn += 1
    Eval(tp, tn, fp, fn)

    filename = 'K-means_model.sav'
    joblib.dump(kMeanDict, filename)
    """for i in range(len(train)):
示例#7
0
 def __init__(self, name, extra=None, caller=None):
     """Set up the min/max evaluators, then run the inherited initialiser.

     ``name``, ``extra`` and ``caller`` are not interpreted here; they are
     forwarded unchanged to the inherited ``__init__``.
     """
     # Two Eval objects built from the strings 'MinimalValue' and
     # 'MaximalValue'; presumably these name the values evaluated for the
     # lower and upper range bound -- confirm against Eval.
     # They are assigned before the inherited initialiser runs.
     self._minEvaluator = Eval('MinimalValue')
     self._maxEvaluator = Eval('MaximalValue')
     # The parent initialiser is looked up through
     # RangeIndex.inheritedAttribute and called explicitly with self.
     RangeIndex.inheritedAttribute('__init__')(self, name, extra, caller)