Пример #1
0
def eval_by_labale(lable, docs, trained_data):
    """Compute precision and recall of the naive Bayes classifier for one label.

    Args:
        lable: The label to evaluate.
        docs: Iterable of ``(gold_label, document)`` pairs.
        trained_data: Model handed unchanged to ``assignment1.classify_nb``.

    Returns:
        A dict with the keys ``"precision"`` and ``"recall"``, both floats.
        A measure whose denominator is zero (nothing was predicted as
        ``lable``, or no document carries ``lable``) is reported as ``0.0``
        instead of raising ``ZeroDivisionError``.
    """
    # Classify every document exactly once and record, per document, whether
    # the gold label and the predicted label match the requested label.
    outcomes = [
        (gold == lable, lable == assignment1.classify_nb(trained_data, doc))
        for (gold, doc) in docs
    ]
    # Documents predicted as `lable` that really carry it (true positives).
    true_pos = sum(1 for (is_gold, is_guess) in outcomes if is_gold and is_guess)
    # Every document predicted as `lable` (precision denominator).
    guessed = sum(1 for (_, is_guess) in outcomes if is_guess)
    # Every document whose gold label is `lable` (recall denominator).
    relevant = sum(1 for (is_gold, _) in outcomes if is_gold)
    # float() keeps the ratios fractional even under Python 2 integer division.
    return {
        "precision": float(true_pos) / guessed if guessed else 0.0,
        "recall": float(true_pos) / relevant if relevant else 0.0,
    }
Пример #2
0
def eval_by_labale(lable, docs, trained_data):
    """Return precision and recall for ``lable`` as a dictionary.

    Args:
        lable: The label whose precision/recall is wanted.
        docs: Iterable of ``(gold_label, document)`` pairs.
        trained_data: Model handed unchanged to ``assignment1.classify_nb``.

    Returns:
        ``{"precision": float, "recall": float}``.  When a denominator is
        zero (no document predicted as ``lable`` / no document labelled
        ``lable``) the corresponding value is ``0.0`` rather than a
        ``ZeroDivisionError``.
    """
    n_correct = 0    # predicted as `lable` and actually labelled `lable`
    n_predicted = 0  # predicted as `lable`, whatever the gold label is
    n_gold = 0       # actually labelled `lable`, whatever the prediction is
    for (gold, doc) in docs:
        # One classifier call per document; the result feeds all counters.
        predicted_here = lable == assignment1.classify_nb(trained_data, doc)
        gold_here = gold == lable
        if predicted_here:
            n_predicted += 1
        if gold_here:
            n_gold += 1
        if predicted_here and gold_here:
            n_correct += 1

    # Explicit float conversion: plain `/` on two ints truncates in Python 2.
    precision = float(n_correct) / n_predicted if n_predicted else 0.0
    recall = float(n_correct) / n_gold if n_gold else 0.0
    return {
        "precision": precision,
        "recall": recall
    }
Пример #3
0
def cross_val(N=5):
    """Run N-fold cross-validation of the naive Bayes classifier.

    The corpus is read from ``all_sentiment_shuffled.txt`` and reduced to
    ``(sentiment, document)`` pairs.  For each of the ``N`` folds, one
    contiguous slice is held out for evaluation and the classifier is trained
    on everything else.

    Args:
        N: Number of folds the corpus is divided into.

    Returns:
        A list of booleans, one per evaluated document across all folds;
        ``True`` where the classifier's guess equals the gold sentiment.

    Side effects:
        Prints the value of ``acc_ci(results, 0.95)`` (the confidence
        interval over all folds, per the original description).
    """
    corpus = assignment1.read_corpus("all_sentiment_shuffled.txt")
    labelled = [(sentiment, doc) for (_, sentiment, doc) in corpus]
    total = len(labelled)
    results = []
    for fold_nbr in range(N):
        # Boundaries of the held-out slice for this fold.
        start = int(float(fold_nbr) / N * total)
        stop = int(float(fold_nbr + 1) / N * total)
        # Train on everything outside [start, stop).
        model = assignment1.train_nb(labelled[:start] + labelled[stop:])
        results.extend(
            gold == assignment1.classify_nb(model, doc)
            for (gold, doc) in labelled[start:stop]
        )
    # Parenthesised single-argument print is valid on Python 2 and Python 3.
    print(acc_ci(results, 0.95))
    return results
Пример #4
0
def cross_val(N=5):
    """Cross-validate the naive Bayes classifier over ``N`` folds.

    Reads ``all_sentiment_shuffled.txt``, keeps the ``(sentiment, document)``
    part of every entry, and for each fold trains on all documents outside
    the fold and classifies the documents inside it.

    Args:
        N: How many train/test splits (folds) to run.

    Returns:
        List of booleans covering every document evaluated in every fold:
        ``True`` when the predicted sentiment matches the gold sentiment.

    Side effects:
        Prints ``acc_ci(results, 0.95)`` once all folds are done (described
        by the original author as the confidence interval of the whole run).
    """
    all_docs = [
        (sentiment, doc)
        for (_, sentiment, doc)
        in assignment1.read_corpus("all_sentiment_shuffled.txt")
    ]
    n_docs = len(all_docs)
    results = []
    for fold_nbr in range(N):
        # [lo, hi) is the evaluation window of the current fold.
        lo = int(float(fold_nbr) / N * n_docs)
        hi = int(float(fold_nbr + 1) / N * n_docs)
        held_out = all_docs[lo:hi]
        training = all_docs[:lo] + all_docs[hi:]
        trained_data = assignment1.train_nb(training)
        results += [
            s == assignment1.classify_nb(trained_data, d)
            for (s, d) in held_out
        ]
    # Function-call form of print works under both Python 2 and Python 3.
    print(acc_ci(results, 0.95))
    return results
Пример #5
0
def classify(classifier):
    """Train on 80% of the corpus and score the chosen classifier on the rest.

    The corpus is read from ``all_sentiment_shuffled.txt``; the first 80% of
    the ``(sentiment, document)`` pairs are used for training and the
    remaining pairs for evaluation.

    Args:
        classifier: ``"assignment1"`` to use ``assignment1.train_nb`` /
            ``assignment1.classify_nb``, or ``"scikit"`` to use
            ``ec.train_sk`` / ``ec.classify_sk``.

    Returns:
        A list of booleans, one per evaluation document; ``True`` where the
        classifier's guess equals the gold sentiment.  For any other
        ``classifier`` value a hint is printed and an empty list is returned.
    """
    corpus = assignment1.read_corpus("all_sentiment_shuffled.txt")
    labelled = [(sentiment, doc) for (_, sentiment, doc) in corpus]
    cut = int(0.8 * len(labelled))
    train_docs, eval_docs = labelled[:cut], labelled[cut:]

    if classifier == "assignment1":
        model = assignment1.train_nb(train_docs)
        return [s == assignment1.classify_nb(model, d) for (s, d) in eval_docs]
    if classifier == "scikit":
        model = ec.train_sk(train_docs)
        # Note the argument order: classify_sk takes the document first.
        return [s == ec.classify_sk(d, model) for (s, d) in eval_docs]

    # Parenthesised single-argument print is valid on Python 2 and Python 3.
    print("Please set classifier as assignment1 or scikit")
    return []
Пример #6
0
def classify(classifier):
    """Evaluate one classifier on a fixed 80/20 split of the corpus.

    Entries of ``all_sentiment_shuffled.txt`` are reduced to
    ``(sentiment, document)`` pairs; the leading 80% train the model and the
    trailing 20% are classified.

    Args:
        classifier: Either ``"assignment1"`` (naive Bayes from the
            ``assignment1`` module) or ``"scikit"`` (the ``ec`` module's
            ``train_sk`` / ``classify_sk``).

    Returns:
        List of booleans, ``True`` for each evaluation document whose
        predicted sentiment equals its gold sentiment.  An unrecognised
        ``classifier`` prints a usage hint and yields an empty list.
    """
    all_docs = [
        (sentiment, doc)
        for (_, sentiment, doc)
        in assignment1.read_corpus("all_sentiment_shuffled.txt")
    ]
    split_point = int(0.8 * len(all_docs))
    train_docs = all_docs[:split_point]
    eval_docs = all_docs[split_point:]

    results = []
    if classifier == "assignment1":
        trained_data = assignment1.train_nb(train_docs)
        results = [
            gold == assignment1.classify_nb(trained_data, text)
            for (gold, text) in eval_docs
        ]
    elif classifier == "scikit":
        trained_data = ec.train_sk(train_docs)
        # classify_sk expects (document, model) -- the reverse of classify_nb.
        results = [
            gold == ec.classify_sk(text, trained_data)
            for (gold, text) in eval_docs
        ]
    else:
        # Function-call form of print works under both Python 2 and Python 3.
        print("Please set classifier as assignment1 or scikit")
    return results