Пример #1
0
def est_acc(size=1):
    """Train on successive slices of the training data and evaluate each model.

    The corpus read from ``all_sentiment_shuffled.txt`` is split 80/20 into a
    training part and an evaluation part.  For every ``n`` in ``range(size)``
    a model is trained with ``assignment1.train_nb`` on
    ``train_docs[n:(n + 1) * chunk]`` (``chunk = len(train_docs) // size``)
    and scored on the evaluation part with ``assignment1.evaluate_nb``.

    Args:
        size: Number of training slices, and therefore of models, to build.
            Must be a positive integer.

    Returns:
        A list holding one ``assignment1.evaluate_nb`` result per slice, in
        slice order.

    Raises:
        ValueError: If ``size`` is smaller than 1.
    """
    # Fail fast, before any corpus I/O: a slice count below one is meaningless
    # (zero previously failed only by accident inside an unused computation).
    if size < 1:
        raise ValueError("size must be a positive integer, got %r" % (size,))

    all_docs = assignment1.read_corpus("all_sentiment_shuffled.txt")
    # Drop the first field of every record; only (sentiment, doc) is used.
    all_docs = [(sentiment, doc) for (_, sentiment, doc) in all_docs]
    split_point = int(0.8 * len(all_docs))
    train_docs = all_docs[:split_point]
    eval_docs = all_docs[split_point:]

    # Floor division keeps the slice length an integer no matter how the
    # interpreter defines "/" on ints; it is loop-invariant, so compute once.
    chunk = len(train_docs) // size

    results = []
    for n in range(size):
        # NOTE(review): the slice starts at n, not n * chunk, so consecutive
        # slices overlap and grow instead of partitioning the training data.
        # Kept as-is because the intended scheme is unclear -- confirm.
        trained_data = assignment1.train_nb(train_docs[n:(n + 1) * chunk])
        results.append(assignment1.evaluate_nb(trained_data, eval_docs))

    return results
Пример #2
0
def cross_val(N=5):
    """Run N-fold cross-validation and return the per-document outcomes.

    The corpus from ``all_sentiment_shuffled.txt`` is cut into ``N``
    consecutive folds.  Each fold is held out once while a model is trained
    with ``assignment1.train_nb`` on the remaining documents; every held-out
    document is then classified with ``assignment1.classify_nb``.

    The value of ``acc_ci(results, 0.95)`` is printed before returning
    (presumably a 95% confidence interval for the accuracy -- ``acc_ci`` is
    defined outside this block).

    Args:
        N: Number of folds.

    Returns:
        A list of booleans, one per evaluated document across all folds,
        True where the predicted sentiment equals the labelled one.
    """
    corpus = assignment1.read_corpus("all_sentiment_shuffled.txt")
    labelled = [(sentiment, doc) for (_, sentiment, doc) in corpus]
    total = len(labelled)

    outcomes = []
    for fold in range(N):
        # Fold boundaries as fractions of the corpus, truncated to indices.
        lo = int(float(fold) / N * total)
        hi = int(float(fold + 1) / N * total)
        # Train on everything outside [lo, hi), evaluate on what is inside.
        model = assignment1.train_nb(labelled[:lo] + labelled[hi:])
        outcomes.extend(
            label == assignment1.classify_nb(model, text)
            for (label, text) in labelled[lo:hi]
        )

    print(acc_ci(outcomes, 0.95))
    return outcomes
Пример #3
0
def cross_val(N=5):
    """Evaluate the classifier with N-fold cross-validation.

    Reads ``all_sentiment_shuffled.txt``, and for each of the ``N``
    consecutive folds trains ``assignment1.train_nb`` on the documents
    outside the fold and classifies the documents inside it with
    ``assignment1.classify_nb``.

    As a side effect the result of ``acc_ci(verdicts, 0.95)`` is printed
    (``acc_ci`` lives outside this block; presumably it yields a confidence
    interval at the 0.95 level).

    Args:
        N: How many folds the corpus is divided into.

    Returns:
        The correctness flags (booleans) of every classified document,
        concatenated in fold order.
    """
    records = assignment1.read_corpus("all_sentiment_shuffled.txt")
    docs = [(sentiment, doc) for (_, sentiment, doc) in records]
    n_docs = len(docs)
    verdicts = []

    for k in range(N):
        start = int(float(k) / N * n_docs)
        stop = int(float(k + 1) / N * n_docs)
        held_out = docs[start:stop]
        rest = docs[:start] + docs[stop:]
        model = assignment1.train_nb(rest)
        verdicts += [
            gold == assignment1.classify_nb(model, text)
            for (gold, text) in held_out
        ]

    print(acc_ci(verdicts, 0.95))
    return verdicts
Пример #4
0
def est_acc(size=1):
    """Train on successive slices of the training data and evaluate each model.

    The corpus read from ``all_sentiment_shuffled.txt`` is split 80/20 into a
    training part and an evaluation part.  For every ``n`` in ``range(size)``
    a model is trained with ``assignment1.train_nb`` on
    ``train_docs[n:(n + 1) * chunk]`` (``chunk = len(train_docs) // size``)
    and scored on the evaluation part with ``assignment1.evaluate_nb``.

    Args:
        size: Number of training slices, and therefore of models, to build.
            Must be a positive integer.

    Returns:
        A list holding one ``assignment1.evaluate_nb`` result per slice, in
        slice order.

    Raises:
        ValueError: If ``size`` is smaller than 1.
    """
    # Fail fast, before any corpus I/O: a slice count below one is meaningless
    # (zero previously failed only by accident inside an unused computation).
    if size < 1:
        raise ValueError("size must be a positive integer, got %r" % (size,))

    all_docs = assignment1.read_corpus("all_sentiment_shuffled.txt")
    # Drop the first field of every record; only (sentiment, doc) is used.
    all_docs = [(sentiment, doc) for (_, sentiment, doc) in all_docs]
    split_point = int(0.8 * len(all_docs))
    train_docs = all_docs[:split_point]
    eval_docs = all_docs[split_point:]

    # Floor division keeps the slice length an integer no matter how the
    # interpreter defines "/" on ints; it is loop-invariant, so compute once.
    chunk = len(train_docs) // size

    results = []
    for n in range(size):
        # NOTE(review): the slice starts at n, not n * chunk, so consecutive
        # slices overlap and grow instead of partitioning the training data.
        # Kept as-is because the intended scheme is unclear -- confirm.
        trained_data = assignment1.train_nb(train_docs[n:(n + 1) * chunk])
        results.append(assignment1.evaluate_nb(trained_data, eval_docs))

    return results
Пример #5
0
def classify(classifier):
    """Train the chosen classifier and report per-document correctness.

    The corpus from ``all_sentiment_shuffled.txt`` is split 80/20; the first
    part trains the model and the second part is classified.

    Args:
        classifier: ``"assignment1"`` to use ``assignment1.train_nb`` /
            ``assignment1.classify_nb``, or ``"scikit"`` to use
            ``ec.train_sk`` / ``ec.classify_sk``.

    Returns:
        A list of booleans, one per evaluation document, True where the
        predicted sentiment equals the labelled one.  For any other
        ``classifier`` value a hint is printed and an empty list is returned.
    """
    corpus = assignment1.read_corpus("all_sentiment_shuffled.txt")
    labelled = [(sentiment, doc) for (_, sentiment, doc) in corpus]
    cut = int(0.8 * len(labelled))
    training, held_out = labelled[:cut], labelled[cut:]

    if classifier == "assignment1":
        model = assignment1.train_nb(training)
        return [
            label == assignment1.classify_nb(model, text)
            for (label, text) in held_out
        ]

    if classifier == "scikit":
        model = ec.train_sk(training)
        # Note the argument order: classify_sk takes the document first.
        return [
            label == ec.classify_sk(text, model)
            for (label, text) in held_out
        ]

    print("Please set classifier as assignment1 or scikit")
    return []
Пример #6
0
def classify(classifier):
    """Check a classifier's predictions on the held-out 20% of the corpus.

    Reads ``all_sentiment_shuffled.txt``, trains on the first 80% of the
    documents and classifies the remaining ones.

    Args:
        classifier: Backend selector.  ``"assignment1"`` uses
            ``assignment1.train_nb`` and ``assignment1.classify_nb``;
            ``"scikit"`` uses ``ec.train_sk`` and ``ec.classify_sk``.

    Returns:
        One boolean per held-out document telling whether the prediction
        matched its label.  An unrecognised ``classifier`` prints a usage
        hint and yields an empty list.
    """
    records = assignment1.read_corpus("all_sentiment_shuffled.txt")
    pairs = [(sentiment, doc) for (_, sentiment, doc) in records]
    boundary = int(0.8 * len(pairs))
    train_part = pairs[:boundary]
    test_part = pairs[boundary:]

    # Pick a one-argument prediction function for the requested backend.
    if classifier == "assignment1":
        model = assignment1.train_nb(train_part)
        predict = lambda text: assignment1.classify_nb(model, text)
    elif classifier == "scikit":
        model = ec.train_sk(train_part)
        predict = lambda text: ec.classify_sk(text, model)
    else:
        print("Please set classifier as assignment1 or scikit")
        return []

    return [gold == predict(text) for (gold, text) in test_part]