'Perfect Match': 0}

	# Hold out each distinct event in turn: rows whose event matches form the
	# test set, every other row is used for training.
	for event in set(event_dict.values()):
		test_rows = np.argwhere(full_event == event)
		train_rows = np.argwhere(full_event != event)

		# np.argwhere yields one index row per match; element 0 of each row is
		# the position used to look up full_text / full_cat.
		text_test = [full_text[row[0]] for row in test_rows]
		cat_test = [full_cat[row[0]] for row in test_rows]
		text_train = [full_text[row[0]] for row in train_rows]
		cat_train = [full_cat[row[0]] for row in train_rows]

		cat_test_arr = np.array(cat_test, dtype=np.float64)

		# NOTE(review): the meaning of the positional 2000 is defined by
		# Classify, which is not visible here -- confirm before changing it.
		clas = Classify(
			text_train,
			cat_train,
			2000,
			model=LinearSVC(C=c, class_weight=weight, loss=loss, random_state=1),
		)

		predict = clas.predict(text_test)
		simp = clas.simple_evaluation(cat_test, predict)

		# Add this event's results to the running totals; keys that the totals
		# dict does not already track are ignored.
		for key in simp:
			if key not in model_res:
				continue
			model_res[key] += simp[key]

	# Within this excerpt `clas` is only bound inside the loop, so this call
	# goes through the classifier built for the last event visited.
	stats = clas.stats_calc(
		*(model_res[label] for label in (
			'True Positive', 'True Negative', 'False Positive',
			'False Negative', 'One Label', 'Perfect Match')))
示例#2
0
    # One pass per distinct event: that event's rows are the test set and all
    # remaining rows are the training set.
    for event in set(event_dict.values()):
        test_rows = np.argwhere(full_event == event)
        train_rows = np.argwhere(full_event != event)

        # Each argwhere row carries the matching position in element 0.
        text_test = [full_text[idx[0]] for idx in test_rows]
        cat_test = [full_cat[idx[0]] for idx in test_rows]
        text_train = [full_text[idx[0]] for idx in train_rows]
        cat_train = [full_cat[idx[0]] for idx in train_rows]

        cat_test_arr = np.array(cat_test, dtype=np.float64)

        # NOTE(review): the positional 2000 is interpreted by Classify, whose
        # definition is outside this excerpt -- confirm its meaning there.
        clas = Classify(
            text_train,
            cat_train,
            2000,
            model=RandomForestClassifier(
                random_state=1,
                n_jobs=4,
                n_estimators=estimators,
                class_weight=weight,
                criterion=criterion,
                bootstrap=bootstrap,
            ),
        )

        predict = clas.predict(text_test)
        simp = clas.evaluate(cat_test, predict)

        # Fold this event's results into the totals, skipping keys that the
        # totals dict does not already contain.
        for key in simp:
            if key not in model_res:
                continue
            model_res[key] += simp[key]

    stats = clas.stats_calc(model_res['True Positive'],
                            model_res['True Negative'],
                            model_res['False Positive'],
# Build parallel text/label lists in ascending key order, so position i of
# full_text and full_cat always refers to the same record.
# Comprehensions are used instead of a module-level loop variable named `id`,
# which would rebind the built-in id() for the rest of the script.
record_keys = sorted(text_dict.keys())
full_text = [text_dict[key] for key in record_keys]
full_cat = [cat_dict[key] for key in record_keys]

# test_size=0.1 and random_state=1 are passed through unchanged; the fixed
# random_state is what keeps the split repeatable between runs.
text_train, text_test, cat_train, cat_test = train_test_split(
    full_text, full_cat, test_size=.1, random_state=1)

cat_test_arr = np.array(cat_test, dtype=np.float64)

if pretrained:
    # A truthy `pretrained` takes precedence over `classifier`; no training
    # data is handed to Classify on this path.
    clas = Classify(pretrained='pretrained/')
else:
    # Choose the estimator named by `classifier`. None means no `model`
    # argument is passed, leaving the choice to Classify's own default
    # (defined outside this excerpt).
    if classifier == 'rf':
        chosen_model = RandomForestClassifier(class_weight='balanced',
                                              n_estimators=100)
    elif classifier == 'svc':
        chosen_model = SVC(class_weight='balanced')
    elif classifier == 'linearsvc':
        chosen_model = LinearSVC(class_weight='balanced')
    else:
        chosen_model = None

    if chosen_model is None:
        clas = Classify(text_train, cat_train)
    else:
        clas = Classify(text_train, cat_train, model=chosen_model)

predict = clas.predict(text_test)
evals = clas.evaluation_(
示例#4
0
    }

    # Hold out each distinct event in turn: matching rows are tested on, all
    # other rows are trained on.
    for event in set(event_dict.values()):
        test_rows = np.argwhere(full_event == event)
        train_rows = np.argwhere(full_event != event)

        # Element 0 of every argwhere row is the position into the parallel
        # full_text / full_cat sequences.
        text_test = [full_text[row[0]] for row in test_rows]
        cat_test = [full_cat[row[0]] for row in test_rows]
        text_train = [full_text[row[0]] for row in train_rows]
        cat_train = [full_cat[row[0]] for row in train_rows]

        cat_test_arr = np.array(cat_test, dtype=np.float64)

        # NOTE(review): the positional 1000 is interpreted by Classify, which
        # is not visible here -- confirm its meaning before tuning it.
        clas = Classify(
            text_train,
            cat_train,
            1000,
            model=KNeighborsClassifier(n_neighbors=k, weights=weight, n_jobs=2),
        )

        predict = clas.predict(text_test)
        simp = clas.simple_evaluation(cat_test, predict)

        # Accumulate only the entries that the totals dict already tracks.
        for key in simp:
            if key not in model_res:
                continue
            model_res[key] += simp[key]

    # Within this excerpt `clas` is only bound inside the loop, so this call
    # goes through the classifier built for the last event visited.
    stats = clas.stats_calc(
        *(model_res[label] for label in (
            'True Positive', 'True Negative', 'False Positive',
            'False Negative', 'One Label', 'Perfect Match')))
示例#5
0
        'Perfect Match': 0
    }

    # One pass per distinct event: that event's rows are evaluated, the rest
    # are used to fit the classifier.
    for event in set(event_dict.values()):
        test_rows = np.argwhere(full_event == event)
        train_rows = np.argwhere(full_event != event)

        # Each argwhere row carries the matching position in element 0.
        text_test = [full_text[idx[0]] for idx in test_rows]
        cat_test = [full_cat[idx[0]] for idx in test_rows]
        text_train = [full_text[idx[0]] for idx in train_rows]
        cat_train = [full_cat[idx[0]] for idx in train_rows]

        cat_test_arr = np.array(cat_test, dtype=np.float64)

        # NOTE(review): the positional 2000 is interpreted by Classify, whose
        # definition is outside this excerpt -- confirm its meaning there.
        clas = Classify(
            text_train,
            cat_train,
            2000,
            model=BernoulliNB(alpha=alpha),
        )

        predict = clas.predict(text_test)
        simp = clas.evaluate(cat_test, predict)

        # Fold this event's results into the totals, skipping unknown keys.
        for key in simp:
            if key not in model_res:
                continue
            model_res[key] += simp[key]

    # Within this excerpt `clas` is only bound inside the loop, so this call
    # goes through the classifier built for the last event visited.
    stats = clas.stats_calc(
        *(model_res[label] for label in (
            'True Positive', 'True Negative', 'False Positive',
            'False Negative', 'One Label', 'Perfect Match')))
示例#6
0
        'Perfect Match': 0
    }

    # Hold out each distinct event in turn: its rows become the test set and
    # every other row becomes training data.
    for event in set(event_dict.values()):
        test_rows = np.argwhere(full_event == event)
        train_rows = np.argwhere(full_event != event)

        # Element 0 of every argwhere row is the position into the parallel
        # full_text / full_cat sequences.
        text_test = [full_text[pos[0]] for pos in test_rows]
        cat_test = [full_cat[pos[0]] for pos in test_rows]
        text_train = [full_text[pos[0]] for pos in train_rows]
        cat_train = [full_cat[pos[0]] for pos in train_rows]

        cat_test_arr = np.array(cat_test, dtype=np.float64)

        # NOTE(review): the positional 2000 is interpreted by Classify, which
        # is not visible here -- confirm its meaning before tuning it.
        clas = Classify(
            text_train,
            cat_train,
            2000,
            model=LogisticRegression(C=c, class_weight=weight),
        )

        predict = clas.predict(text_test)
        simp = clas.evaluate(cat_test, predict)

        # Accumulate only the entries that the totals dict already tracks.
        for key in simp:
            if key not in model_res:
                continue
            model_res[key] += simp[key]

    # Within this excerpt `clas` is only bound inside the loop, so this call
    # goes through the classifier built for the last event visited.
    stats = clas.stats_calc(
        *(model_res[label] for label in (
            'True Positive', 'True Negative', 'False Positive',
            'False Negative', 'One Label', 'Perfect Match')))
full_cat_confusion = {}

# Evaluate one held-out event per iteration: rows of that event are predicted
# by a classifier built from every other event's rows.
for event in set(event_dict.values()):
    print("Event: ", event)

    test_rows = np.argwhere(full_event == event)
    train_rows = np.argwhere(full_event != event)

    # Each argwhere row carries the matching position in element 0.
    text_test = [full_text[row[0]] for row in test_rows]
    cat_test = [full_cat[row[0]] for row in test_rows]
    text_train = [full_text[row[0]] for row in train_rows]
    cat_train = [full_cat[row[0]] for row in train_rows]

    cat_test_arr = np.array(cat_test, dtype=np.float64)

    if pretrained:
        # A truthy `pretrained` takes precedence over `classifier`; the
        # training rows are not handed to Classify on this path.
        clas = Classify(pretrained='pretrained/')
    else:
        # Choose the estimator named by `classifier`. None means no `model`
        # argument is passed, leaving the choice to Classify's own default
        # (defined outside this excerpt).
        if classifier == 'rf':
            fold_model = RandomForestClassifier(class_weight='balanced',
                                                n_estimators=1)
        elif classifier == 'lsvc':
            fold_model = LinearSVC(C=0.01,
                                   class_weight='balanced',
                                   loss='hinge',
                                   random_state=1)
        elif classifier == 'log':
            fold_model = LogisticRegression(class_weight='balanced', C=0.01)
        else:
            fold_model = None

        # NOTE(review): the positional 2000 is interpreted by Classify --
        # confirm its meaning there.
        if fold_model is None:
            clas = Classify(text_train, cat_train, 2000)
        else:
            clas = Classify(text_train, cat_train, 2000, model=fold_model)

    predict = clas.predict(text_test)
示例#8
0
# Evaluate one held-out event per iteration: rows whose event matches are the
# test set, all remaining rows are the training set.
# NOTE(review): the loop body may continue beyond the end of this excerpt.
for event in set(event_dict.values()):
	print("\n\n")
	print("Event: ", event)

	test_rows = np.argwhere(full_event == event)
	train_rows = np.argwhere(full_event != event)
	
	# Each argwhere row carries the matching position in element 0; it is used
	# to index the parallel full_text / full_cat sequences.
	text_test = [full_text[n[0]] for n in test_rows]
	cat_test = [full_cat[n[0]] for n in test_rows]
	
	text_train = [full_text[n[0]] for n in train_rows]
	cat_train = [full_cat[n[0]] for n in train_rows]

	cat_test_arr = np.array(cat_test, dtype=np.float64)
	# A truthy `pretrained` takes precedence over `classifier`; on that path the
	# training rows are not passed to Classify.
	if pretrained:
		clas = Classify(pretrained='pretrained/')
	# Otherwise `classifier` selects the estimator handed to Classify as `model`.
	# NOTE(review): the positional 2000 is interpreted by Classify, which is
	# not visible here -- confirm its meaning before changing it.
	elif classifier == 'rf':
		clas = Classify(text_train, cat_train, 2000,
			model=RandomForestClassifier(class_weight='balanced', 
				n_estimators=100))
	elif classifier == 'svc':
		clas = Classify(text_train, cat_train, 2000,
			model=SVC(class_weight='balanced'))
	elif classifier == 'linearsvc':
		clas = Classify(text_train, cat_train, 2000,
			model=LinearSVC(class_weight='balanced'))
	elif classifier == 'log':
		clas = Classify(text_train, cat_train, 2000,
			model=LogisticRegression(class_weight='balanced'))
	# Any other `classifier` value: no `model` is passed, so Classify falls back
	# to whatever default it defines.
	else:
		clas = Classify(text_train, cat_train, 2000)