# Per-event hold-out evaluation of a LinearSVC model.
# For every distinct value in event_dict, rows where full_event equals that
# event become the test split and all remaining rows become the training
# split. A Classify wrapper is built around
# LinearSVC(C=c, class_weight=weight, loss=loss, random_state=1), predictions
# for the held-out texts are scored with clas.simple_evaluation(), and each
# returned count is added to model_res only for keys model_res already has.
# The accumulated counts are then handed to clas.stats_calc().
# NOTE(review): the dict literal closed at the start of this line is
#   presumably model_res (it ends with the 'Perfect Match' counter) -- confirm.
# NOTE(review): the positional 2000 passed to Classify is opaque here --
#   presumably a feature/vocabulary limit; verify against Classify.
# NOTE(review): cat_test_arr is assigned but never read in the visible code.
'Perfect Match': 0} for event in set(event_dict.values()): test_rows = np.argwhere(full_event == event) train_rows = np.argwhere(full_event != event) text_test = [full_text[n[0]] for n in test_rows] cat_test = [full_cat[n[0]] for n in test_rows] text_train = [full_text[n[0]] for n in train_rows] cat_train = [full_cat[n[0]] for n in train_rows] cat_test_arr = np.array(cat_test, dtype=np.float64) clas = Classify(text_train, cat_train, 2000, model = LinearSVC(C = c, class_weight = weight, loss = loss, random_state=1)) predict = clas.predict(text_test) simp = clas.simple_evaluation(cat_test, predict) for key in simp: if key in model_res: model_res[key] += simp[key] stats = clas.stats_calc(model_res['True Positive'], model_res['True Negative'], model_res['False Positive'], model_res['False Negative'], model_res['One Label'], model_res['Perfect Match'])
# Per-event hold-out evaluation of a RandomForestClassifier.
# For every distinct value in event_dict, rows where full_event equals that
# event become the test split and all remaining rows become the training
# split. The forest is configured from the surrounding names estimators,
# weight, criterion and bootstrap, with random_state=1 and n_jobs=4 fixed.
# Predictions are scored with clas.evaluate(), and each returned count is
# added to model_res only for keys model_res already has; the accumulated
# counts feed clas.stats_calc().
# NOTE(review): the positional 2000 passed to Classify is opaque here --
#   presumably a feature/vocabulary limit; verify against Classify.
# NOTE(review): cat_test_arr is assigned but never read in the visible code.
for event in set(event_dict.values()): test_rows = np.argwhere(full_event == event) train_rows = np.argwhere(full_event != event) text_test = [full_text[n[0]] for n in test_rows] cat_test = [full_cat[n[0]] for n in test_rows] text_train = [full_text[n[0]] for n in train_rows] cat_train = [full_cat[n[0]] for n in train_rows] cat_test_arr = np.array(cat_test, dtype=np.float64) clas = Classify(text_train, cat_train, 2000, model=RandomForestClassifier(random_state=1, n_jobs=4, n_estimators=estimators, class_weight=weight, criterion=criterion, bootstrap=bootstrap)) predict = clas.predict(text_test) simp = clas.evaluate(cat_test, predict) for key in simp: if key in model_res: model_res[key] += simp[key] stats = clas.stats_calc(model_res['True Positive'], model_res['True Negative'], model_res['False Positive'],
# Random 90/10 split evaluation.
# Texts and categories are collected into parallel lists by walking the keys
# of text_dict in sorted order (cat_dict is indexed with the same keys), then
# split with train_test_split(test_size=.1, random_state=1).
# Model selection:
#   pretrained truthy         -> Classify(pretrained='pretrained/')
#   classifier == 'rf'        -> RandomForestClassifier(balanced, 100 trees)
#   classifier == 'svc'       -> SVC(balanced)
#   classifier == 'linearsvc' -> LinearSVC(balanced)
#   anything else             -> Classify with its default model
# The chosen wrapper then predicts on the held-out texts.
# NOTE(review): the loop variable `id` shadows the builtin of the same name.
# NOTE(review): assumes every key of text_dict is also present in cat_dict --
#   a missing key would raise KeyError; confirm against the loader.
full_text = list() full_cat = list() for id in (sorted(text_dict.keys())): full_text.append(text_dict[id]) full_cat.append(cat_dict[id]) text_train, text_test, cat_train, cat_test = train_test_split(full_text, full_cat, test_size=.1, random_state=1) cat_test_arr = np.array(cat_test, dtype=np.float64) if pretrained: clas = Classify(pretrained='pretrained/') elif classifier == 'rf': clas = Classify(text_train, cat_train, model=RandomForestClassifier(class_weight='balanced', n_estimators=100)) elif classifier == 'svc': clas = Classify(text_train, cat_train, model=SVC(class_weight='balanced')) elif classifier == 'linearsvc': clas = Classify(text_train, cat_train, model=LinearSVC(class_weight='balanced')) else: clas = Classify(text_train, cat_train) predict = clas.predict(text_test) evals = clas.evaluation_(
# Per-event hold-out evaluation of a KNeighborsClassifier.
# For every distinct value in event_dict, rows where full_event equals that
# event become the test split and all remaining rows become the training
# split. The model is KNeighborsClassifier(n_neighbors=k, weights=weight,
# n_jobs=2). Predictions are scored with clas.simple_evaluation(), and each
# returned count is added to model_res only for keys model_res already has;
# the accumulated counts feed clas.stats_calc().
# NOTE(review): Classify receives 1000 as its third positional argument
#   here -- meaning not visible in this code; verify against Classify.
# NOTE(review): cat_test_arr is assigned but never read in the visible code.
} for event in set(event_dict.values()): test_rows = np.argwhere(full_event == event) train_rows = np.argwhere(full_event != event) text_test = [full_text[n[0]] for n in test_rows] cat_test = [full_cat[n[0]] for n in test_rows] text_train = [full_text[n[0]] for n in train_rows] cat_train = [full_cat[n[0]] for n in train_rows] cat_test_arr = np.array(cat_test, dtype=np.float64) clas = Classify(text_train, cat_train, 1000, model=KNeighborsClassifier(n_neighbors=k, weights=weight, n_jobs=2)) predict = clas.predict(text_test) simp = clas.simple_evaluation(cat_test, predict) for key in simp: if key in model_res: model_res[key] += simp[key] stats = clas.stats_calc(model_res['True Positive'], model_res['True Negative'], model_res['False Positive'], model_res['False Negative'], model_res['One Label'], model_res['Perfect Match'])
# Per-event hold-out evaluation of a BernoulliNB model.
# For every distinct value in event_dict, rows where full_event equals that
# event become the test split and all remaining rows become the training
# split. The model is BernoulliNB(alpha=alpha). Predictions are scored with
# clas.evaluate(), and each returned count is added to model_res only for
# keys model_res already has; the accumulated counts feed clas.stats_calc().
# NOTE(review): the dict literal closed at the start of this line is
#   presumably model_res (it ends with the 'Perfect Match' counter) -- confirm.
# NOTE(review): the positional 2000 passed to Classify is opaque here --
#   presumably a feature/vocabulary limit; verify against Classify.
# NOTE(review): cat_test_arr is assigned but never read in the visible code.
'Perfect Match': 0 } for event in set(event_dict.values()): test_rows = np.argwhere(full_event == event) train_rows = np.argwhere(full_event != event) text_test = [full_text[n[0]] for n in test_rows] cat_test = [full_cat[n[0]] for n in test_rows] text_train = [full_text[n[0]] for n in train_rows] cat_train = [full_cat[n[0]] for n in train_rows] cat_test_arr = np.array(cat_test, dtype=np.float64) clas = Classify(text_train, cat_train, 2000, model=BernoulliNB(alpha=alpha)) predict = clas.predict(text_test) simp = clas.evaluate(cat_test, predict) for key in simp: if key in model_res: model_res[key] += simp[key] stats = clas.stats_calc(model_res['True Positive'], model_res['True Negative'], model_res['False Positive'], model_res['False Negative'], model_res['One Label'], model_res['Perfect Match'])
# Per-event hold-out evaluation of a LogisticRegression model.
# For every distinct value in event_dict, rows where full_event equals that
# event become the test split and all remaining rows become the training
# split. The model is LogisticRegression(C=c, class_weight=weight).
# Predictions are scored with clas.evaluate(), and each returned count is
# added to model_res only for keys model_res already has; the accumulated
# counts feed clas.stats_calc().
# NOTE(review): the dict literal closed at the start of this line is
#   presumably model_res (it ends with the 'Perfect Match' counter) -- confirm.
# NOTE(review): the positional 2000 passed to Classify is opaque here --
#   presumably a feature/vocabulary limit; verify against Classify.
# NOTE(review): cat_test_arr is assigned but never read in the visible code.
'Perfect Match': 0 } for event in set(event_dict.values()): test_rows = np.argwhere(full_event == event) train_rows = np.argwhere(full_event != event) text_test = [full_text[n[0]] for n in test_rows] cat_test = [full_cat[n[0]] for n in test_rows] text_train = [full_text[n[0]] for n in train_rows] cat_train = [full_cat[n[0]] for n in train_rows] cat_test_arr = np.array(cat_test, dtype=np.float64) clas = Classify(text_train, cat_train, 2000, model=LogisticRegression(C=c, class_weight=weight)) predict = clas.predict(text_test) simp = clas.evaluate(cat_test, predict) for key in simp: if key in model_res: model_res[key] += simp[key] stats = clas.stats_calc(model_res['True Positive'], model_res['True Negative'], model_res['False Positive'], model_res['False Negative'], model_res['One Label'], model_res['Perfect Match'])
# Per-event hold-out run with a selectable model.
# full_cat_confusion starts as an empty dict (it is not written to in the
# visible code). For every distinct value in event_dict the event is printed,
# rows where full_event equals that event become the test split, and all
# remaining rows become the training split.
# Model selection:
#   pretrained truthy    -> Classify(pretrained='pretrained/')
#   classifier == 'rf'   -> RandomForestClassifier(balanced, n_estimators=1)
#   classifier == 'lsvc' -> LinearSVC(C=0.01, balanced, hinge loss, seed 1)
#   classifier == 'log'  -> LogisticRegression(balanced, C=0.01)
#   anything else        -> Classify with its default model
# The chosen wrapper then predicts on the held-out texts.
# NOTE(review): n_estimators=1 builds a single-tree forest -- confirm this is
#   intentional and not a leftover from debugging.
# NOTE(review): the positional 2000 passed to Classify is opaque here --
#   presumably a feature/vocabulary limit; verify against Classify.
full_cat_confusion = dict() for event in set(event_dict.values()): print("Event: ", event) test_rows = np.argwhere(full_event == event) train_rows = np.argwhere(full_event != event) text_test = [full_text[n[0]] for n in test_rows] cat_test = [full_cat[n[0]] for n in test_rows] text_train = [full_text[n[0]] for n in train_rows] cat_train = [full_cat[n[0]] for n in train_rows] cat_test_arr = np.array(cat_test, dtype=np.float64) if pretrained: clas = Classify(pretrained='pretrained/') elif classifier == 'rf': clas = Classify(text_train, cat_train, 2000, model=RandomForestClassifier(class_weight='balanced', n_estimators=1)) elif classifier == 'lsvc': clas = Classify(text_train, cat_train, 2000, model=LinearSVC(C=0.01, class_weight='balanced', loss='hinge', random_state=1)) elif classifier == 'log': clas = Classify(text_train, cat_train, 2000, model=LogisticRegression(class_weight='balanced', C=0.01)) else: clas = Classify(text_train, cat_train, 2000) predict = clas.predict(text_test)
# Per-event hold-out run with a selectable model.
# For every distinct value in event_dict a blank separator and the event are
# printed, rows where full_event equals that event become the test split, and
# all remaining rows become the training split.
# Model selection:
#   pretrained truthy         -> Classify(pretrained='pretrained/')
#   classifier == 'rf'        -> RandomForestClassifier(balanced, 100 trees)
#   classifier == 'svc'       -> SVC(balanced)
#   classifier == 'linearsvc' -> LinearSVC(balanced)
#   classifier == 'log'       -> LogisticRegression(balanced)
#   anything else             -> Classify with its default model
# NOTE(review): the positional 2000 passed to Classify is opaque here --
#   presumably a feature/vocabulary limit; verify against Classify.
# NOTE(review): iterating a set gives no guaranteed event order, so the
#   printed order may differ between runs -- wrap in sorted() if that matters.
for event in set(event_dict.values()): print("\n\n") print("Event: ", event) test_rows = np.argwhere(full_event == event) train_rows = np.argwhere(full_event != event) text_test = [full_text[n[0]] for n in test_rows] cat_test = [full_cat[n[0]] for n in test_rows] text_train = [full_text[n[0]] for n in train_rows] cat_train = [full_cat[n[0]] for n in train_rows] cat_test_arr = np.array(cat_test, dtype=np.float64) if pretrained: clas = Classify(pretrained='pretrained/') elif classifier == 'rf': clas = Classify(text_train, cat_train, 2000, model=RandomForestClassifier(class_weight='balanced', n_estimators=100)) elif classifier == 'svc': clas = Classify(text_train, cat_train, 2000, model=SVC(class_weight='balanced')) elif classifier == 'linearsvc': clas = Classify(text_train, cat_train, 2000, model=LinearSVC(class_weight='balanced')) elif classifier == 'log': clas = Classify(text_train, cat_train, 2000, model=LogisticRegression(class_weight='balanced')) else: clas = Classify(text_train, cat_train, 2000)