示例#1
0
def get_data():
    """Prepare the module-level ``train``/``test`` sets and return their splits.

    Both globals are rebound as a side effect: each is passed through
    ``u.normalize_test_set_classification_scheme`` (``test`` first), and
    ``train`` is then passed through ``u.reduce_dataset(train, 3000)``.

    Returns:
        An 8-tuple ``(docs_test, y_test, docs_train, y_train,
        docs_train_subjectivity, y_train_subjectivity, docs_train_polarity,
        y_train_polarity)``. The first four are single columns sliced from
        the data sets; the last four are exactly what
        ``u.generate_two_part_dataset(train)`` returns.
    """
    global train, test

    def _docs_column(rows):
        # Compensates for the inconsistent TSV layout: column 4 is used when
        # the first row has more than four fields, otherwise column 3.
        return 4 if len(rows[0]) > 4 else 3

    test = u.normalize_test_set_classification_scheme(test)
    train = u.normalize_test_set_classification_scheme(train)

    # NOTE(review): 3000 is presumably a target size for the training set;
    # confirm against u.reduce_dataset.
    train = u.reduce_dataset(train, 3000)

    test_col = _docs_column(test)
    train_col = _docs_column(train)

    # Each y_* column sits immediately before its docs_* column.
    docs_test = test[:, test_col]
    y_test = test[:, test_col - 1]
    docs_train = train[:, train_col]
    y_train = train[:, train_col - 1]

    (
        docs_train_subjectivity,
        y_train_subjectivity,
        docs_train_polarity,
        y_train_polarity,
    ) = u.generate_two_part_dataset(train)

    return (
        docs_test,
        y_test,
        docs_train,
        y_train,
        docs_train_subjectivity,
        y_train_subjectivity,
        docs_train_polarity,
        y_train_polarity,
    )
示例#2
0
 def __init__(self, sub_clf_options, pol_clf_options, train):
     """Build the subjectivity and polarity classifiers from one training set.

     ``train`` is split in two by ``utils.generate_two_part_dataset``; each
     part is handed to its own ``BaseMethod`` together with the matching
     options dict, which is expanded into keyword arguments.

     Sets:
         subjectivity_clf: ``BaseMethod`` built from the first part.
         polarity_clf: ``BaseMethod`` built from the second part.
         best_score: sum of the two classifiers' ``best_score`` divided by 2.
     """
     subjectivity_data, polarity_data = utils.generate_two_part_dataset(train)

     # The subjectivity classifier is constructed before the polarity one.
     self.subjectivity_clf = BaseMethod(subjectivity_data, **sub_clf_options)
     self.polarity_clf = BaseMethod(polarity_data, **pol_clf_options)

     subjectivity_score = self.subjectivity_clf.best_score
     polarity_score = self.polarity_clf.best_score
     self.best_score = (subjectivity_score + polarity_score) / 2