def train_classifier(train_data, train_labels, dev_data, dev_labels, seed):
  """Train an ``ml.Classifier`` with early stopping on dev-set accuracy.

  Runs at most 100 epochs. Each epoch feeds every (data, label) pair of the
  training set to ``classifier.learn``, then measures accuracy on the dev
  set and prints it. Training stops as soon as an epoch fails to improve
  the dev accuracy by at least 0.001 over the previous epoch.

  Args:
    train_data: Training examples, paired positionally with train_labels.
    train_labels: Labels for train_data.
    dev_data: Dev examples passed to ``classifier.calculate_acc``.
    dev_labels: Labels for dev_data.
    seed: Seed forwarded to the ``ml.Classifier`` constructor.

  Returns:
    A ``(classifier, current_acc)`` tuple, where ``current_acc`` is the dev
    accuracy measured in the last epoch that ran.
  """
  learning_rate = 0.005
  max_epochs = 100
  min_improvement = 0.001

  classifier = ml.Classifier(dim=4, lr=learning_rate, seed=seed)
  last_acc = None  # dev accuracy of the previous epoch; None before the first
  for _ in range(max_epochs):
    for data, label in zip(train_data, train_labels):
      classifier.learn(data, label)
    current_acc = classifier.calculate_acc(dev_data, dev_labels)
    print(current_acc)
    # Compare against None explicitly: a previous accuracy of 0.0 is falsy,
    # so a plain truthiness test would silently skip the stopping check.
    if last_acc is not None and last_acc > current_acc - min_improvement:
      break
    last_acc = current_acc

  return classifier, current_acc
Exemplo n.º 2
0
def train_classifier(train, dev, seed):
    """Train an ``ml.Classifier`` with early stopping on dev-set accuracy.

    Runs at most 100 epochs. Each epoch passes every example yielded by
    ``train.get_data()`` to ``classifier.learn`` and then measures accuracy
    on the dev batch. Training stops as soon as an epoch fails to improve
    the dev accuracy by at least 0.001 over the previous epoch.

    Args:
        train: Training set; ``get_data()`` must yield examples that expose
            ``get_data()`` and ``get_label()``.
        dev: Dev set exposing ``get_data_batch()`` and ``get_label_batch()``.
        seed: Seed forwarded to the ``ml.Classifier`` constructor.

    Returns:
        A ``(classifier, current_acc)`` tuple, where ``current_acc`` is the
        dev accuracy measured in the last epoch that ran.
    """
    learning_rate = 0.005
    max_epochs = 100
    min_improvement = 0.001

    classifier = ml.Classifier(dim=4, lr=learning_rate, seed=seed)
    last_acc = None  # dev accuracy of the previous epoch; None before the first
    for _ in range(max_epochs):
        for example in train.get_data():
            classifier.learn(example.get_data(), example.get_label())
        current_acc = classifier.calculate_acc(dev.get_data_batch(),
                                               dev.get_label_batch())
        # Compare against None explicitly: a previous accuracy of 0.0 is
        # falsy, so a plain truthiness test would skip the stopping check.
        if last_acc is not None and last_acc > current_acc - min_improvement:
            break
        last_acc = current_acc

    return classifier, current_acc
Exemplo n.º 3
0
      if datapoint[3] not in ['Alaska', 'Wyoming']:  # not legally allowed to perform experiment in these states
        if datapoint[4] == "smoking":
          smoking_label = 1
        else:
          smoking_label = 0
        dev_data.append((datapoint[0], int(datapoint[1]), datapoint[2], datapoint[3]))
        dev_labels.append(smoking_label)

## Whew, we're finished loading the data.  Let's train a classifier five times with different random seeds.
## We use the learn function to update our parameters

# Final dev accuracy of each seeded training run, in seed order.
accs = []
# One independent training run per seed (0 through 4).
for i in range(5):
  print("Seed", i)
  learning_rate = 0.005
  # The run index doubles as the classifier's random seed.
  classifier = ml.Classifier(dim=4, lr=learning_rate, seed=i)
  # Dev accuracy of the previous epoch; None until one epoch has completed.
  last_acc = None
  # NOTE(review): the epoch loop reuses the name `i`, shadowing the seed index.
  # The seed is already consumed above and the outer loop rebinds `i` on its
  # next iteration, but any later use of `i` in this body sees the epoch index.
  for i in range(100):   # number of epochs
    # One pass over the training set, one example at a time.
    for j in range(len(train_data)):
      classifier.learn(train_data[j], train_labels[j])
    current_acc = classifier.calculate_acc(dev_data, dev_labels)
    print(current_acc)
    # Early stopping: quit once dev accuracy fails to improve by at least
    # 0.001 over the previous epoch.
    # NOTE(review): `last_acc and ...` is a truthiness test, so a previous
    # accuracy of exactly 0.0 skips this check; `last_acc is not None` would
    # express the intent.
    if last_acc and last_acc > current_acc - 0.001:
      break
    last_acc = current_acc
  # Record the dev accuracy from the last epoch that ran for this seed.
  accs.append(current_acc)

  ## Finally, let's print out our predictions on the dev set:
  ## (still inside the per-seed loop, so this runs once per seed)

  dev_predictions = classifier.predict_batch(dev_data)
  # Presumably counts predictions printed so far; its use is outside this view.
  num_printed = 0