def learn_lr_classifier(training_corpus):
    """Train logistic-regression weights with stochastic gradient descent.

    Runs epochs over a shuffled copy of *training_corpus* until the weight
    vector moves less than ``convergence_threshold`` between epochs, or
    ``max_iters`` epochs have elapsed. Each training vector's first element
    is its class label; the rest are its features.

    Returns the learned weight list of length vocabulary-size + 1
    (index 0 is the bias term).
    """
    vocab_size = get_vocabulary_size()
    labels = get_labels()
    weights = [0] * (vocab_size + 1)  # +1 slot for the bias weight
    movement = 1.0                    # seed > threshold so the loop runs once
    epoch = 0
    while movement > convergence_threshold:
        epoch += 1
        if epoch > max_iters:
            break  # give up: convergence not reached within the epoch budget
        previous_weights = list(weights)
        # Shuffle a copy so the caller's corpus order is left untouched.
        examples = list(training_corpus)
        shuffle(examples)
        for example in examples:
            # Positive class (1.0) iff the example carries the first label.
            target = 1.0 if float(example[0]) == labels[0] else 0.0
            error = target - get_prediction(example[1:], weights)
            update_weights(example[1:], weights, error)
        # Convergence metric: how far the weights moved this epoch.
        movement = get_norm(weights, previous_weights)
    return weights
def test_logistic_regression(w):
    """Score the test set with weight vector *w* and write predictions.

    Reads sparse rows (label followed by ``feature_id:value`` tokens) from
    ``testing_data_filepath``, computes the logistic activation per row
    using the bias ``w[0]`` plus one weight per present feature, and writes
    the predicted labels to ``output_filepath``. Returns early (None) if
    labels cannot be generated.

    NOTE(review): only the *presence* of a feature is used — the value
    after the ':' is ignored; confirm this matches the training-side
    feature representation.
    """
    if not generate_labels():
        return
    fp = FileProcessor(testing_data_filepath, ' ')
    rows = fp.parse_input_file()
    output = []
    expected = []  # NOTE(review): gold labels are collected but never used
    labels = get_labels()
    for row in rows:
        expected.append(row[0])
        row = row[1:]
        sum_val = w[0]  # start from the bias weight
        for feature in row:
            # Token format is "feature_id:value"; the id indexes into w.
            feature_id = int(feature.split(':')[0])
            sum_val += w[feature_id]
        # Decision boundary at sigmoid(x) == 0.5, i.e. x == 0.
        if sigmoid(sum_val) >= 0.5:
            output.append(labels[0])
        else:
            output.append(labels[1])
    if fp.generate_output(output_filepath, output):
        # Fix: print statement was Python-2-only; print() with a single
        # argument is valid in both Python 2 and Python 3.
        print('Successfully generated predictions.lr')