示例#1
0
        out_f.write(','.join(feature_builder.feature_names) + '\n')
        for a in articles:
            features = feature_builder.get_article_features(a)
            save_features(features, a.source, out_f)


def print_data(articles, feature_builder):
    """Print a header line followed by the features of every article.

    The header is the builder's ``feature_names`` joined with commas and is
    written to standard output. Each article's features are then obtained
    from ``feature_builder.get_article_features`` and passed, together with
    the article's ``source`` attribute, to ``print_features``.

    Args:
        articles: Iterable of article objects exposing a ``source`` attribute.
        feature_builder: Object providing ``feature_names`` (an iterable of
            strings) and ``get_article_features(article)``.
    """
    header = ','.join(feature_builder.feature_names)
    print(header)
    for article in articles:
        print_features(
            feature_builder.get_article_features(article),
            article.source,
        )


if __name__ == '__main__':
    # Script entry point: build the article collection from the command-line
    # arguments, then load saved feature tables to prepare model inputs.
    dr = DataReader(sys.argv[1:])
    # NOTE(review): _make_data is underscore-prefixed (non-public by
    # convention) but is called from outside its class here — confirm that
    # this is the intended way to obtain the articles.
    articles = dr._make_data()
    articles.normalize()
    sent_feature_builder = SentimentFeatureBuilder()
    # NOTE(review): the result is unpacked as (testing, training) — confirm
    # this matches the order in which make_sets() returns the two splits.
    testing, training = articles.make_sets()

    # Disabled one-off step: presumably writes the features of each split to
    # the named file (not to STDOUT). The read_csv calls below use the same
    # file names, so those files must already exist when this script runs.
    # save_data(training, sent_feature_builder, "training.txt")
    # save_data(testing, sent_feature_builder, "testing.txt")

    # Candidate classifier: a linear SVC (presumably scikit-learn's
    # svm.LinearSVC — verify against the file's imports).
    model = svm.LinearSVC()

    # Load both feature tables from files in the current working directory.
    df_training = pd.read_csv("training.txt")
    df_testing = pd.read_csv("testing.txt")
    # Model inputs are every column except 'label'; the 'label' column is
    # kept separately as the training labels.
    training_data = df_training.loc[:, df_training.columns != 'label']
    training_label = df_training['label']