Example #1
def LR():

    # Read training data
    train_tweet_id2text, train_tweet_id2issue, train_tweet_id2author_label, train_tweet_id2label = ReadFile('train.csv')

    '''
    Implement your Logistic Regression classifier here
    '''
    BOW = True      # bag-of-words features
    GLOVE = False   # GloVe embeddings are not used in this example
    word2index = get_vocab(train_tweet_id2text)
    data_dict = combine_vec(word2index, train_tweet_id2text, train_tweet_id2author_label, train_tweet_id2issue, bow=BOW)

    data = []
    labels = []
    for k in data_dict:
        data.append(data_dict[k])
        labels.append(int(train_tweet_id2label[k]))
    n_class = len(set(labels))
    print('dataset shape:', np.shape(data))

    n_sample, n_feature = np.shape(data)

    model = LogRegression(n_feature, n_class, lrate=0.8, verbose=True)
    model.fit(data, labels, max_iter=500)

    # Read test data
    test_tweet_id2text, test_tweet_id2issue, test_tweet_id2author_label, test_tweet_id2label = ReadFile('test.csv')
    
    # Predict test data by learned model

    '''
    Predict labels for the test data with the trained model.
    '''
    test_data_dict = combine_vec(word2index, test_tweet_id2text, test_tweet_id2author_label, test_tweet_id2issue, bow=BOW)

    for tweet_id in test_tweet_id2text:
        # Predict the label
        test_x = test_data_dict[tweet_id]
        label = model.predict(test_x)

        # Store it in the dictionary
        test_tweet_id2label[tweet_id] = label

    # Save predicted labels in 'test_lr.csv'
    SaveFile(test_tweet_id2text, test_tweet_id2issue, test_tweet_id2author_label, test_tweet_id2label, 'test_lr.csv')
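These examples call a hand-written LogRegression class whose implementation is not shown here. A minimal softmax-regression sketch matching the constructor and the fit/predict calls above (the internals, the returned loss list, and 0-indexed labels are all assumptions, not the original class) could look like this:

import numpy as np

class LogRegression:
    """Multinomial logistic regression trained with full-batch gradient descent.
    Sketch only; the class used in these examples may differ."""

    def __init__(self, n_feature, n_class, lrate=0.5, verbose=False):
        self.W = np.zeros((n_feature, n_class))    # weight matrix
        self.b = np.zeros(n_class)                 # bias vector
        self.lrate = lrate
        self.verbose = verbose

    def _softmax(self, z):
        z = z - z.max(axis=1, keepdims=True)       # numerical stability
        e = np.exp(z)
        return e / e.sum(axis=1, keepdims=True)

    def fit(self, X, y, max_iter=500):
        X, y = np.asarray(X, dtype=float), np.asarray(y)
        n = X.shape[0]
        Y = np.eye(self.W.shape[1])[y]             # one-hot labels (assumes labels are 0..n_class-1)
        losses = []
        for it in range(max_iter):
            P = self._softmax(X @ self.W + self.b)
            loss = -np.mean(np.log(P[np.arange(n), y] + 1e-12))
            losses.append(loss)
            grad = X.T @ (P - Y) / n               # gradient of the cross-entropy loss
            self.W -= self.lrate * grad
            self.b -= self.lrate * (P - Y).mean(axis=0)
            if self.verbose and it % 50 == 0:
                print(f'iter {it}: loss {loss:.4f}')
        return losses                              # fit() returns the loss curve, as Examples #2 and #5 assume

    def predict(self, x):
        p = self._softmax(np.atleast_2d(np.asarray(x, dtype=float)) @ self.W + self.b)
        return int(p.argmax(axis=1)[0])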
Example #2
def NN():

    # Read training data
    train_tweet_id2text, train_tweet_id2issue, train_tweet_id2author_label, train_tweet_id2label = ReadFile(
        'train.csv')
    '''
    Implement your Neural Network classifier here
    '''
    word2index = get_vocab(train_tweet_id2text)
    data_dict = combine_vec(word2index,
                            train_tweet_id2text,
                            train_tweet_id2author_label,
                            train_tweet_id2issue,
                            bow=False)

    data = []
    labels = []
    for k in data_dict:
        data.append(data_dict[k])
        labels.append(int(train_tweet_id2label[k]))
    data = np.array(data)
    labels = np.array(labels)
    n_class = len(set(labels))
    n_sample, n_feature = np.shape(data)
    print(np.shape(data))

    lrates = [0.2, 0.4, 0.9]
    all_loss = []
    for r in lrates:
        model = NeuralNet(n_feature, n_class, lrate=r, verbose=True)
        train_loss = model.fit(data, labels, max_iter=500)
        print(len(train_loss))
        all_loss.append(train_loss)
    file_name = 'train_loss_nn.pdf'
    plot_lr(lrates, all_loss, file_name)
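plot_lr is assumed to draw one training-loss curve per learning rate and save the figure to file_name; a small matplotlib sketch of that assumed helper (not the original implementation):

import matplotlib.pyplot as plt

def plot_lr(lrates, all_loss, file_name):
    """Plot one training-loss curve per learning rate and save to file_name.
    Sketch of the assumed helper."""
    plt.figure()
    for r, losses in zip(lrates, all_loss):
        plt.plot(range(len(losses)), losses, label=f'lrate={r}')
    plt.xlabel('iteration')
    plt.ylabel('training loss')
    plt.legend()
    plt.savefig(file_name)   # e.g. 'train_loss_nn.pdf'
    plt.close()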
Example #3
def NN():

    # Read training data
    train_tweet_id2text, train_tweet_id2issue, train_tweet_id2author_label, train_tweet_id2label = ReadFile('train.csv')

    '''
    Implement your Neural Network classifier here
    '''
    word2index = get_vocab(train_tweet_id2text)
    data_dict = combine_vec(word2index, train_tweet_id2text, train_tweet_id2author_label, train_tweet_id2issue, bow=False)

    data = []
    labels = []
    for k in data_dict:
        data.append(data_dict[k])
        labels.append(int(train_tweet_id2label[k]))
    data = np.array(data)
    labels = np.array(labels)
    n_class = len(set(labels))
    n_sample, n_feature = np.shape(data)
    print('dataset shape:', np.shape(data))

    model = NeuralNet(n_feature, n_class, lrate=0.9, verbose=True)
    model.fit(data, labels, max_iter=800)

    # Read test data
    test_tweet_id2text, test_tweet_id2issue, test_tweet_id2author_label, test_tweet_id2label = ReadFile('test.csv')
    '''
    Predict labels for the test data with the trained model.
    '''
    test_data_dict = combine_vec(word2index, test_tweet_id2text, test_tweet_id2author_label, test_tweet_id2issue, bow=False)

    for tweet_id in test_tweet_id2text:
        # Predict the label
        test_x = test_data_dict[tweet_id]
        label = model.predict(test_x)

        # Store it in the dictionary
        test_tweet_id2label[tweet_id] = label

    # Save predicted labels in 'test_nn.csv'
    SaveFile(test_tweet_id2text, test_tweet_id2issue, test_tweet_id2author_label, test_tweet_id2label, 'test_nn.csv')
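NeuralNet is likewise not shown in these examples; below is a sketch of a one-hidden-layer network exposing the same fit/predict/score interface used above (hidden size, activation, and initialization are assumptions, not the original class):

import numpy as np

class NeuralNet:
    """One-hidden-layer network with a softmax output, trained by full-batch
    gradient descent. Sketch of the assumed class only."""

    def __init__(self, n_feature, n_class, n_hidden=64, lrate=0.5, verbose=False):
        rng = np.random.default_rng(0)
        self.W1 = rng.normal(0, 0.1, (n_feature, n_hidden))
        self.b1 = np.zeros(n_hidden)
        self.W2 = rng.normal(0, 0.1, (n_hidden, n_class))
        self.b2 = np.zeros(n_class)
        self.lrate, self.verbose = lrate, verbose

    @staticmethod
    def _softmax(z):
        z = z - z.max(axis=1, keepdims=True)
        e = np.exp(z)
        return e / e.sum(axis=1, keepdims=True)

    def fit(self, X, y, max_iter=500):
        X, y = np.asarray(X, dtype=float), np.asarray(y)
        n, n_class = X.shape[0], self.W2.shape[1]
        Y = np.eye(n_class)[y]                      # one-hot (assumes labels are 0..n_class-1)
        losses = []
        for it in range(max_iter):
            H = np.tanh(X @ self.W1 + self.b1)      # hidden layer (forward pass)
            P = self._softmax(H @ self.W2 + self.b2)
            loss = -np.mean(np.log(P[np.arange(n), y] + 1e-12))
            losses.append(loss)
            dZ2 = (P - Y) / n                       # backprop through softmax + cross-entropy
            dZ1 = dZ2 @ self.W2.T * (1 - H ** 2)    # backprop through tanh
            self.W2 -= self.lrate * (H.T @ dZ2)
            self.b2 -= self.lrate * dZ2.sum(axis=0)
            self.W1 -= self.lrate * (X.T @ dZ1)
            self.b1 -= self.lrate * dZ1.sum(axis=0)
            if self.verbose and it % 100 == 0:
                print(f'iter {it}: loss {loss:.4f}')
        return losses

    def predict(self, x):
        H = np.tanh(np.atleast_2d(np.asarray(x, dtype=float)) @ self.W1 + self.b1)
        return int(self._softmax(H @ self.W2 + self.b2).argmax(axis=1)[0])

    def score(self, X, y):
        preds = np.array([self.predict(x) for x in np.asarray(X)])
        return float(np.mean(preds == np.asarray(y)))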
Example #4
def cv_NN(kfold):
    # Read training data
    train_tweet_id2text, train_tweet_id2issue, train_tweet_id2author_label, train_tweet_id2label = ReadFile(
        'train.csv')
    '''
    Implement your Neural Network classifier here
    '''
    word2index = get_vocab(train_tweet_id2text)
    data_dict = combine_vec(word2index,
                            train_tweet_id2text,
                            train_tweet_id2author_label,
                            train_tweet_id2issue,
                            bow=False)

    data = []
    labels = []
    for k in data_dict:
        data.append(data_dict[k])
        labels.append(int(train_tweet_id2label[k]))
    data = np.array(data)
    labels = np.array(labels)
    n_class = len(set(labels))
    n_sample, n_feature = np.shape(data)
    print('Cross validation for Neural network')
    fold_size = int(np.ceil(n_sample / kfold))
    print('Fold size:', fold_size)
    accuracy = []
    for k in range(kfold):
        tstart = k * fold_size
        tend = min(n_sample, tstart + fold_size)
        training_x = np.array(
            [x for i, x in enumerate(data) if not (tstart <= i < tend)])
        test_x = np.array(
            [x for i, x in enumerate(data) if tstart <= i < tend])
        training_y = np.array(
            [x for i, x in enumerate(labels) if not (tstart <= i < tend)])
        test_y = np.array(
            [x for i, x in enumerate(labels) if tstart <= i < tend])
        model = NeuralNet(n_feature, n_class, lrate=0.9, verbose=False)
        model.fit(training_x, training_y, max_iter=500)
        accuracy.append(model.score(test_x, test_y))
        print('Fold', k, 'accuracy', accuracy[-1])
    print('Mean accuracy', np.mean(accuracy))
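The contiguous fold slicing above can also be written with index arrays; the following optional sketch uses scikit-learn's KFold (not part of the original code, and it assumes the model exposes the fit/score interface shown above):

import numpy as np
from sklearn.model_selection import KFold

def cv_scores(model_factory, data, labels, kfold=5, max_iter=500):
    """Generic k-fold cross-validation. model_factory() must return a fresh
    model with fit(X, y, max_iter=...) and score(X, y). Sketch only."""
    data, labels = np.asarray(data), np.asarray(labels)
    accuracy = []
    # shuffle=False keeps the contiguous folds of the manual version above
    for k, (train_idx, test_idx) in enumerate(KFold(n_splits=kfold).split(data)):
        model = model_factory()
        model.fit(data[train_idx], labels[train_idx], max_iter=max_iter)
        accuracy.append(model.score(data[test_idx], labels[test_idx]))
        print('Fold', k, 'accuracy', accuracy[-1])
    print('Mean accuracy', np.mean(accuracy))
    return accuracy

# Example use, mirroring cv_NN:
# cv_scores(lambda: NeuralNet(n_feature, n_class, lrate=0.9, verbose=False), data, labels, kfold=5)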
Example #5
def LR():

    # Read training data
    train_tweet_id2text, train_tweet_id2issue, train_tweet_id2author_label, train_tweet_id2label = ReadFile(
        'train.csv')
    '''
    Implement your Logistic Regression classifier here
    '''
    BOW = True      # bag-of-words features
    GLOVE = False   # GloVe embeddings are not used in this example
    word2index = get_vocab(train_tweet_id2text)
    data_dict = combine_vec(word2index,
                            train_tweet_id2text,
                            train_tweet_id2author_label,
                            train_tweet_id2issue,
                            bow=BOW)

    data = []
    labels = []
    for k in data_dict:
        data.append(data_dict[k])
        labels.append(int(train_tweet_id2label[k]))
    n_class = len(set(labels))
    print('dataset shape:', np.shape(data))

    n_sample, n_feature = np.shape(data)
    lrates = [0.2, 0.5, 0.8]
    all_loss = []
    for r in lrates:
        model = LogRegression(n_feature, n_class, lrate=r, verbose=True)
        train_loss = model.fit(data, labels, max_iter=200)
        print(len(train_loss))
        all_loss.append(train_loss)
    file_name = 'train_loss_lr.pdf'
    plot_lr(lrates, all_loss, file_name)
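get_vocab and combine_vec are assumed to build the word-to-index vocabulary and the per-tweet feature vectors used throughout these examples. A bag-of-words-only sketch of those assumed helpers follows; it additionally assumes the author label and issue are numeric codes and omits the bow=False embedding branch:

import numpy as np

def get_vocab(tweet_id2text):
    """Map each word seen in the training tweets to a column index.
    Sketch of the assumed helper."""
    word2index = {}
    for text in tweet_id2text.values():
        for word in text.lower().split():
            word2index.setdefault(word, len(word2index))
    return word2index

def combine_vec(word2index, tweet_id2text, tweet_id2author_label, tweet_id2issue, bow=True):
    """Build one feature vector per tweet: bag-of-words counts with the author
    label and issue appended as extra features. Sketch only; the original helper
    (and its bow=False branch) may differ."""
    data_dict = {}
    for tweet_id, text in tweet_id2text.items():
        vec = np.zeros(len(word2index))
        for word in text.lower().split():
            if word in word2index:                 # unseen test-set words are skipped
                vec[word2index[word]] += 1
        # assumes these fields are already numeric codes; the original may one-hot encode them
        extra = [float(tweet_id2author_label[tweet_id]), float(tweet_id2issue[tweet_id])]
        data_dict[tweet_id] = np.concatenate([vec, extra])
    return data_dict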