Example #1
import numpy as np
from collections import Counter
from sklearn.model_selection import train_test_split as ts

# The source is truncated above this loop; the input file name is hypothetical.
X, Y = [], []
for i in open("negative_features.txt"):
    ele = i.rstrip().split()
    # ~ GXB01170_2018.fast5|233|23,3,1,00,2,3	0	0|0|0|0|0	0.47062142444662086|0.8176029853529686|0.7531814474848483|-0.0835983106934529|-0.5416802793696001	0.14888963355157137|0.1977707177676313|0.11147011292496822|0.18032028688587404|0.15884083915957087	0.5053374754088856|0.9240774715516673|0.7871817035819118|-0.06103521168167164|-0.5495652071815835   15|6|6|138|64
    # fields 3-6 each hold five '|'-separated floats; flatten them into one row
    insert = []
    for item in ele[3:7]:
        for itemsub in item.split("|"):
            insert.append(float(itemsub))
    X.append(insert)
    Y.append(0)  # this loop collects only the negative class
#######################################
X = np.array(X)
Y = np.array(Y)
#########################################
# split the data 4:1 (train : test)
x_train, x_test, y_train, y_test = ts(X,
                                      Y,
                                      test_size=0.2,
                                      random_state=0,
                                      shuffle=True)
################################################################################
from xgboost.sklearn import XGBClassifier
from sklearn import metrics
print(Counter(Y))
print(Counter(y_train))
print(Counter(y_test))
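# If the counts above are skewed, a stratified variant (not in the original)
# keeps the class ratio identical across both splits:
# x_train, x_test, y_train, y_test = ts(X, Y, test_size=0.2, random_state=0, stratify=Y)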
clf = XGBClassifier(n_jobs=-1,
                    learning_rate=0.3,
                    tree_method='gpu_exact',  # legacy option; recent XGBoost uses 'gpu_hist'
                    n_estimators=58,
                    alpha=0.1,
                    gamma=0)
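
The snippet stops right after the constructor; a minimal continuation sketch,
fitting and scoring with the `metrics` module imported above (roc_auc_score
assumes both classes are present, which the truncated loop above only hints at):

clf.fit(x_train, y_train)
pred = clf.predict(x_test)
print(metrics.accuracy_score(y_test, pred))
print(metrics.roc_auc_score(y_test, clf.predict_proba(x_test)[:, 1]))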
Example #2

from sklearn import svm
from sklearn import datasets
from sklearn.model_selection import train_test_split as ts

# Classify the Iris dataset with sklearn's SVM, comparing three kernels
iris = datasets.load_iris()
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = ts(X, y, test_size=0.3)

# kernel = 'rbf'
clf_rbf = svm.SVC(kernel='rbf')
clf_rbf.fit(X_train, y_train)
score_rbf = clf_rbf.score(X_test, y_test)
print("The score of rbf is : %f" % score_rbf)

# kernel = 'linear'
clf_linear = svm.SVC(kernel='linear')
clf_linear.fit(X_train, y_train)
score_linear = clf_linear.score(X_test, y_test)
print("The score of linear is : %f" % score_linear)

# kernel = 'poly'
clf_poly = svm.SVC(kernel='poly')
clf_poly.fit(X_train, y_train)
score_poly = clf_poly.score(X_test, y_test)
print("The score of poly is : %f" % score_poly)
Example #3
data_train = []
# The source is truncated above; a hypothetical loop header so the body parses
# (the record appended to data_train at the cut-off point is lost):
for _, row in data_Cups.iterrows():
    winner_name = [
        row['Winner'], row['Runners-Up'], row['Third'], row['Fourth']
    ]

    results = dc.Neg_Rec(data_Matches, row['Year'], winner_name)
    data_train.extend(results)

data_training = pd.DataFrame(data_train)
dt = data_training
#dt['1'], dt['2'], dt['3'], dt['4'], dt['5'], dt['6'], dt['7'], dt['8'], dt['9'], dt['10'], dt['11'], dt['12'], dt['13'], dt['14'], dt['15'], dt['16'] = zip(*dt['name'].map(lambda x:  x.split('  ')))
#dt.drop(['name'], axis = 1, inplace = True)

from sklearn.model_selection import train_test_split as ts
train_df, test_df = ts(dt, test_size=0.01)

char_cnn = cp.CharCNN(max_len_s=256, max_num_s=1)
char_cnn.preporcess(labels=dt['label'].unique())

x_train, y_train = char_cnn.process(df=train_df, x_col='name', y_col='label')
x_test, y_test = char_cnn.process(df=test_df, x_col='name', y_col='label')

char_cnn.build_model()
char_cnn.train(x_train, y_train, x_test, y_test, batch_size=32, epochs=10)

y_pred = char_cnn.predict(x_test)
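
# A hedged accuracy check (assumptions: predict() returns per-class probability
# rows, Keras-style, and y_test holds the matching one-hot rows; the CharCNN
# helper is custom, so neither is guaranteed):
import numpy as np
from sklearn.metrics import accuracy_score
print(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_pred, axis=1)))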

plt.figure(figsize=(12, 6))
sns.countplot(x=data_Cups['Winner'])
Example #4
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split as ts


# The def line is missing from the source; `emails` is assumed to be a list of
# mail file paths built in the truncated part above.
def make_dataset(dictionary):
    feature_set = []
    labels = []
    c = len(emails)  # countdown, printed below as a crude progress indicator
    for email in emails:
        data = []
        with open(email, encoding='cp437') as f:
            words = f.read().split(" ")
        for entry in dictionary:
            data.append(words.count(entry[0]))
        feature_set.append(data)
        if "ham" in email:
            labels.append(0)
        if "spam" in email:
            labels.append(1)
        print(c)
        c = c - 1
    return feature_set, labels


d = dict()  # left empty in the source; make_dataset expects (word, count)
            # pairs (it reads entry[0]), e.g. the output of Counter.most_common
features, labels = make_dataset(d)  # creates a dataset based on word counts

#print(len(features),len(labels))
x_train, x_test, y_train, y_test = ts(features, labels, test_size=0.2)
clf = MultinomialNB()
clf.fit(x_train, y_train)
pred = clf.predict(x_test)
print(accuracy_score(y_test, pred))
save(clf, "spam-classifier.mdl")
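
save() is not defined in the snippet, presumably a project helper; a standard
stand-in sketch using joblib:

import joblib
joblib.dump(clf, "spam-classifier.mdl")            # persist the fitted model
clf_restored = joblib.load("spam-classifier.mdl")  # reload it later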
Example #5

import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split as ts
from torch.utils.data import TensorDataset, DataLoader

#Preprocess Data
torch.manual_seed(0)  #to repeat results
data = pd.read_csv('iris.csv').set_index('Id')
dummies = pd.get_dummies(data['Species'])  #from categoricals to dummies
data = pd.concat([data, dummies], axis=1).drop('Species',
                                               axis=1)  #add dummies back
print(data.shape)

X_train, X_test, y_train, y_test = ts(data.iloc[:, :-3],
                                      data.iloc[:, -3:],
                                      test_size=0.2,
                                      shuffle=True,
                                      random_state=0)
#convert data to tensors
X_train = torch.from_numpy(X_train.values).float()
X_test = torch.from_numpy(X_test.values).float()
y_train = torch.from_numpy(y_train.values).float()
y_test = torch.from_numpy(y_test.values).float()

#Create data loader
train_set = TensorDataset(X_train, y_train)
test_set = TensorDataset(X_test, y_test)
train_loader = DataLoader(
    train_set,
    batch_size=X_train.shape[0],  # full-batch loading: one batch per epoch
    shuffle=True,
)
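
The source cuts off at the loader. A minimal continuation sketch; the 4-16-3
architecture, Adam optimizer, and epoch count are assumptions, not the original
author's choices:

class IrisNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(4, 16)   # 4 iris measurements in
        self.fc2 = nn.Linear(16, 3)   # 3 species out

    def forward(self, x):
        return self.fc2(F.relu(self.fc1(x)))

model = IrisNet()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

for epoch in range(100):
    for xb, yb in train_loader:
        optimizer.zero_grad()
        loss = criterion(model(xb), yb.argmax(dim=1))  # one-hot back to class ids
        loss.backward()
        optimizer.step()

with torch.no_grad():
    pred = model(X_test).argmax(dim=1).numpy()
print(accuracy_score(y_test.argmax(dim=1).numpy(), pred))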
Example #6
import pandas as pd
import tensorflow as tf  # written against TF 1.x (placeholders, Session)
from sklearn.model_selection import train_test_split as ts


def start():
    print('reading dataset')
    ratings = pd.read_csv('dataset/ratings.dat',
                          sep="::",
                          header=None,
                          engine='python')

    ratings_pivot = pd.pivot_table(ratings[[0, 1, 2]],
                                   values=2,
                                   index=0,
                                   columns=1).fillna(0)
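    # one row per user, one column per movie, cell = that user's rating;
    # unrated movies become 0, giving the fixed-length autoencoder input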

    train, test = ts(ratings_pivot, train_size=0.8)

    # how many nodes
    nodes_in = 3706
    nodes_hidden = 256
    nodes_out = 3706

    hidden_layer = {
        'weights': tf.Variable(tf.random_normal([nodes_in + 1, nodes_hidden]))
    }
    output_weights = {
        'weights': tf.Variable(tf.random_normal([nodes_hidden + 1, nodes_out]))
    }
    input_layer = tf.placeholder('float', [None, 3706])

    input_layer_const = tf.fill([tf.shape(input_layer)[0], 1], 1.0)
    input_layer_concat = tf.concat([input_layer, input_layer_const], 1)
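    # the appended column of ones lets each weight matrix learn a bias term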

    # multiply output of input_layer wth a weight matrix
    layer_1 = tf.nn.sigmoid(
        tf.matmul(input_layer_concat, hidden_layer['weights']))
    layer1_const = tf.fill([tf.shape(layer_1)[0], 1], 1.0)
    layer_concat = tf.concat([layer_1, layer1_const], 1)

    # multiply output of hidden with a weight matrix to get final output
    output_layer = tf.matmul(layer_concat, output_weights['weights'])

    output = tf.placeholder('float', [None, 3706])

    cost_function = tf.reduce_mean(tf.square(output_layer - output))

    learning_rate = 0.1
    optimizer = tf.train.AdagradOptimizer(learning_rate).minimize(cost_function)

    init = tf.global_variables_initializer()
    session = tf.Session()
    session.run(init)

    batch_size = 100
    epochs = 200
    images = train.shape[0]

    training(batch_size, cost_function, epochs, images, input_layer, optimizer,
             output, output_layer, session, test, train)

    user = test.iloc[99, :]

    pred = session.run(output_layer, feed_dict={input_layer: [user]})
    # TODO: save the predictions so the model need not be retrained every run
    print(pred)
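
training() is never defined in the snippet. A minimal sketch consistent with
the call above (the signature mirrors that call; the mini-batch body is an
assumption):

def training(batch_size, cost_function, epochs, images, input_layer, optimizer,
             output, output_layer, session, test, train):
    for epoch in range(epochs):
        epoch_loss = 0.0
        for start in range(0, images, batch_size):
            batch = train.iloc[start:start + batch_size].values
            # an autoencoder reconstructs its own input, so x and y coincide
            _, c = session.run([optimizer, cost_function],
                               feed_dict={input_layer: batch, output: batch})
            epoch_loss += c
        test_loss = session.run(cost_function,
                                feed_dict={input_layer: test.values,
                                           output: test.values})
        print('epoch %d  train loss %.4f  test loss %.4f'
              % (epoch, epoch_loss, test_loss))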