import torch
import torch.nn.functional as F

import utils


def train(model, training_data, num_epochs=10, lr=1e-2, batch_size=1, validation_data=None):
    # Build the dataset and get the dataloader for the training data
    X_train, y_train = training_data
    train_minibatches = utils.load_data(X_train, y_train, batch_size=batch_size)

    # Validation data (a single batch containing the whole validation set)
    X_valid, y_valid = validation_data
    valid_minibatches = utils.load_data(X_valid, y_valid, batch_size=len(y_valid))

    history = {'training_loss': [], 'validation_loss': []}

    # This assumes that the model has a single linear submodule called `layer`
    W = model.layer.weight
    b = model.layer.bias

    # Main optimization loop
    for epoch in range(num_epochs):
        # Loop over all mini-batches
        batch_loss = []
        for inputs, targets in train_minibatches:
            # Compute the predicted outputs (nn.Linear stores W as (out_features, in_features))
            outputs = inputs.mm(W.t()) + b

            # Evaluate the difference between the known targets
            # and the predicted targets
            loss = F.mse_loss(outputs, targets)

            # Optimization step: compute the gradients and take a plain
            # gradient-descent step on the parameters
            loss.backward()
            with torch.no_grad():
                W -= lr * W.grad
                b -= lr * b.grad
                W.grad.zero_()
                b.grad.zero_()

            # Add the loss for this mini-batch to the array of losses
            batch_loss.append(loss.item())

        # The loss for each epoch is the average loss observed for all mini-batches
        avg_loss = torch.tensor(batch_loss).mean().item()
        history['training_loss'].append(avg_loss)
        print(f'Epoch {epoch}: {avg_loss}')

        # Validation loss/error
        with torch.no_grad():
            for x_valid, y_valid in valid_minibatches:
                pred = model(x_valid)
                err = F.mse_loss(pred, y_valid).item()
                history['validation_loss'].append(err)

    return history
import pandas as pd


def augment_data(train_filename, new_train_filename):
    # Generate synthetic training rows from the product catalogue and mix
    # them into the original training set
    products_data_map, products_name_map = get_product_data()
    data = load_data(train_filename)

    generated_data = flatten([
        generate_data_from_product(sku, product['name'])
        for sku, product in products_data_map.items()
    ])
    generated_data_df = pd.DataFrame(
        generated_data,
        columns=['user', 'sku', 'category', 'query', 'click_time', 'query_time'])

    # Shuffle the combined data before saving it
    new_train = pd.concat([data, generated_data_df]).sample(frac=1).reset_index(drop=True)
    save_data(new_train, new_train_filename)
def get_dataset(path, features, normData=True, FourTransform=True, windowSize=None, mv_avg=10, beanFunc=Package):
    '''
    `path` holds the communication records for a time interval.
    This method reads the records one by one, uses `beanFunc` to turn each record into an
    object P describing one communication event (one time point of one connection), and
    stores them in a database DB.

    DB structure:
        db: an (N, T, D) array holding the actual data
        getConnectId([id1, id2]): returns the connection index for the given pair of ids
        search(connectid): returns the [id1, id2, ...] matching connectid (fuzzy matching by default)

    This method also:
    1. standardizes DB.db (normData=True)
    2. performs a windowed spectral analysis (DFT) of the features in DB.db, using windows of
       size windowSize (FourTransform=True)

    Returns:
        db: the DB object
        np_db: db.db after the processing above

    :param path:
    :param features:
    :param normData:
    :param FourTransform:
    :param windowSize:
    :return: np_db: an (N, T, D) array with D = len(features). If FourTransform=False these are
        the raw features and np_db[:, t, d] is the value of feature d at second t; otherwise
        they are the FFT-transformed features.
    '''
    db = load_data(path, features, beanFunc)
    np_db = db.db

    # Optional moving-average smoothing of each feature (currently disabled):
    # v = np.ones((mv_avg)) / mv_avg
    # N, T, D = np_db.shape
    # for n in range(N):
    #     for d in range(D):
    #         np_db[n, :, d] = np.convolve(np_db[n, :, d], v, 'same')

    if normData:
        np_db = standardizeData(np_db, 'STD')
    if FourTransform:
        np_db, feature_size, steps = fourierAnalysis(np_db, windowSize=windowSize)
        np_db = np.abs(np_db)
    return db, np_db
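# A minimal usage sketch for get_dataset. The path, the feature names, and the window size
# below are illustrative assumptions, not values from the original project.
if __name__ == '__main__':
    db, np_db = get_dataset('../data/traffic_records', ['bytes', 'packets'],
                            normData=True, FourTransform=True, windowSize=64)
    # np_db has shape (N, T, D): N connections, T time steps/windows, D = len(features)
    print(np_db.shape)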
import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline

from data.utils import load_data
from preprocessing import preprocess_data
from visualization import plot_learning_curves, get_errors_input
from metrics import custom_map_at_k
from feature_selection import get_features_extractor
from data_augmentation import augment_data

print('Augmenting training data set')
augment_data('train.csv', 'train_augmented.csv')

print('Loading training and testing set')
train_data = load_data('train_augmented.csv')
test_data = load_data('test.csv')

print('Preprocessing')
X_train, Y_train = preprocess_data(train_data)
X_test, Y_test = preprocess_data(test_data)

model_name = 'lr'
# print('Loading model')
# model = joblib.load('./models/' + model_name + '_classifier.pkl')

print('Fitting model')
model = Pipeline([
    ('features', get_features_extractor()),
    ('LogisticRegression', LogisticRegression())
])
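# A hedged sketch of how the pipeline above might be fit, evaluated, and persisted. It assumes
# preprocess_data returns arrays the pipeline accepts, and mirrors the commented-out joblib
# path; the project's own evaluation (e.g. custom_map_at_k) is not reproduced here.
model.fit(X_train, Y_train)
Y_pred = model.predict(X_test)
print(classification_report(Y_test, Y_pred))
joblib.dump(model, './models/' + model_name + '_classifier.pkl')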
import numpy as np
import torch
from matplotlib import pyplot as plt

from data.utils import load_data
from src.layers import *

path = r"../data/"

# Load the 'pp_adj' adjacency matrix and keep it in COO format
pp_net = load_data(path, [])['pp_adj'].tocoo()
indices = torch.LongTensor(
    np.concatenate((pp_net.col.reshape(1, -1), pp_net.row.reshape(1, -1)), axis=0))
# Keep one direction per edge so the train/test split is made over undirected edges
indices = remove_bidirection(indices, None)

n_node = pp_net.shape[0]
n_edge = indices.shape[1]

# Randomly assign 90% of the edges to the training set and the rest to the test set
rd = np.random.binomial(1, 0.9, n_edge)
train_mask = rd.nonzero()[0]
test_mask = (1 - rd).nonzero()[0]

train_indices = indices[:, train_mask]
train_indices = to_bidirection(train_indices, None)
test_indices = indices[:, test_mask]
test_indices = to_bidirection(test_indices, None)

train_n_edge = train_indices.shape[1]
test_n_edge = test_indices.shape[1]

hid1 = 32
hid2 = 16
x = sparse_id(n_node)
import numpy as np
import keras.metrics
from keras.layers import Input, Dropout
from keras.regularizers import l1_l2
from scipy.sparse.csgraph import laplacian as scipy_laplacian

from data.utils import load_data
from gcnlayer import GraphConvolution
from feature_eng import enhance_features, normalize
from utils import preprocess_adj
from custom_losses import crossentropy_weighted

N_EPOCHS = 1000

# Load the Cora citation graph: adjacency matrix, features, labels, and split masks
A, X, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(
    'cora')
X = X.A  # densify the sparse feature matrix

n_features = X.shape[1]
n_vertex = A.shape[0]

# Preprocessed (normalized) adjacency used by the graph convolution layers
A_ = preprocess_adj(A)
graph = [X, A_]


def get_model_kipf():
    adj = Input(shape=(None, None), batch_shape=(None, None), sparse=False)
    inp = Input(shape=(n_features,))
    H = Dropout(0.5)(inp)
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import utils


def train(model, training_data, num_epochs=10, lr=1e-2, batch_size=1, validation_data=None):
    model.double()

    # Function being minimized
    loss_fn = nn.MSELoss()
    # Optimization algorithm being used to minimize the loss
    optimizer = optim.RMSprop(model.parameters(), lr=lr)

    # Build the dataset and get the dataloader for the training data
    X_train, y_train = training_data
    train_minibatches = utils.load_data(X_train, y_train, batch_size=batch_size)

    # Validation data (a single batch containing the whole validation set)
    X_valid, y_valid = validation_data
    valid_minibatches = utils.load_data(X_valid, y_valid, batch_size=len(y_valid))

    history = {'training_loss': [], 'validation_loss': []}

    # Main optimization loop
    for epoch in range(num_epochs):
        # Loop over all mini-batches
        batch_loss = []
        for inputs, targets in train_minibatches:
            # Compute the predicted outputs
            outputs = model(inputs)

            # Evaluate the difference between the known targets
            # and the predicted targets
            loss = loss_fn(outputs, targets)

            # Optimization step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Add the loss for this mini-batch to the array of losses
            batch_loss.append(loss.item())

        # The loss for each epoch is the average loss observed for all mini-batches
        avg_loss = torch.tensor(batch_loss).mean().item()
        history['training_loss'].append(avg_loss)
        print(f'Epoch {epoch}: {avg_loss}')

        # Validation loss/error
        with torch.no_grad():
            for x_valid, y_valid in valid_minibatches:
                pred = model(x_valid)
                err = F.mse_loss(pred, y_valid).item()
                history['validation_loss'].append(err)

    return history
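# A minimal usage sketch for train(). The model, the random data, and the shapes below are
# illustrative assumptions; it also assumes utils.load_data accepts (X, y, batch_size) as in
# the calls above and yields (inputs, targets) mini-batches.
if __name__ == '__main__':
    model = nn.Linear(8, 1)
    X_train, y_train = torch.randn(256, 8, dtype=torch.double), torch.randn(256, 1, dtype=torch.double)
    X_valid, y_valid = torch.randn(64, 8, dtype=torch.double), torch.randn(64, 1, dtype=torch.double)
    history = train(model, (X_train, y_train), num_epochs=5, lr=1e-3, batch_size=32,
                    validation_data=(X_valid, y_valid))
    print(history['training_loss'][-1], history['validation_loss'][-1])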