def __init__(self, d_model, d_edge, kernel_sz, enn_args={}, ann_args=None):
    super().__init__()
    assert kernel_sz <= d_model
    self.d_model, self.kernel_sz = d_model, kernel_sz
    # Edge network: maps each edge feature vector to a flattened
    # (d_model x kernel_sz) transform used to build messages.
    self.enn = FullyConnectedNet(d_edge, d_model * kernel_sz, **enn_args)
    # Optional auxiliary network taking a single scalar feature.
    self.ann = FullyConnectedNet(1, d_model, **ann_args) if ann_args else None
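# The output size d_model * kernel_sz above suggests Gilmer-style edge-network
# message passing: each edge feature vector is reshaped into a matrix that
# turns a neighbour's state into a message. A minimal sketch under that
# assumption; the function name, shapes, and the use of only the first
# kernel_sz state entries are guesses, not the confirmed forward pass.
import torch

def enn_message(enn, h_nbr, edge_feats, d_model, kernel_sz):
    # edge_feats: (E, d_edge) -> per-edge transforms A: (E, d_model, kernel_sz)
    A = enn(edge_feats).view(-1, d_model, kernel_sz)
    # Contract A with the first kernel_sz entries of each neighbour state,
    # giving one d_model-dimensional message per edge.
    return torch.einsum('edk,ek->ed', A, h_nbr[:, :kernel_sz])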
def __init__(self, d_atom, d_bond, d_sc_pair, d_sc_mol, N=6, d_model=512,
             d_ff=2048, d_ff_contrib=128, h=8, dropout=0.1, kernel_sz=128,
             enn_args={}, ann_args={}):
    super().__init__()
    assert d_model % h == 0
    self.d_model = d_model
    c = copy.deepcopy
    # Message functions for bonds and for scalar-coupling atom pairs.
    bond_mess = ENNMessage(d_model, d_bond, kernel_sz, enn_args, ann_args)
    sc_mess = ENNMessage(d_model, d_sc_pair, kernel_sz, enn_args)
    # Three attention flavours: Euclidean distance, graph distance, vanilla.
    eucl_dist_attn = MultiHeadedEuclDistAttention(h, d_model)
    graph_dist_attn = MultiHeadedGraphDistAttention(h, d_model)
    self_attn = MultiHeadedSelfAttention(h, d_model, dropout)
    ff = FullyConnectedNet(d_model, d_model, [d_ff], dropout=[dropout])
    message_passing_layer = MessagePassingLayer(d_model, bond_mess, sc_mess,
                                                dropout, N)
    attending_layer = AttendingLayer(d_model, c(eucl_dist_attn),
                                     c(graph_dist_attn), c(self_attn),
                                     c(ff), dropout)
    self.projection = nn.Linear(d_atom, d_model)
    self.encoder = Encoder(message_passing_layer, attending_layer, N)
    # Head input size: two d_model atom states plus molecule-level features.
    self.write_head = MyCustomHead(2 * d_model + d_sc_mol, d_ff, d_ff_contrib,
                                   norm=True)
def __init__(self, *args, **kwargs):
    super().__init__()
    # Thin wrapper: all arguments are forwarded to the underlying network.
    self.imputer_net = FullyConnectedNet(*args, **kwargs)
out_dim = 7

# PARAMETERS
# These parameters were tuned to optimize the performance of the network.
# For each parameter, n candidate values were listed, and the network was
# trained n times within a loop to find the value that makes the network
# perform best on the validation set.
hidden_dims = [512, 256]
learning_rate = 1e-2
p_of_drop = 0.01
regul = 0.01
momentum = 0.9
batch = 50
epochs = 50
lr_decay = 0.95

net = FullyConnectedNet(hidden_dims=hidden_dims, input_dim=input_dim,
                        num_classes=out_dim, dropout=p_of_drop, reg=regul,
                        seed=0)
solver = Solver(net, data,
                update_rule='sgd_momentum',
                optim_config={'learning_rate': learning_rate,
                              'momentum': momentum},
                lr_decay=lr_decay,
                num_epochs=epochs,
                batch_size=batch,
                print_every=1000)
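# The comment above describes the search loop itself; a minimal sketch of it,
# shown for the learning rate only. The candidate list is illustrative, and
# best_val_acc is assumed from the cs231n-style Solver interface.
best_acc, best_lr = 0.0, None
for lr in [1e-1, 1e-2, 1e-3]:
    net = FullyConnectedNet(hidden_dims=hidden_dims, input_dim=input_dim,
                            num_classes=out_dim, dropout=p_of_drop,
                            reg=regul, seed=0)
    s = Solver(net, data, update_rule='sgd_momentum',
               optim_config={'learning_rate': lr, 'momentum': momentum},
               lr_decay=lr_decay, num_epochs=epochs, batch_size=batch,
               print_every=1000)
    s.train()
    if s.best_val_acc > best_acc:
        best_acc, best_lr = s.best_val_acc, lr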
data = {
    'X_train': x_train,
    'y_train': y_train,
    'X_val': eval_data,
    'y_val': eval_labels,
}

# Zero-center using the training-set mean, and save it so the same shift
# can be applied to unseen data later.
miu = np.mean(data['X_train'], axis=0)
dl.saveData(miu, 'miu.pkl')
data['X_train'] -= miu
data['X_val'] -= miu

model = FullyConnectedNet([1024, 512], num_classes=7, input_dim=48 * 48,
                          dropout=0.3, reg=0.01, weight_scale=1e-2,
                          dtype=np.float32, seed=42)
# Earlier trials used learning_rate=0.0012 and momentum=0.95.
solver = Solver(model, data,
                update_rule='sgd_momentum',
                optim_config={'learning_rate': 0.0005, 'momentum': 0.92})
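# Whatever preprocessing is fit on the training set must be replayed on unseen
# data. A sketch of reloading the saved mean, assuming dl.saveData writes a
# plain pickle (the helper's on-disk format is not shown here); x_test is a
# placeholder for the held-out inputs.
import pickle

with open('miu.pkl', 'rb') as f:
    miu = pickle.load(f)
x_test = x_test - miu  # same shift as applied to X_train and X_val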
from utils.data_utils import get_CIFAR10_data

"""
TODO: Use a Solver instance to train a TwoLayerNet that achieves at least
50% accuracy on the validation set.
"""
###########################################################################
#                           BEGIN OF YOUR CODE                            #
###########################################################################
datapath = '/media/mat10/EA3F-222E/395/CW2/CW2_data/cifar-10-batches-py'
data = get_CIFAR10_data()  # or get_CIFAR10_data(datapath) to load from disk

hidden_dims = [512, 256]
net = FullyConnectedNet(hidden_dims, num_classes=10, dropout=0.0, reg=0.2,
                        seed=0)
solver = Solver(net, data,
                update_rule='sgd_momentum',
                optim_config={'learning_rate': 1e-3, 'momentum': 0.9},
                lr_decay=0.975,
                num_epochs=100,
                batch_size=50,
                print_every=1000)
solver.train()
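# The TODO asks for at least 50% validation accuracy. Assuming this Solver
# follows the cs231n interface and records best_val_acc, the target can be
# checked after training:
print('best validation accuracy: {:.3f}'.format(solver.best_val_acc))
assert solver.best_val_acc >= 0.5, 'below the 50% target'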
###########################################################################
DIC = get_CIFAR10_data()
data = {
    'X_train': DIC['X_train'],
    'y_train': DIC['y_train'],
    'X_val': DIC['X_val'],
    'y_val': DIC['y_val'],
}

# Configuration for CIFAR-10 training.
model = FullyConnectedNet([150, 60], num_classes=10, dropout=0, reg=0.01,
                          weight_scale=1e-2, dtype=np.float32, seed=None)
solver = Solver(model, data,
                update_rule='sgd_momentum',
                optim_config={'learning_rate': 0.0003, 'momentum': 0.8},
                lr_decay=0.999,
                num_epochs=20,
                batch_size=128,
                print_every=100)
#                           BEGIN OF YOUR CODE                            #
###########################################################################
DIC = get_CIFAR10_data()
# Keep only the first 49 training examples so the network can memorise them.
data = {
    'X_train': DIC['X_train'][0:49],
    'y_train': DIC['y_train'][0:49],
    'X_val': DIC['X_val'],
    'y_val': DIC['y_val'],
}

# Configuration for the overfitting sanity check.
model = FullyConnectedNet([20, 30], num_classes=10, dropout=0, reg=0.5,
                          weight_scale=1e-2, dtype=np.float32, seed=42)
solver = Solver(model, data,
                update_rule='sgd',
                optim_config={'learning_rate': 0.0033},
                lr_decay=1,
                num_epochs=20,
                batch_size=10,
                print_every=100)
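# Training on just 49 examples is a sanity check: a correct implementation
# should push training accuracy toward 100% while validation accuracy stays
# low. Assuming the cs231n-style accuracy histories are available:
solver.train()
print('final train acc: {:.3f}, final val acc: {:.3f}'.format(
    solver.train_acc_history[-1], solver.val_acc_history[-1]))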
# Model hyperparameters.
h1, h2, h3, h4 = 100, 100, 100, 100
hidden_dims = [h1, h2, h3, h4]   # earlier trial: a single layer, [h1] with h1 = 50
input_dims = 3 * 32 * 32
num_classes = 10
lambda_reg = 0.0
weight_scale = 2.461858e-02      # earlier trials: 1e-5, 1e-2

model = FullyConnectedNet(hidden_dims=hidden_dims, input_dims=input_dims,
                          num_classes=num_classes, lambda_reg=lambda_reg,
                          weight_scale=weight_scale, dtype=np.float64)

# Training parameters.
update_rule = 'sgd'
learning_rate = 3.113669e-04     # earlier trial: 1e-3
batch_size = 25
num_epochs = 20
print_every = 10

solver = Solver(model, data,
                update_rule=update_rule,
                optim_config={'learning_rate': learning_rate},
                batch_size=batch_size,
                num_epochs=num_epochs,
                print_every=print_every)
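# Values like 3.113669e-04 and 2.461858e-02 look like winners of a log-uniform
# random search rather than hand-picked round numbers. A sketch of how such
# candidates are commonly drawn; the ranges are illustrative, not the ones
# actually searched.
import numpy as np

rng = np.random.default_rng(0)
learning_rate = 10 ** rng.uniform(-5, -2)  # log-uniform in [1e-5, 1e-2]
weight_scale = 10 ** rng.uniform(-3, -1)   # log-uniform in [1e-3, 1e-1]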