def __init__(self):
    self.graph = tf.Graph()
    with self.graph.as_default():
        B, H, W, C, P, T, O, F, U = (
            param.batch_size,
            param.map_height,
            param.map_width,
            param.closeness_sequence_length * param.nb_flow,
            param.period_sequence_length * param.nb_flow,
            param.trend_sequence_length * param.nb_flow,
            param.num_of_output,
            param.num_of_filters,
            param.num_of_residual_units,
        )

        # get input and output
        # shape of an input map: (batch_size, map_height, map_width, depth (num of history maps))
        self.c_inp = tf.placeholder(tf.float32, shape=[B, H, W, C], name="closeness")
        self.p_inp = tf.placeholder(tf.float32, shape=[B, H, W, P], name="period")
        self.t_inp = tf.placeholder(tf.float32, shape=[B, H, W, T], name="trend")
        self.output = tf.placeholder(tf.float32, shape=[B, H, W, O], name="output")

        # ResNet architecture for the three modules
        # module 1: capturing closeness (recent)
        self.closeness_output = my.ResInput(inputs=self.c_inp, filters=F, kernel_size=(7, 7),
                                            scope="closeness_input", reuse=None)
        self.closeness_output = my.ResNet(inputs=self.closeness_output, filters=F, kernel_size=(7, 7),
                                          repeats=U, scope="resnet", reuse=None)
        self.closeness_output = my.ResOutput(inputs=self.closeness_output, filters=1, kernel_size=(7, 7),
                                             scope="resnet_output", reuse=None)

        # module 2: capturing period (near)
        self.period_output = my.ResInput(inputs=self.p_inp, filters=F, kernel_size=(7, 7),
                                         scope="period_input", reuse=None)
        self.period_output = my.ResNet(inputs=self.period_output, filters=F, kernel_size=(7, 7),
                                       repeats=U, scope="resnet", reuse=True)
        self.period_output = my.ResOutput(inputs=self.period_output, filters=1, kernel_size=(7, 7),
                                          scope="resnet_output", reuse=True)

        # module 3: capturing trend (distant)
        self.trend_output = my.ResInput(inputs=self.t_inp, filters=F, kernel_size=(7, 7),
                                        scope="trend_input", reuse=None)
        self.trend_output = my.ResNet(inputs=self.trend_output, filters=F, kernel_size=(7, 7),
                                      repeats=U, scope="resnet", reuse=True)
        self.trend_output = my.ResOutput(inputs=self.trend_output, filters=1, kernel_size=(7, 7),
                                         scope="resnet_output", reuse=True)

        # parameter-matrix-based fusion
        self.x_res = my.Fusion(self.closeness_output, self.period_output, self.trend_output,
                               scope="fusion", shape=[W, W])

        # loss function: squared error averaged over the batch
        self.loss = tf.reduce_sum(tf.pow(self.x_res - self.output, 2)) \
            / tf.cast(self.x_res.shape[0], tf.float32)

        # use Adam optimizer
        self.optimizer = tf.train.AdamOptimizer(learning_rate=param.lr,
                                                beta1=param.beta1,
                                                beta2=param.beta2,
                                                epsilon=param.epsilon).minimize(self.loss)

        # loss summary
        tf.summary.scalar('loss', self.loss)
        self.merged = tf.summary.merge_all()
        self.saver = tf.train.Saver(max_to_keep=None)
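# Hedged usage sketch (not part of the original source): one training step for
# the graph built above. `model` stands for an instance of the class that owns
# this __init__, `sess` for a tf.Session created on model.graph, and the numpy
# batches are assumed to match the placeholder shapes defined above.
def example_train_step(model, sess, c_batch, p_batch, t_batch, y_batch):
    """Run one Adam update and return the batch loss."""
    _, batch_loss = sess.run(
        [model.optimizer, model.loss],
        feed_dict={model.c_inp: c_batch,
                   model.p_inp: p_batch,
                   model.t_inp: t_batch,
                   model.output: y_batch})
    return batch_loss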
def create_evalnet(self, D):
    '''
    return torch.nn.Sequential(
        torch.nn.Linear(D, 32), torch.nn.ReLU(),
        torch.nn.Linear(32, 32), torch.nn.ReLU(),
        torch.nn.Linear(32, 1)
    )
    '''
    if not hasattr(self, "_evalnet"):
        self._evalnet = torch.nn.Sequential(
            torch.nn.Linear(D, 32),
            modules.ResNet(
                modules.ResBlock(
                    block=torch.nn.Sequential(
                        modules.PrototypeClassifier(32, 32),
                        modules.polynomial.Activation(32, n_degree=6),
                        torch.nn.Linear(32, 32)
                        # torch.nn.ReLU(),
                        # torch.nn.Linear(64, 64),
                        # torch.nn.ReLU(),
                        # torch.nn.Linear(64, 64),
                    )
                ),
                modules.ResBlock(
                    block=torch.nn.Sequential(
                        modules.PrototypeClassifier(32, 32),
                        modules.polynomial.Activation(32, n_degree=6),
                        torch.nn.Linear(32, 32)
                        # torch.nn.ReLU(),
                        # torch.nn.Linear(64, 64),
                        # torch.nn.ReLU(),
                        # torch.nn.Linear(64, 64),
                    )
                )
            ),
            torch.nn.Linear(32, 1)
        )
    return self._evalnet
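# Hedged usage sketch (not in the original source): `owner` is a placeholder
# for whatever object defines create_evalnet. The cached network maps a batch
# of D-dimensional feature vectors to one scalar score per sample.
def example_eval_scores(owner, features):
    """Score a (batch, D) float tensor with the cached eval net."""
    evalnet = owner.create_evalnet(features.shape[1])
    return evalnet(features)  # shape: (batch, 1)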
def setup(data_reader_file,
          name='classifier',
          num_labels=200,
          mini_batch_size=128,
          num_epochs=1000,
          learning_rate=0.1,
          bn_statistics_group_size=2,
          fc_data_layout='model_parallel',
          warmup_epochs=50,
          learning_rate_drop_interval=50,
          learning_rate_drop_factor=0.25,
          checkpoint_interval=None):

    # Setup input data
    input = lbann.Input(target_mode='classification')
    images = lbann.Identity(input)
    labels = lbann.Identity(input)

    # Classification network
    head_cnn = modules.ResNet(bn_statistics_group_size=bn_statistics_group_size)
    class_fc = lbann.modules.FullyConnectedModule(num_labels,
                                                  activation=lbann.Softmax,
                                                  name=f'{name}_fc',
                                                  data_layout=fc_data_layout)
    x = head_cnn(images)
    probs = class_fc(x)

    # Setup objective function
    cross_entropy = lbann.CrossEntropy([probs, labels])
    l2_reg_weights = set()
    for l in lbann.traverse_layer_graph(input):
        if type(l) == lbann.Convolution or type(l) == lbann.FullyConnected:
            l2_reg_weights.update(l.weights)
    l2_reg = lbann.L2WeightRegularization(weights=l2_reg_weights, scale=0.0002)
    obj = lbann.ObjectiveFunction([cross_entropy, l2_reg])

    # Setup model
    metrics = [lbann.Metric(lbann.CategoricalAccuracy([probs, labels]),
                            name='accuracy', unit='%')]
    callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer()]
    if checkpoint_interval:
        callbacks.append(
            lbann.CallbackCheckpoint(
                checkpoint_dir='ckpt',
                checkpoint_epochs=5
            )
        )

    # Learning rate schedules
    if warmup_epochs:
        callbacks.append(
            lbann.CallbackLinearGrowthLearningRate(
                target=learning_rate * mini_batch_size / 128,
                num_epochs=warmup_epochs
            )
        )
    if learning_rate_drop_factor:
        callbacks.append(
            lbann.CallbackDropFixedLearningRate(
                drop_epoch=list(range(0, num_epochs, learning_rate_drop_interval)),
                amt=learning_rate_drop_factor)
        )

    # Construct model
    model = lbann.Model(num_epochs,
                        layers=lbann.traverse_layer_graph(input),
                        objective_function=obj,
                        metrics=metrics,
                        callbacks=callbacks)

    # Setup optimizer
    # opt = lbann.Adam(learn_rate=learning_rate, beta1=0.9, beta2=0.999, eps=1e-8)
    opt = lbann.SGD(learn_rate=learning_rate, momentum=0.9)

    # Load data reader from prototext
    data_reader_proto = lbann.lbann_pb2.LbannPB()
    with open(data_reader_file, 'r') as f:
        google.protobuf.text_format.Merge(f.read(), data_reader_proto)
    data_reader_proto = data_reader_proto.data_reader
    for reader_proto in data_reader_proto.reader:
        reader_proto.python.module_dir = os.path.dirname(os.path.realpath(__file__))

    # Return experiment objects
    return model, data_reader_proto, opt
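# Hedged driver sketch (not part of the original source): one way the returned
# (model, data_reader, opt) objects might be launched with LBANN's Python front
# end. The exact entry point and arguments (lbann.Trainer, lbann.contrib.launcher.run)
# vary across LBANN versions, so treat this as an assumption-laden outline, not
# the repository's actual driver.
#
#     model, data_reader, opt = setup('data_reader.prototext')
#     trainer = lbann.Trainer(mini_batch_size=128)
#     lbann.contrib.launcher.run(trainer, model, data_reader, opt,
#                                job_name='classifier')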
def setup(num_patches=3,
          mini_batch_size=512,
          num_epochs=75,
          learning_rate=0.005,
          bn_statistics_group_size=2,
          fc_data_layout='model_parallel',
          warmup=True,
          checkpoint_interval=None):

    # Data dimensions
    patch_dims = patch_generator.patch_dims
    num_labels = patch_generator.num_labels(num_patches)

    # Extract tensors from data sample
    input = lbann.Input()
    slice_points = [0]
    for _ in range(num_patches):
        patch_size = functools.reduce(operator.mul, patch_dims)
        slice_points.append(slice_points[-1] + patch_size)
    slice_points.append(slice_points[-1] + num_labels)
    sample = lbann.Slice(input, slice_points=str_list(slice_points))
    patches = [lbann.Reshape(sample, dims=str_list(patch_dims))
               for _ in range(num_patches)]
    labels = lbann.Identity(sample)

    # Siamese network
    head_cnn = modules.ResNet(bn_statistics_group_size=bn_statistics_group_size)
    heads = [head_cnn(patch) for patch in patches]
    heads_concat = lbann.Concatenation(heads)

    # Classification network
    class_fc1 = modules.FcBnRelu(
        4096,
        statistics_group_size=bn_statistics_group_size,
        name='siamese_class_fc1',
        data_layout=fc_data_layout)
    class_fc2 = modules.FcBnRelu(
        4096,
        statistics_group_size=bn_statistics_group_size,
        name='siamese_class_fc2',
        data_layout=fc_data_layout)
    class_fc3 = lbann.modules.FullyConnectedModule(num_labels,
                                                   activation=lbann.Softmax,
                                                   name='siamese_class_fc3',
                                                   data_layout=fc_data_layout)
    x = class_fc1(heads_concat)
    x = class_fc2(x)
    probs = class_fc3(x)

    # Setup objective function
    cross_entropy = lbann.CrossEntropy([probs, labels])
    l2_reg_weights = set()
    for l in lbann.traverse_layer_graph(input):
        if type(l) == lbann.Convolution or type(l) == lbann.FullyConnected:
            l2_reg_weights.update(l.weights)
    l2_reg = lbann.L2WeightRegularization(weights=l2_reg_weights, scale=0.0002)
    obj = lbann.ObjectiveFunction([cross_entropy, l2_reg])

    # Setup model
    metrics = [
        lbann.Metric(lbann.CategoricalAccuracy([probs, labels]),
                     name='accuracy', unit='%')
    ]
    callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer()]
    if checkpoint_interval:
        callbacks.append(
            lbann.CallbackCheckpoint(checkpoint_dir='ckpt',
                                     checkpoint_epochs=5))

    # Learning rate schedules
    if warmup:
        callbacks.append(
            lbann.CallbackLinearGrowthLearningRate(
                target=learning_rate * mini_batch_size / 128,
                num_epochs=5))
    callbacks.append(
        lbann.CallbackDropFixedLearningRate(
            drop_epoch=list(range(0, 100, 15)),
            amt=0.25))

    # Construct model
    model = lbann.Model(num_epochs,
                        layers=lbann.traverse_layer_graph(input),
                        objective_function=obj,
                        metrics=metrics,
                        callbacks=callbacks)

    # Setup optimizer
    opt = lbann.SGD(learn_rate=learning_rate, momentum=0.9)
    # opt = lbann.Adam(learn_rate=learning_rate, beta1=0.9, beta2=0.999, eps=1e-8)

    # Setup data reader
    data_reader = make_data_reader(num_patches)

    # Return experiment objects
    return model, data_reader, opt
def __init__(self):
    self.graph = tf.Graph()
    with self.graph.as_default():
        B, H, W, C, P, T, O, F, U, V, S, N, R = (
            param.batch_size,
            param.map_height,
            param.map_width,
            param.closeness_sequence_length,
            param.period_sequence_length,
            param.trend_sequence_length,
            param.num_of_output_tec_maps,
            param.num_of_filters,
            param.num_of_residual_units,
            param.exo_values,
            param.gru_size,
            param.gru_num_layers,
            param.resnet_out_filters,
        )

        # ResNet architecture for the three modules
        with tf.device('/device:GPU:0'):
            # module 1: capturing closeness (recent)
            if param.closeness_channel == True:
                # shape of a TEC map: (batch_size, map_height, map_width, depth (num of history TEC maps))
                self.c_tec = tf.placeholder(tf.float32, shape=[None, H, W, C],
                                            name="closeness_tec_maps")
                print("closeness input shape:", self.c_tec.shape)
                self.closeness_input = my.ResInput(inputs=self.c_tec, filters=F,
                                                   kernel_size=param.kernel_size,
                                                   scope="closeness_input", reuse=None)
                self.closeness_resnet = my.ResNet(inputs=self.closeness_input, filters=F,
                                                  kernel_size=param.kernel_size, repeats=U,
                                                  scope="closeness_resnet", reuse=None)
                self.closeness_output = my.ResOutput(inputs=self.closeness_resnet, filters=R,
                                                     kernel_size=param.kernel_size,
                                                     scope="closeness_output", reuse=None)

        with tf.device('/device:GPU:1'):
            # module 2: capturing period (near)
            if param.period_channel == True:
                # shape of a TEC map: (batch_size, map_height, map_width, depth (num of history TEC maps))
                self.p_tec = tf.placeholder(tf.float32, shape=[None, H, W, P],
                                            name="period_tec_maps")
                print("period input shape:", self.p_tec.shape)
                self.period_input = my.ResInput(inputs=self.p_tec, filters=F,
                                                kernel_size=param.kernel_size,
                                                scope="period_input", reuse=None)
                self.period_resnet = my.ResNet(inputs=self.period_input, filters=F,
                                               kernel_size=param.kernel_size, repeats=U,
                                               scope="period_resnet", reuse=None)
                self.period_output = my.ResOutput(inputs=self.period_resnet, filters=R,
                                                  kernel_size=param.kernel_size,
                                                  scope="period_output", reuse=None)

        with tf.device('/device:GPU:0'):
            # module 3: capturing trend (distant)
            if param.trend_channel == True:
                # shape of a TEC map: (batch_size, map_height, map_width, depth (num of history TEC maps))
                self.t_tec = tf.placeholder(tf.float32, shape=[None, H, W, T],
                                            name="trend_tec_maps")
                print("trend input shape:", self.t_tec.shape)
                self.trend_input = my.ResInput(inputs=self.t_tec, filters=F,
                                               kernel_size=param.kernel_size,
                                               scope="trend_input", reuse=None)
                self.trend_resnet = my.ResNet(inputs=self.trend_input, filters=F,
                                              kernel_size=param.kernel_size, repeats=U,
                                              scope="trend_resnet", reuse=None)
                self.trend_output = my.ResOutput(inputs=self.trend_resnet, filters=R,
                                                 kernel_size=param.kernel_size,
                                                 scope="trend_output", reuse=None)

        if param.add_exogenous == True:
            # lookback for the exogenous input is the same as trend_freq * trend length
            self.exogenous = tf.placeholder(tf.float32, shape=[None, param.trend_freq * T, V],
                                            name="exogenous")
            print("exogenous variable:", self.exogenous.shape)

            # process the exogenous variables; output shape (batch_size, gru_size)
            self.external = my.exogenous_module(self.exogenous, S, N)
            # shape (batch_size, 1, gru_size)
            self.external = tf.expand_dims(self.external, 1)

            # combine the exogenous output with each module output:
            # broadcast the exogenous vector over every map location
            self.val = tf.tile(self.external, [1, H * W, 1])
            self.exo = tf.reshape(self.val, [-1, H, W, S])

            # concatenate each module's output with the exogenous module output
            with tf.device('/device:GPU:0'):
                if param.closeness_channel == True:
                    self.close_concat = tf.concat([self.exo, self.closeness_output], 3,
                                                  name="close_concat")
                    # last convolutional layer for combining the exo and closeness information
                    self.exo_close = tf.layers.conv2d(inputs=self.close_concat, filters=O,
                                                      kernel_size=param.kernel_size,
                                                      strides=(1, 1), padding="SAME",
                                                      name="exo_close")

            with tf.device('/device:GPU:1'):
                if param.period_channel == True:
                    self.period_concat = tf.concat([self.exo, self.period_output], 3,
                                                   name="period_concat")
                    # last convolutional layer for combining the exo and period information
                    self.exo_period = tf.layers.conv2d(inputs=self.period_concat, filters=O,
                                                       kernel_size=param.kernel_size,
                                                       strides=(1, 1), padding="SAME",
                                                       name="exo_period")

            with tf.device('/device:GPU:0'):
                if param.trend_channel == True:
                    self.trend_concat = tf.concat([self.exo, self.trend_output], 3,
                                                  name="trend_concat")
                    # last convolutional layer for combining the exo and trend information
                    self.exo_trend = tf.layers.conv2d(inputs=self.trend_concat, filters=O,
                                                      kernel_size=param.kernel_size,
                                                      strides=(1, 1), padding="SAME",
                                                      name="exo_trend")

            # parameter-matrix-based fusion of the outputs after combining with exo
            if (param.closeness_channel == True and param.period_channel == True
                    and param.trend_channel == True):
                self.x_res = my.Fusion(scope="fusion", shape=[W, W], num_outputs=O,
                                       closeness_output=self.exo_close,
                                       period_output=self.exo_period,
                                       trend_output=self.exo_trend)
            elif (param.closeness_channel == True and param.period_channel == True
                    and param.trend_channel == False):
                self.x_res = my.Fusion(scope="fusion", shape=[W, W], num_outputs=O,
                                       closeness_output=self.exo_close,
                                       period_output=self.exo_period)
            elif (param.closeness_channel == True and param.period_channel == False
                    and param.trend_channel == True):
                self.x_res = my.Fusion(scope="fusion", shape=[W, W], num_outputs=O,
                                       closeness_output=self.exo_close,
                                       period_output=None,
                                       trend_output=self.exo_trend)
            elif (param.closeness_channel == True and param.period_channel == False
                    and param.trend_channel == False):
                self.x_res = my.Fusion(scope="fusion", shape=[W, W], num_outputs=O,
                                       closeness_output=self.exo_close)

        else:
            # parameter-matrix-based fusion of the raw module outputs (no exogenous input)
            if (param.closeness_channel == True and param.period_channel == True
                    and param.trend_channel == True):
                self.x_res = my.Fusion(scope="fusion", shape=[W, W], num_outputs=O,
                                       closeness_output=self.closeness_output,
                                       period_output=self.period_output,
                                       trend_output=self.trend_output)
            elif (param.closeness_channel == True and param.period_channel == True
                    and param.trend_channel == False):
                self.x_res = my.Fusion(scope="fusion", shape=[W, W], num_outputs=O,
                                       closeness_output=self.closeness_output,
                                       period_output=self.period_output)
            elif (param.closeness_channel == True and param.period_channel == False
                    and param.trend_channel == True):
                self.x_res = my.Fusion(scope="fusion", shape=[W, W], num_outputs=O,
                                       closeness_output=self.closeness_output,
                                       period_output=None,
                                       trend_output=self.trend_output)
            elif (param.closeness_channel == True and param.period_channel == False
                    and param.trend_channel == False):
                self.x_res = my.Fusion(scope="fusion", shape=[W, W], num_outputs=O,
                                       closeness_output=self.closeness_output)

        # shape of the output TEC map: (batch_size, map_height, map_width, number of predictions)
        self.output_tec = tf.placeholder(tf.float32, shape=[None, H, W, O],
                                         name="output_tec_map")
        print("output shape:", self.output_tec)

        self.loss_weight_matrix = tf.placeholder(tf.float32, shape=[None, H, W, O],
                                                 name="loss_weight_matrix")
        print("loss_weight_matrix:", self.loss_weight_matrix)
        # scale the squared error with the loss weight tensor (elementwise operation)
        self.tec_error = tf.multiply(tf.pow((self.x_res - self.output_tec), 2),
                                     self.loss_weight_matrix)
        print("tec_error:", self.tec_error.shape)

        # The total sum is computed first and then divided; the built-in ops handle overflow.
        # self.loss = tf.reduce_sum(tf.pow(self.x_res - self.output_tec, 2)) / (self.x_res.shape[0])
        #   would be equivalent, but the batch size is declared as None, so that form can't be used here.
        # Dividing by the number of outputs gives the average loss per output TEC map:
        self.loss = tf.reduce_mean(
            tf.reduce_sum(tf.reduce_sum(tf.reduce_sum(self.tec_error, axis=3),
                                        axis=1), axis=1)) / (1.0 * O)
        # Dividing by (number of outputs * dims of a TEC map) would instead give the average loss per pixel:
        # self.loss = tf.reduce_mean(tf.reduce_sum(tf.reduce_sum(tf.reduce_sum(
        #     self.tec_error, axis=3), axis=1), axis=1)) / (1.0 * O * H * W)

        self.optimizer = tf.train.AdamOptimizer(learning_rate=param.lr,
                                                beta1=param.beta1,
                                                beta2=param.beta2,
                                                epsilon=param.epsilon).minimize(self.loss)

        # loss summary
        tf.summary.scalar('loss', self.loss)
        self.merged = tf.summary.merge_all()
        self.saver = tf.train.Saver(max_to_keep=None)
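# Hedged reference check (not part of the original source): a small NumPy
# re-implementation of the loss above, i.e. weighted squared error summed over
# H, W and the O predicted maps, averaged over the batch, then divided by O to
# give the average loss per output TEC map.
import numpy as np

def weighted_loss_reference(pred, target, weights):
    """pred, target, weights: arrays of shape (batch, H, W, O)."""
    err = ((pred - target) ** 2) * weights      # elementwise weighting
    per_sample = err.sum(axis=(1, 2, 3))        # sum over H, W and O
    return per_sample.mean() / pred.shape[3]    # mean over batch, per output map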