Example #1
    def __init__(self):
        self.graph = tf.Graph()
        with self.graph.as_default():
            B, H, W, C, P, T, O, F, U = (
                param.batch_size, param.map_height, param.map_width,
                param.closeness_sequence_length * param.nb_flow,
                param.period_sequence_length * param.nb_flow,
                param.trend_sequence_length * param.nb_flow,
                param.num_of_output, param.num_of_filters,
                param.num_of_residual_units)
            # get input and output          
            # shape of an input map: (batch_size, map_height, map_width, depth = number of history maps)
            self.c_inp = tf.placeholder(tf.float32, shape=[B, H, W, C], name="closeness")
            self.p_inp = tf.placeholder(tf.float32, shape=[B, H, W, P], name="period")
            self.t_inp = tf.placeholder(tf.float32, shape=[B, H, W, T], name="trend")
            self.output = tf.placeholder(tf.float32, shape=[B, H, W, O], name="output") 

            # ResNet architecture for the three modules
            # module 1: capturing closeness (recent)
            self.closeness_output = my.ResInput(inputs=self.c_inp, filters=F, kernel_size=(7, 7), scope="closeness_input", reuse=None)
            self.closeness_output = my.ResNet(inputs=self.closeness_output, filters=F, kernel_size=(7, 7), repeats=U, scope="resnet", reuse=None)
            self.closeness_output = my.ResOutput(inputs=self.closeness_output, filters=1, kernel_size=(7, 7), scope="resnet_output", reuse=None)            
            # module 2: capturing period (near)
            self.period_output = my.ResInput(inputs=self.p_inp, filters=F, kernel_size=(7, 7), scope="period_input", reuse=None)
            self.period_output = my.ResNet(inputs=self.period_output, filters=F, kernel_size=(7, 7), repeats=U, scope="resnet", reuse=True)
            self.period_output = my.ResOutput(inputs=self.period_output, filters=1, kernel_size=(7, 7), scope="resnet_output", reuse=True)            
            # module 3: capturing trend (distant) 
            self.trend_output = my.ResInput(inputs=self.t_inp, filters=F, kernel_size=(7, 7), scope="trend_input", reuse=None)
            self.trend_output = my.ResNet(inputs=self.trend_output, filters=F, kernel_size=(7, 7), repeats=U, scope="resnet", reuse=True)
            self.trend_output = my.ResOutput(inputs=self.trend_output, filters=1, kernel_size=(7, 7), scope="resnet_output", reuse=True)            
            # parameter matrix based fusion
            self.x_res = my.Fusion(self.closeness_output, self.period_output, self.trend_output, scope="fusion", shape=[W, W])                        
            # loss function
            self.loss = tf.reduce_sum(tf.pow(self.x_res - self.output, 2)) / tf.cast((self.x_res.shape[0]), tf.float32)            
            # use Adam optimizer
            self.optimizer = tf.train.AdamOptimizer(learning_rate=param.lr, beta1=param.beta1, beta2=param.beta2, epsilon=param.epsilon).minimize(self.loss)           
            #loss summary
            tf.summary.scalar('loss', self.loss)
            self.merged = tf.summary.merge_all()            
            self.saver = tf.train.Saver(max_to_keep=None)
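
A minimal usage sketch for the graph above. The class name STResNet, the param values, and the all-zero batch are assumptions for illustration; only the placeholders and ops defined in __init__ are relied on.

    import numpy as np
    import tensorflow as tf  # TF 1.x graph-mode API, as in the example above

    model = STResNet()  # hypothetical name for the class whose __init__ is shown above
    B, H, W = param.batch_size, param.map_height, param.map_width
    C = param.closeness_sequence_length * param.nb_flow
    P = param.period_sequence_length * param.nb_flow
    T = param.trend_sequence_length * param.nb_flow
    O = param.num_of_output

    # create the init op inside the model's graph so the session can run it
    with model.graph.as_default():
        init_op = tf.global_variables_initializer()

    with tf.Session(graph=model.graph) as sess:
        sess.run(init_op)
        feed = {model.c_inp: np.zeros((B, H, W, C), np.float32),
                model.p_inp: np.zeros((B, H, W, P), np.float32),
                model.t_inp: np.zeros((B, H, W, T), np.float32),
                model.output: np.zeros((B, H, W, O), np.float32)}
        _, loss = sess.run([model.optimizer, model.loss], feed_dict=feed)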
Example #2
    def create_evalnet(self, D):
        '''
        Earlier plain feed-forward version, kept here for reference:

            return torch.nn.Sequential(
                torch.nn.Linear(D, 32),
                torch.nn.ReLU(),
                torch.nn.Linear(32, 32),
                torch.nn.ReLU(),
                torch.nn.Linear(32, 1)
            )
        '''
        if not hasattr(self, "_evalnet"):
            self._evalnet = torch.nn.Sequential(
                torch.nn.Linear(D, 32),

                modules.ResNet(

                    modules.ResBlock(
                        block = torch.nn.Sequential(
                            modules.PrototypeClassifier(32, 32),
                            modules.polynomial.Activation(32, n_degree=6),
                            torch.nn.Linear(32, 32)
                            #torch.nn.ReLU(),
                            #torch.nn.Linear(64, 64),
                            #torch.nn.ReLU(),
                            #torch.nn.Linear(64, 64),
                        )
                    ),

                    modules.ResBlock(
                        block = torch.nn.Sequential(
                            modules.PrototypeClassifier(32, 32),
                            modules.polynomial.Activation(32, n_degree=6),
                            torch.nn.Linear(32, 32)
                            #torch.nn.ReLU(),
                            #torch.nn.Linear(64, 64),
                            #torch.nn.ReLU(),
                            #torch.nn.Linear(64, 64),
                        )
                    )
                ),
                torch.nn.Linear(32, 1)
            )
                
        return self._evalnet
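
A brief usage sketch for create_evalnet. The owning object (some_model) and the feature dimension D=16 are assumptions; the point is that the network is built once and cached on self._evalnet, so later calls return the same module.

    import torch

    evalnet = some_model.create_evalnet(D=16)          # builds and caches the network
    assert evalnet is some_model.create_evalnet(D=16)  # later calls reuse the cached module

    x = torch.randn(8, 16)    # batch of 8 feature vectors of dimension D = 16
    scores = evalnet(x)       # shape (8, 1): one scalar evaluation per sample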
Example #3
def setup(data_reader_file,
          name='classifier',
          num_labels=200,
          mini_batch_size=128,
          num_epochs=1000,
          learning_rate=0.1,
          bn_statistics_group_size=2,
          fc_data_layout='model_parallel',
          warmup_epochs=50,
          learning_rate_drop_interval=50,
          learning_rate_drop_factor=0.25,
          checkpoint_interval=None):

    # Setup input data
    input = lbann.Input(target_mode = 'classification')
    images = lbann.Identity(input)
    labels = lbann.Identity(input)

    # Classification network
    head_cnn = modules.ResNet(bn_statistics_group_size=bn_statistics_group_size)
    class_fc = lbann.modules.FullyConnectedModule(num_labels,
                                                  activation=lbann.Softmax,
                                                  name=f'{name}_fc',
                                                  data_layout=fc_data_layout)
    x = head_cnn(images)
    probs = class_fc(x)

    # Setup objective function
    cross_entropy = lbann.CrossEntropy([probs, labels])
    l2_reg_weights = set()
    for l in lbann.traverse_layer_graph(input):
        if type(l) == lbann.Convolution or type(l) == lbann.FullyConnected:
            l2_reg_weights.update(l.weights)
    l2_reg = lbann.L2WeightRegularization(weights=l2_reg_weights, scale=0.0002)
    obj = lbann.ObjectiveFunction([cross_entropy, l2_reg])

    # Setup model
    metrics = [lbann.Metric(lbann.CategoricalAccuracy([probs, labels]),
                            name='accuracy', unit='%')]
    callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer()]
    if checkpoint_interval:
        callbacks.append(
            lbann.CallbackCheckpoint(
                checkpoint_dir='ckpt',
                checkpoint_epochs=checkpoint_interval
            )
        )

    # Learning rate schedules
    if warmup_epochs:
        callbacks.append(
            lbann.CallbackLinearGrowthLearningRate(
                target=learning_rate * mini_batch_size / 128,
                num_epochs=warmup_epochs
            )
        )
    if learning_rate_drop_factor:
        callbacks.append(
            lbann.CallbackDropFixedLearningRate(
                drop_epoch=list(range(0, num_epochs, learning_rate_drop_interval)),
                amt=learning_rate_drop_factor)
        )

    # Construct model
    model = lbann.Model(num_epochs,
                        layers=lbann.traverse_layer_graph(input),
                        objective_function=obj,
                        metrics=metrics,
                        callbacks=callbacks)

    # Setup optimizer
    # opt = lbann.Adam(learn_rate=learning_rate, beta1=0.9, beta2=0.999, eps=1e-8)
    opt = lbann.SGD(learn_rate=learning_rate, momentum=0.9)

    # Load data reader from prototext
    data_reader_proto = lbann.lbann_pb2.LbannPB()
    with open(data_reader_file, 'r') as f:
        google.protobuf.text_format.Merge(f.read(), data_reader_proto)
    data_reader_proto = data_reader_proto.data_reader
    for reader_proto in data_reader_proto.reader:
        reader_proto.python.module_dir = os.path.dirname(os.path.realpath(__file__))

    # Return experiment objects
    return model, data_reader_proto, opt
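
A hedged sketch of how this setup() might be driven from a main script. The trainer construction and lbann.run call follow the usual LBANN Python front-end pattern, but the data-reader path, job name, and launcher details are assumptions here.

    import lbann

    mini_batch_size = 128
    model, data_reader, opt = setup('data_reader.prototext',  # path is an assumption
                                    mini_batch_size=mini_batch_size)
    trainer = lbann.Trainer(mini_batch_size=mini_batch_size)
    lbann.run(trainer, model, data_reader, opt, job_name='classifier')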
Example #4
def setup(num_patches=3,
          mini_batch_size=512,
          num_epochs=75,
          learning_rate=0.005,
          bn_statistics_group_size=2,
          fc_data_layout='model_parallel',
          warmup=True,
          checkpoint_interval=None):

    # Data dimensions
    patch_dims = patch_generator.patch_dims
    num_labels = patch_generator.num_labels(num_patches)

    # Extract tensors from data sample
    input = lbann.Input()
    slice_points = [0]
    for _ in range(num_patches):
        patch_size = functools.reduce(operator.mul, patch_dims)
        slice_points.append(slice_points[-1] + patch_size)
    slice_points.append(slice_points[-1] + num_labels)
    sample = lbann.Slice(input, slice_points=str_list(slice_points))
    patches = [
        lbann.Reshape(sample, dims=str_list(patch_dims))
        for _ in range(num_patches)
    ]
    labels = lbann.Identity(sample)

    # Siamese network
    head_cnn = modules.ResNet(
        bn_statistics_group_size=bn_statistics_group_size)
    heads = [head_cnn(patch) for patch in patches]
    heads_concat = lbann.Concatenation(heads)

    # Classification network
    class_fc1 = modules.FcBnRelu(
        4096,
        statistics_group_size=bn_statistics_group_size,
        name='siamese_class_fc1',
        data_layout=fc_data_layout)
    class_fc2 = modules.FcBnRelu(
        4096,
        statistics_group_size=bn_statistics_group_size,
        name='siamese_class_fc2',
        data_layout=fc_data_layout)
    class_fc3 = lbann.modules.FullyConnectedModule(num_labels,
                                                   activation=lbann.Softmax,
                                                   name='siamese_class_fc3',
                                                   data_layout=fc_data_layout)
    x = class_fc1(heads_concat)
    x = class_fc2(x)
    probs = class_fc3(x)

    # Setup objective function
    cross_entropy = lbann.CrossEntropy([probs, labels])
    l2_reg_weights = set()
    for l in lbann.traverse_layer_graph(input):
        if type(l) == lbann.Convolution or type(l) == lbann.FullyConnected:
            l2_reg_weights.update(l.weights)
    l2_reg = lbann.L2WeightRegularization(weights=l2_reg_weights, scale=0.0002)
    obj = lbann.ObjectiveFunction([cross_entropy, l2_reg])

    # Setup model
    metrics = [
        lbann.Metric(lbann.CategoricalAccuracy([probs, labels]),
                     name='accuracy',
                     unit='%')
    ]
    callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer()]
    if checkpoint_interval:
        callbacks.append(
            lbann.CallbackCheckpoint(checkpoint_dir='ckpt',
                                     checkpoint_epochs=checkpoint_interval))

    # Learning rate schedules
    if warmup:
        callbacks.append(
            lbann.CallbackLinearGrowthLearningRate(
                target=learning_rate * mini_batch_size / 128,
                num_epochs=5))
    callbacks.append(
        lbann.CallbackDropFixedLearningRate(drop_epoch=list(range(0, 100, 15)),
                                            amt=0.25))

    # Construct model
    model = lbann.Model(num_epochs,
                        layers=lbann.traverse_layer_graph(input),
                        objective_function=obj,
                        metrics=metrics,
                        callbacks=callbacks)

    # Setup optimizer
    opt = lbann.SGD(learn_rate=learning_rate, momentum=0.9)
    # opt = lbann.Adam(learn_rate=learning_rate, beta1=0.9, beta2=0.999, eps=1e-8)

    # Setup data reader
    data_reader = make_data_reader(num_patches)

    # Return experiment objects
    return model, data_reader, opt
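
To make the slicing step concrete, here is a small worked example of how slice_points partitions a flattened sample into patches plus labels. The patch dimensions and label count are hypothetical; the real values come from patch_generator.

    import functools
    import operator

    patch_dims = (3, 96, 96)   # hypothetical CHW patch shape
    num_patches = 3
    num_labels = 6             # hypothetical label count

    slice_points = [0]
    for _ in range(num_patches):
        patch_size = functools.reduce(operator.mul, patch_dims)  # 3 * 96 * 96 = 27648
        slice_points.append(slice_points[-1] + patch_size)
    slice_points.append(slice_points[-1] + num_labels)
    print(slice_points)  # [0, 27648, 55296, 82944, 82950]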
Example #5
    def __init__(self):
        self.graph = tf.Graph()
        with self.graph.as_default():
            B, H, W, C, P, T, O, F, U, V, S, N, R = (
                param.batch_size, param.map_height, param.map_width,
                param.closeness_sequence_length, param.period_sequence_length,
                param.trend_sequence_length, param.num_of_output_tec_maps,
                param.num_of_filters, param.num_of_residual_units,
                param.exo_values, param.gru_size, param.gru_num_layers,
                param.resnet_out_filters)

            #ResNet architecture for the three modules
            with tf.device('/device:GPU:0'):
                #module 1: Capturing the closeness(recent)
                if (param.closeness_channel == True):
                    #shape of a tec map: (Batch_size, map_height, map_width, depth(num of history tec maps))
                    self.c_tec = tf.placeholder(tf.float32,
                                                shape=[None, H, W, C],
                                                name="closeness_tec_maps")
                    print("closeness input shape:", self.c_tec.shape)
                    self.closeness_input = my.ResInput(
                        inputs=self.c_tec,
                        filters=F,
                        kernel_size=param.kernel_size,
                        scope="closeness_input",
                        reuse=None)
                    self.closeness_resnet = my.ResNet(
                        inputs=self.closeness_input,
                        filters=F,
                        kernel_size=param.kernel_size,
                        repeats=U,
                        scope="closeness_resnet",
                        reuse=None)
                    self.closeness_output = my.ResOutput(
                        inputs=self.closeness_resnet,
                        filters=R,
                        kernel_size=param.kernel_size,
                        scope="closeness_output",
                        reuse=None)

            with tf.device('/device:GPU:1'):
                #module 2: Capturing the period(near)
                if (param.period_channel == True):
                    #shape of a tec map: (Batch_size, map_height, map_width, depth(num of history tec maps))
                    self.p_tec = tf.placeholder(tf.float32,
                                                shape=[None, H, W, P],
                                                name="period_tec_maps")
                    print("period input shape:", self.p_tec.shape)
                    self.period_input = my.ResInput(
                        inputs=self.p_tec,
                        filters=F,
                        kernel_size=param.kernel_size,
                        scope="period_input",
                        reuse=None)
                    self.period_resnet = my.ResNet(
                        inputs=self.period_input,
                        filters=F,
                        kernel_size=param.kernel_size,
                        repeats=U,
                        scope="period_resnet",
                        reuse=None)
                    self.period_output = my.ResOutput(
                        inputs=self.period_resnet,
                        filters=R,
                        kernel_size=param.kernel_size,
                        scope="period_output",
                        reuse=None)

            with tf.device('/device:GPU:0'):
                #module 3: Capturing the trend(distant)
                if (param.trend_channel == True):
                    #shape of a tec map: (Batch_size, map_height, map_width, depth(num of history tec maps))
                    self.t_tec = tf.placeholder(tf.float32,
                                                shape=[None, H, W, T],
                                                name="trend_tec_maps")
                    print("trend input shape:", self.t_tec.shape)
                    self.trend_input = my.ResInput(
                        inputs=self.t_tec,
                        filters=F,
                        kernel_size=param.kernel_size,
                        scope="trend_input",
                        reuse=None)
                    self.trend_resnet = my.ResNet(
                        inputs=self.trend_input,
                        filters=F,
                        kernel_size=param.kernel_size,
                        repeats=U,
                        scope="trend_resnet",
                        reuse=None)
                    self.trend_output = my.ResOutput(
                        inputs=self.trend_resnet,
                        filters=R,
                        kernel_size=param.kernel_size,
                        scope="trend_output",
                        reuse=None)

            if (param.add_exogenous == True):
                #lookback for exogenous is same as trend freq*trend length
                self.exogenous = tf.placeholder(
                    tf.float32,
                    shape=[None, param.trend_freq * T, V],
                    name="exogenous")
                print("exogenous variable", self.exogenous.shape)

                #processing with exogenous variables
                #this will be of shape (batch_size, gru_size)
                self.external = my.exogenous_module(self.exogenous, S, N)
                #shape (batch_size, 1, gru_size)
                self.external = tf.expand_dims(self.external, 1)

                #combining the exogenous and each module output
                #populating the exogenous variable
                self.val = tf.tile(self.external, [1, H * W, 1])
                self.exo = tf.reshape(self.val, [-1, H, W, S])

                #concatenate the modules output with the exogenous module output
                with tf.device('/device:GPU:0'):
                    if (param.closeness_channel == True):
                        self.close_concat = tf.concat(
                            [self.exo, self.closeness_output],
                            3,
                            name="close_concat")
                        #last convolutional layer for getting information from exo and closeness module
                        self.exo_close = tf.layers.conv2d(
                            inputs=self.close_concat,
                            filters=O,
                            kernel_size=param.kernel_size,
                            strides=(1, 1),
                            padding="SAME",
                            name="exo_close")

                with tf.device('/device:GPU:1'):
                    if (param.period_channel == True):
                        self.period_concat = tf.concat(
                            [self.exo, self.period_output],
                            3,
                            name="period_concat")
                        #last convolutional layer for getting information from exo and period module
                        self.exo_period = tf.layers.conv2d(
                            inputs=self.period_concat,
                            filters=O,
                            kernel_size=param.kernel_size,
                            strides=(1, 1),
                            padding="SAME",
                            name="exo_period")

                with tf.device('/device:GPU:0'):
                    if (param.trend_channel == True):
                        self.trend_concat = tf.concat(
                            [self.exo, self.trend_output],
                            3,
                            name="trend_concat")
                        #last convolutional layer for getting information from exo and trend module
                        self.exo_trend = tf.layers.conv2d(
                            inputs=self.trend_concat,
                            filters=O,
                            kernel_size=param.kernel_size,
                            strides=(1, 1),
                            padding="SAME",
                            name="exo_trend")

                # parameter-matrix-based fusion of the outputs after combining with exo
                if (param.closeness_channel == True
                        and param.period_channel == True
                        and param.trend_channel == True):
                    self.x_res = my.Fusion(scope="fusion",
                                           shape=[W, W],
                                           num_outputs=O,
                                           closeness_output=self.exo_close,
                                           period_output=self.exo_period,
                                           trend_output=self.exo_trend)

                elif (param.closeness_channel == True
                      and param.period_channel == True
                      and param.trend_channel == False):
                    self.x_res = my.Fusion(scope="fusion",
                                           shape=[W, W],
                                           num_outputs=O,
                                           closeness_output=self.exo_close,
                                           period_output=self.exo_period)

                elif (param.closeness_channel == True
                      and param.period_channel == False
                      and param.trend_channel == True):
                    self.x_res = my.Fusion(scope="fusion",
                                           shape=[W, W],
                                           num_outputs=O,
                                           closeness_output=self.exo_close,
                                           period_output=None,
                                           trend_output=self.exo_trend)

                elif (param.closeness_channel == True
                      and param.period_channel == False
                      and param.trend_channel == False):
                    self.x_res = my.Fusion(scope="fusion",
                                           shape=[W, W],
                                           num_outputs=O,
                                           closeness_output=self.exo_close)

            else:
                # parameter-matrix-based fusion of the module outputs (no exogenous input)
                if (param.closeness_channel == True
                        and param.period_channel == True
                        and param.trend_channel == True):
                    self.x_res = my.Fusion(
                        scope="fusion",
                        shape=[W, W],
                        num_outputs=O,
                        closeness_output=self.closeness_output,
                        period_output=self.period_output,
                        trend_output=self.trend_output)

                elif (param.closeness_channel == True
                      and param.period_channel == True
                      and param.trend_channel == False):
                    self.x_res = my.Fusion(
                        scope="fusion",
                        shape=[W, W],
                        num_outputs=O,
                        closeness_output=self.closeness_output,
                        period_output=self.period_output)

                elif (param.closeness_channel == True
                      and param.period_channel == False
                      and param.trend_channel == True):
                    self.x_res = my.Fusion(
                        scope="fusion",
                        shape=[W, W],
                        num_outputs=O,
                        closeness_output=self.closeness_output,
                        period_output=None,
                        trend_output=self.trend_output)

                elif (param.closeness_channel == True
                      and param.period_channel == False
                      and param.trend_channel == False):
                    self.x_res = my.Fusion(
                        scope="fusion",
                        shape=[W, W],
                        num_outputs=O,
                        closeness_output=self.closeness_output)

            #shape of output tec map: (Batch_size, map_height, map_width, number of predictions)
            self.output_tec = tf.placeholder(tf.float32,
                                             shape=[None, H, W, O],
                                             name="output_tec_map")
            print("output shape:", self.output_tec)

            self.loss_weight_matrix = tf.placeholder(tf.float32,
                                                     shape=[None, H, W, O],
                                                     name="loss_weight_matrix")
            print("loss_weight_matrix:", self.loss_weight_matrix)

            #scaling the error using the loss_weight_tensor - elementwise operation
            self.tec_error = tf.multiply(
                tf.pow((self.x_res - self.output_tec), 2),
                self.loss_weight_matrix)
            print("tec_error:", self.tec_error.shape)

            # Sum the weighted squared error, then normalize. The simpler form
            #   tf.reduce_sum(tf.pow(self.x_res - self.output_tec, 2)) / batch_size
            # cannot be used here because the batch dimension is declared as None.

            # Total weighted loss per sample (summed over H, W and the output
            # channels, averaged over the batch):
            # self.loss = tf.reduce_mean(tf.reduce_sum(tf.reduce_sum(
            #     tf.reduce_sum(self.tec_error, axis=3), axis=1), axis=1))

            # Dividing by the number of outputs O gives the average loss per TEC map:
            self.loss = tf.reduce_mean(
                tf.reduce_sum(tf.reduce_sum(
                    tf.reduce_sum(self.tec_error, axis=3), axis=1),
                              axis=1)) / (1.0 * O)

            # Or divide by O * H * W for the average loss per pixel of a TEC map:
            # self.loss = tf.reduce_mean(tf.reduce_sum(tf.reduce_sum(
            #     tf.reduce_sum(self.tec_error, axis=3), axis=1), axis=1)) / (1.0 * O * H * W)

            self.optimizer = tf.train.AdamOptimizer(
                learning_rate=param.lr,
                beta1=param.beta1,
                beta2=param.beta2,
                epsilon=param.epsilon).minimize(self.loss)

            #loss summary
            tf.summary.scalar('loss', self.loss)
            self.merged = tf.summary.merge_all()

            self.saver = tf.train.Saver(max_to_keep=None)
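
A short training-step sketch for this variant. The class name TECResNet and the batch size are hypothetical; the feed below matches the placeholders created when param.closeness_channel and param.add_exogenous are enabled and the period/trend channels are disabled.

    import numpy as np
    import tensorflow as tf  # TF 1.x

    model = TECResNet()  # hypothetical name for the class whose __init__ is shown above
    H, W = param.map_height, param.map_width
    C, O = param.closeness_sequence_length, param.num_of_output_tec_maps
    batch = 4            # the batch dimension is None in the graph, so any size works

    # create the init op inside the model's graph so the session can run it
    with model.graph.as_default():
        init_op = tf.global_variables_initializer()

    with tf.Session(graph=model.graph) as sess:
        sess.run(init_op)
        feed = {model.c_tec: np.zeros((batch, H, W, C), np.float32),
                model.exogenous: np.zeros((batch,
                                           param.trend_freq * param.trend_sequence_length,
                                           param.exo_values), np.float32),
                model.output_tec: np.zeros((batch, H, W, O), np.float32),
                model.loss_weight_matrix: np.ones((batch, H, W, O), np.float32)}
        _, loss = sess.run([model.optimizer, model.loss], feed_dict=feed)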