Example #1
    def __init__(self, config):

        self.verbose = config['verbose']
        self.rank = config['rank'] # will be used in sharding and to distinguish rng
        self.size = config['size']
        self.no_paraload = config.get('no_paraload', False)
            
        import numpy as np  # used below for np.float32 / np.zeros
        import theano
        theano.config.on_unused_input = 'warn'
        
        self.name = 'GoogLeNet'
        
        # data
        from theanompi.models.data import ImageNet_data
        self.data = ImageNet_data(verbose=False)
        self.channels = self.data.channels # 'c' mean(R,G,B) = (103.939, 116.779, 123.68)
        self.input_width = input_width # '0' single scale training 224
        self.input_height = input_height # '1' single scale training 224
        # if self.size > 1:  # only use avg
        #     self.batch_size = batch_size / self.size
        # else:  # TODO: find out if this works better
        self.batch_size = batch_size # 'b'
        self.file_batch_size = file_batch_size
        self.n_softmax_out = self.data.n_class
        
        # mini batching and other data parallel common routine
        self.data.batch_data(file_batch_size)
        self.data.extend_data(rank=self.rank, size=self.size)
        self.data.shuffle_data(mode='train', common_seed=1234)
        self.data.shuffle_data(mode='val')
        self.data.shard_data(mode='train', rank=self.rank, size=self.size) # to update data.n_batch_train
        self.data.shard_data(mode='val', rank=self.rank, size=self.size) # to update data.n_batch_val
        
        # training related
        self.n_epochs = n_epochs
        self.epoch = 0
        self.step_idx = 0
        self.mu = momentum # default: 0.9
        self.use_momentum = use_momentum
        self.use_nesterov_momentum = use_nesterov_momentum
        self.eta = weight_decay # default: 0.0002 (weight decay)
        self.monitor_grad = monitor_grad
        
        self.base_lr = np.float32(learning_rate)
        self.shared_lr = theano.shared(self.base_lr)
        self.shared_x = theano.shared(np.zeros((3,
                                                self.input_width,   # self.data.width
                                                self.input_height,  # self.data.height
                                                file_batch_size),
                                               dtype=theano.config.floatX),
                                      borrow=True)
        self.shared_y = theano.shared(np.zeros((file_batch_size,),
                                               dtype=int),
                                      borrow=True)
        # slice batch if needed
        import theano.tensor as T                     
        subb_ind = T.iscalar('subb')  # sub batch index
        self.subb_ind = subb_ind
        self.shared_x_slice = self.shared_x[:,:,:,subb_ind*self.batch_size:(subb_ind+1)*self.batch_size]
        self.shared_y_slice = self.shared_y[subb_ind*self.batch_size:(subb_ind+1)*self.batch_size]
        
        # ##################### BUILD NETWORK ##########################
        # allocate symbolic variables for the data
        # 'rand' is a random array used for random cropping/mirroring of data
        
        self.build_model()
        self.output = self.output_layer.output
        from theanompi.models.layers2 import get_params, get_layers, count_params
        #self.layers = get_layers(lastlayer = self.output_layer)
        self.params,self.weight_types = get_params(self.layers)
        count_params(self.params, verbose=self.verbose)
        self.grads = T.grad(self.cost,self.params)

        # To be compiled
        self.compiled_train_fn_list = []
        self.train_iter_fn = None
        self.val_iter_fn = None
        
        # iter related
        self.n_subb = file_batch_size//batch_size
        self.current_t = 0 # current filename pointer in the filename list
        self.last_one_t = False # if pointer is pointing to the last filename in the list
        self.subb_t = 0 # sub-batch index
        
        self.current_v=0
        self.last_one_v=False
        self.subb_v=0
        
        # preprocessing
        self.batch_crop_mirror = batch_crop_mirror
        self.input_width = input_width
        
        if self.data.para_load and not self.no_paraload:

            self.data.spawn_load()
            self.data.para_load_init(self.shared_x, input_width, input_height,
                                    rand_crop, batch_crop_mirror)
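
A minimal sketch (pure NumPy, hypothetical sizes) of the sub-batch slicing set up above with shared_x and subb_ind: one file-sized batch lives on the GPU and is consumed in file_batch_size // batch_size slices along the last ('b') axis of the c01b layout.

import numpy as np

file_batch_size, batch_size = 256, 64        # hypothetical sizes
input_width = input_height = 224

# c01b layout, as in shared_x above: (channels, width, height, batch)
shared_x = np.zeros((3, input_width, input_height, file_batch_size),
                    dtype=np.float32)

n_subb = file_batch_size // batch_size       # sub-batches per loaded file
for subb_ind in range(n_subb):
    x_slice = shared_x[:, :, :, subb_ind * batch_size:(subb_ind + 1) * batch_size]
    assert x_slice.shape == (3, input_width, input_height, batch_size)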
Example #2
    def __init__(self, input, input_shape = None, output_shape = None, 
                  n1x1=64, nr3x3=96, n3x3=128, nr5x5=16, n5x5=32, npj=32, 
                  lib_conv='cudnn', printinfo=False):  
                  
        
        self.get_input_shape(input,input_shape) 
        self.verbose = printinfo
        
        layers=[]
        outlayers=[]
    
        if n1x1 > 0:   

            l_1x1 =        Conv(input=input,# (128, 192,28,28), 
                                convstride=1, padsize=0,
                                W = Normal((n1x1, self.input_shape[1], 1, 1), mean = 0.0, std=0.03 ),
                                b = Constant((n1x1,), val = 0.2),
                                lib_conv=lib_conv,
                                printinfo=self.verbose
                                )   
    
    
            layers.append(l_1x1)
            outlayers.append(l_1x1)
            
        if n3x3 > 0:
            
            if nr3x3 > 0:


                l_r3x3 = Conv(input=input,
                                convstride=1, padsize=0,
                                W = Normal((nr3x3, self.input_shape[1], 1, 1),mean = 0.0,std=0.09),
                                b = Constant((nr3x3,), val = 0.2),
                                lib_conv=lib_conv,
                                printinfo=self.verbose
                                )   
        
        
                layers.append(l_r3x3)                                               
                
            else:
                l_r3x3 = input  # no 1x1 reduce layer; pass the block input straight to the 3x3 conv


            l_3x3 =        Conv(input=l_r3x3,
                                convstride=1, padsize=1,
                                W = Normal((n3x3, nr3x3, 3, 3), mean = 0.0, std=0.03 ),
                                b = Constant((n3x3,), val = 0.2),
                                lib_conv=lib_conv,
                                printinfo=self.verbose
                                )
    
    
            layers.append(l_3x3)
            outlayers.append(l_3x3)
        
    
        if n5x5 > 0:
        
            if nr5x5 > 0:
 

                l_r5x5 = Conv(input=input,
                                convstride=1, padsize=0,
                                W = Normal((nr5x5, self.input_shape[1], 1, 1), mean = 0.0, std=0.2 ),
                                b = Constant((nr5x5,), val = 0.2),
                                lib_conv=lib_conv,
                                printinfo=self.verbose                                
                                )   
        
        
                layers.append(l_r5x5)
                
            else:
                l_r5x5 = input  # no 1x1 reduce layer; pass the block input straight to the 5x5 conv


            l_5x5 =        Conv(input=l_r5x5,
                                convstride=1, padsize=2,
                                W = Normal((n5x5, nr5x5, 5, 5), mean = 0.0, std=0.03 ),
                                b = Constant((n5x5,), val = 0.2 ),
                                lib_conv=lib_conv,
                                printinfo=self.verbose  
                                )
                        
    
            layers.append(l_5x5)
            outlayers.append(l_5x5)
    
        if npj > 0:
                                            
            l_pool     =   Pool(input=input, 
                                poolsize=3, 
                                poolstride=1, 
                                poolpad=1,
                                mode = 'max',
                                printinfo=self.verbose
                                )                              

            l_pool_project=Conv(input=l_pool,
                                convstride=1, padsize=0,
                                W = Normal((npj, self.input_shape[1], 1, 1), mean = 0.0, std=0.1 ),
                                b = Constant((npj,), val = 0.2 ),
                                lib_conv=lib_conv,
                                printinfo=self.verbose  
                                )   
    
    
            layers.append(l_pool_project)
            outlayers.append(l_pool_project)          
        
        import theano.tensor as T
        self.output = T.concatenate([layer.output for layer in outlayers], axis=1)  # bc01: concatenate on 'c'
        
        self.params, self.weight_type = get_params(layers)
            
        if output_shape:
            self.output_shape = output_shape 
        else:
            self.output_shape = self.get_output_shape(self.input_shape)
        
        self.name = 'Inception ({})'.format(lib_conv)
        if printinfo: self.print_shape()
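
A minimal sketch of the channel bookkeeping in this block: the branch outputs are concatenated on the channel axis, so the output depth is the sum of the four branch depths; the reduce layers (nr3x3, nr5x5) only narrow the inputs of the 3x3 and 5x5 convolutions and do not appear in the output. Values are the constructor defaults above.

n1x1, n3x3, n5x5, npj = 64, 128, 32, 32    # constructor defaults
out_channels = n1x1 + n3x3 + n5x5 + npj    # depth after T.concatenate on axis=1
assert out_channels == 256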
Example #3
    def __init__(self, input, n_softmax_out, input_shape=None, output_shape= None,
                 lib_conv='cudnn', printinfo=False):
        
        
        self.get_input_shape(input,input_shape) 
        self.verbose = printinfo
        
        layers=[]
        outlayers=[]
        
        # input shape = (14x14x512or528)
        pool =           Pool(input=input, 
                              poolsize=5, 
                              poolstride=3, 
                              poolpad=0,
                              mode = 'average',
                              printinfo=self.verbose
                              )

        # output shape = (4x4x512or528)
        
        conv1x1        = Conv(input=pool,
                              convstride=1, padsize=0,
                              W = Normal((128, self.input_shape[1], 1, 1),mean=0.0, std=0.1),
                              b = Constant((128,),val = 0.2),
                              lib_conv=lib_conv,
                              printinfo=self.verbose 
                              )
        layers.append(conv1x1)

        # output shape = (4x4x128)
        
                             
        l_flatten =   Flatten(input = conv1x1, #5
                              #input_shape=conv_5x5.output_shape, # (b, 64, 2, 2)
                              axis = 2, # expand dimensions after the first dimension
                              printinfo=self.verbose
                              #output_shape = (b,64*2*2)
                              )

        # output shape = (2048)
        
        fc            =     FC(input= l_flatten, 
                               #n_in=2048,
                               n_out=1024,
                               W = Normal((l_flatten.output_shape[1],1024),mean=0,std=0.01),
                               b = Constant((1024,),val=0),
                               printinfo=self.verbose
                               #input_shape = flatten.output_shape # (b, 9216)
                               )
        
        layers.append(fc)      
        
        drp =          Dropout(input=fc,
                              #n_in=1024,
                               n_out=fc.output_shape[1], 
                               prob_drop=0.7,
                               printinfo=self.verbose
                               )
        
        softmax_layer= Softmax(input=drp,  
                               n_out=n_softmax_out,
                               W = Normal((drp.output_shape[1], n_softmax_out), mean=0, std=0.01),
                               b = Constant((n_softmax_out,),val=0),
                               printinfo=self.verbose
                               )
        
        layers.append(softmax_layer)
        
        self.output = softmax_layer.p_y_given_x
        self.negative_log_likelihood = softmax_layer.negative_log_likelihood
        
        self.params, self.weight_type = get_params(layers)
            
        if output_shape:
            self.output_shape = output_shape 
        else:
            self.output_shape = self.get_output_shape(self.input_shape)
        
        self.name = 'AuxTower ({})'.format(lib_conv)
        if printinfo: self.print_shape()
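
A minimal sketch of the shape arithmetic behind the comments above: 5x5 average pooling with stride 3 and no padding maps a 14x14 feature map to 4x4, the 1x1 convolution sets the depth to 128, and flattening gives the 2048-wide input of the fully connected layer.

pool_out = (14 - 5) // 3 + 1          # 4, matching "output shape = (4x4x...)"
fc_in = 128 * pool_out * pool_out     # 2048, matching "output shape = (2048)"
assert (pool_out, fc_in) == (4, 2048)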
Example #4
    def __init__(self, config):

        self.verbose = config['verbose']
        self.rank = config['rank'] # will be used in sharding and to distinguish rng
        self.size = config['size']
        self.no_paraload = config.get('no_paraload', False)
         
        import numpy as np  # used below for np.float32 / np.zeros
        import theano
        theano.config.on_unused_input = 'warn'
        self.name = 'AlexNet'
        
        # data
        import theanompi.models.data.imagenet as imagenet
        from theanompi.models.data import ImageNet_data
        imagenet.sc=True
        self.data = ImageNet_data(verbose=False)
        self.channels = self.data.channels # 'c' mean(R,G,B) = (103.939, 116.779, 123.68)
        self.input_width = input_width # '0' single scale training 224
        self.input_height = input_height # '1' single scale training 224
        # if self.size > 1:  # only use avg
        #     self.batch_size = batch_size / self.size
        # else:  # TODO: find out if this works better
        self.batch_size = batch_size # 'b'
        self.file_batch_size = file_batch_size
        self.n_softmax_out = self.data.n_class
        
        # mini batching
        self.data.batch_data(file_batch_size)
        #self.data.shuffle_data()
        
        # training related
        self.n_epochs = n_epochs
        self.epoch = 0
        self.step_idx = 0
        self.mu = momentum # default: 0.9
        self.use_momentum = use_momentum
        self.use_nesterov_momentum = use_nesterov_momentum
        self.eta = weight_decay # default: 0.0002 (weight decay)
        self.monitor_grad = monitor_grad
        
        self.base_lr = np.float32(learning_rate)
        self.shared_lr = theano.shared(self.base_lr)
        self.shared_x = theano.shared(np.zeros((3,
                                                self.data.width,
                                                self.data.height,
                                                file_batch_size),
                                               dtype=theano.config.floatX),
                                      borrow=True)
        self.shared_y = theano.shared(np.zeros((file_batch_size,),
                                               dtype=int),
                                      borrow=True)
        # slice batch if needed
        import theano.tensor as T                     
        subb_ind = T.iscalar('subb')  # sub batch index
        self.subb_ind = subb_ind
        self.shared_x_slice = self.shared_x[:,:,:,subb_ind*self.batch_size:(subb_ind+1)*self.batch_size]
        self.shared_y_slice = self.shared_y[subb_ind*self.batch_size:(subb_ind+1)*self.batch_size]
        
        # ##################### BUILD NETWORK ##########################
        # allocate symbolic variables for the data
        # 'rand' is a random array used for random cropping/mirroring of data
        
        self.build_model()
        self.output = self.output_layer.output
        from theanompi.models.layers2 import get_params, get_layers, count_params
        self.layers = get_layers(lastlayer = self.output_layer)
        self.params,self.weight_types = get_params(self.layers)
        count_params(self.params, verbose=self.verbose)
        self.grads = T.grad(self.cost,self.params)

        # To be compiled
        self.compiled_train_fn_list = []
        self.train_iter_fn = None
        self.val_iter_fn = None
        
        # iter related
        self.n_subb = file_batch_size//batch_size
        self.current_t = 0 # current filename pointer in the filename list
        self.last_one_t = False # if pointer is pointing to the last filename in the list
        self.subb_t = 0 # sub-batch index
        
        self.current_v=0
        self.last_one_v=False
        self.subb_v=0
        
        # preprocessing
        self.batch_crop_mirror = batch_crop_mirror
        self.input_width = input_width
        
        if self.data.para_load and not self.no_paraload:
            
            self.data.spawn_load()
            self.data.para_load_init(self.shared_x)
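
The attributes mu (momentum), eta (weight decay) and use_nesterov_momentum set above feed the gradient update compiled elsewhere in the class; below is a minimal sketch of the conventional SGD step they describe (assumed here, not taken from this repository).

import numpy as np

def sgd_step(param, grad, vel, lr, mu=0.9, eta=0.0002, nesterov=False):
    # weight decay enters as an L2 penalty term added to the gradient
    grad = grad + eta * param
    vel_new = mu * vel - lr * grad        # momentum accumulation
    if nesterov:
        # Nesterov variant: look ahead along the updated velocity
        return param + mu * vel_new - lr * grad, vel_new
    return param + vel_new, vel_new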
Example #5
    def __init__(self,
                 input,
                 input_shape=None,
                 output_shape=None,
                 n1x1=64,
                 nr3x3=96,
                 n3x3=128,
                 nr5x5=16,
                 n5x5=32,
                 npj=32,
                 lib_conv='cudnn',
                 printinfo=False):

        self.get_input_shape(input, input_shape)
        self.verbose = printinfo

        layers = []
        outlayers = []

        if n1x1 > 0:

            l_1x1 = Conv(
                input=input,  # (128, 192,28,28), 
                convstride=1,
                padsize=0,
                W=Normal((n1x1, self.input_shape[1], 1, 1), mean=0.0,
                         std=0.03),
                b=Constant((n1x1, ), val=0.2),
                lib_conv=lib_conv,
                printinfo=self.verbose)

            layers.append(l_1x1)
            outlayers.append(l_1x1)

        if n3x3 > 0:

            if nr3x3 > 0:

                l_r3x3 = Conv(input=input,
                              convstride=1,
                              padsize=0,
                              W=Normal((nr3x3, self.input_shape[1], 1, 1),
                                       mean=0.0,
                                       std=0.09),
                              b=Constant((nr3x3, ), val=0.2),
                              lib_conv=lib_conv,
                              printinfo=self.verbose)

                layers.append(l_r3x3)

            else:
                l_r3x3 = input  # no 1x1 reduce layer; pass the block input straight to the 3x3 conv

            l_3x3 = Conv(input=l_r3x3,
                         convstride=1,
                         padsize=1,
                         W=Normal((n3x3, nr3x3, 3, 3), mean=0.0, std=0.03),
                         b=Constant((n3x3, ), val=0.2),
                         lib_conv=lib_conv,
                         printinfo=self.verbose)

            layers.append(l_3x3)
            outlayers.append(l_3x3)

        if n5x5 > 0:

            if nr5x5 > 0:

                l_r5x5 = Conv(input=input,
                              convstride=1,
                              padsize=0,
                              W=Normal((nr5x5, self.input_shape[1], 1, 1),
                                       mean=0.0,
                                       std=0.2),
                              b=Constant((nr5x5, ), val=0.2),
                              lib_conv=lib_conv,
                              printinfo=self.verbose)

                layers.append(l_r5x5)

            else:
                l_r5x5 = input  # no 1x1 reduce layer; pass the block input straight to the 5x5 conv

            l_5x5 = Conv(input=l_r5x5,
                         convstride=1,
                         padsize=2,
                         W=Normal((n5x5, nr5x5, 5, 5), mean=0.0, std=0.03),
                         b=Constant((n5x5, ), val=0.2),
                         lib_conv=lib_conv,
                         printinfo=self.verbose)

            layers.append(l_5x5)
            outlayers.append(l_5x5)

        if npj > 0:

            l_pool = Pool(input=input,
                          poolsize=3,
                          poolstride=1,
                          poolpad=1,
                          mode='max',
                          printinfo=self.verbose)

            l_pool_project = Conv(input=l_pool,
                                  convstride=1,
                                  padsize=0,
                                  W=Normal((npj, self.input_shape[1], 1, 1),
                                           mean=0.0,
                                           std=0.1),
                                  b=Constant((npj, ), val=0.2),
                                  lib_conv=lib_conv,
                                  printinfo=self.verbose)

            layers.append(l_pool_project)
            outlayers.append(l_pool_project)

        import theano.tensor as T
        self.output = T.concatenate([layer.output for layer in outlayers],
                                    axis=1)  # bc01: concatenate on 'c'

        self.params, self.weight_type = get_params(layers)

        if output_shape:
            self.output_shape = output_shape
        else:
            self.output_shape = self.get_output_shape(self.input_shape)

        self.name = 'Inception ({})'.format(lib_conv)
        if printinfo: self.print_shape()
Example #6
    def __init__(self,
                 input,
                 n_softmax_out,
                 input_shape=None,
                 output_shape=None,
                 lib_conv='cudnn',
                 printinfo=False):

        self.get_input_shape(input, input_shape)
        self.verbose = printinfo

        layers = []
        outlayers = []

        # input shape = (14x14x512or528)
        pool = Pool(input=input,
                    poolsize=5,
                    poolstride=3,
                    poolpad=0,
                    mode='average',
                    printinfo=self.verbose)

        # output shape = (4x4x512or528)

        conv1x1 = Conv(input=pool,
                       convstride=1,
                       padsize=0,
                       W=Normal((128, self.input_shape[1], 1, 1),
                                mean=0.0,
                                std=0.1),
                       b=Constant((128, ), val=0.2),
                       lib_conv=lib_conv,
                       printinfo=self.verbose)
        layers.append(conv1x1)

        # output shape = (4x4x128)

        l_flatten = Flatten(
            input=conv1x1,  #5
            #input_shape=conv_5x5.output_shape, # (b, 64, 2, 2)
            axis=2,  # expand dimensions after the first dimension
            printinfo=self.verbose
            #output_shape = (b,64*2*2)
        )

        # output shape = (2048)

        fc = FC(
            input=l_flatten,
            #n_in=2048,
            n_out=1024,
            W=Normal((l_flatten.output_shape[1], 1024), mean=0, std=0.01),
            b=Constant((1024, ), val=0),
            printinfo=self.verbose
            #input_shape = flatten.output_shape # (b, 9216)
        )

        layers.append(fc)

        drp = Dropout(
            input=fc,
            #n_in=1024,
            n_out=fc.output_shape[1],
            prob_drop=0.7,
            printinfo=self.verbose)

        softmax_layer = Softmax(input=drp,
                                n_out=n_softmax_out,
                                W=Normal((drp.output_shape[1], n_softmax_out),
                                         mean=0,
                                         std=0.01),
                                b=Constant((n_softmax_out, ), val=0),
                                printinfo=self.verbose)

        layers.append(softmax_layer)

        self.output = softmax_layer.p_y_given_x
        self.negative_log_likelihood = softmax_layer.negative_log_likelihood

        self.params, self.weight_type = get_params(layers)

        if output_shape:
            self.output_shape = output_shape
        else:
            self.output_shape = self.get_output_shape(self.input_shape)

        self.name = 'AuxTower ({})'.format(lib_conv)
        if printinfo: self.print_shape()
Example #7
    def __init__(self, config): 

        self.verbose = config['verbose']
        self.rank = config['rank'] # will be used in sharding and to distinguish rng
        self.size = config['size']
        
        import numpy as np  # used below for np.float32 / np.zeros
        import theano
        self.name = 'Cifar10_model'
        
        # data
        from theanompi.models.data import Cifar10_data
        self.data = Cifar10_data(verbose=False)
        self.channels = self.data.channels # 'c' mean(R,G,B) = (103.939, 116.779, 123.68)
        self.input_width = input_width # '0' single scale training 224
        self.input_height = input_height # '1' single scale training 224
        # if self.size>1: # only use avg
        #     self.batch_size = batch_size/self.size
        # else:
        self.batch_size = batch_size # 'b'
        self.file_batch_size = file_batch_size
        self.n_softmax_out = self.data.n_class
        
        # mini batching and other data parallel common routine
        self.data.batch_data(file_batch_size)
        self.data.extend_data(rank=self.rank, size=self.size)
        self.data.shuffle_data(mode='train', common_seed=1234)
        self.data.shuffle_data(mode='val')
        self.data.shard_data(mode='train', rank=self.rank, size=self.size) # to update data.n_batch_train
        self.data.shard_data(mode='val', rank=self.rank, size=self.size) # to update data.n_batch_val
        
        # preprocessing
        self.batch_crop_mirror = batch_crop_mirror
        self.input_width = input_width
        
        # training related
        self.n_epochs = n_epochs
        self.epoch = 0
        self.step_idx = 0
        self.mu = momentum # default: 0.9
        self.use_momentum = use_momentum
        self.use_nesterov_momentum = use_nesterov_momentum
        self.eta = weight_decay # default: 0.0002 (weight decay)
        self.monitor_grad = monitor_grad
        
        self.base_lr = np.float32(learning_rate)
        self.shared_lr = theano.shared(self.base_lr)
        self.shared_x = theano.shared(np.zeros((3,
                                                self.data.width,
                                                self.data.height,
                                                file_batch_size),
                                               dtype=theano.config.floatX),
                                      borrow=True)
        self.shared_y = theano.shared(np.zeros((file_batch_size,),
                                               dtype=int),
                                      borrow=True)
        # slice batch if needed
        import theano.tensor as T                     
        subb_ind = T.iscalar('subb')  # sub batch index
        self.subb_ind = subb_ind
        self.shared_x_slice = self.shared_x[:,:,:,subb_ind*self.batch_size:(subb_ind+1)*self.batch_size]
        self.shared_y_slice = self.shared_y[subb_ind*self.batch_size:(subb_ind+1)*self.batch_size]                             
        # build model
        self.build_model()
        self.output = self.output_layer.output
        from theanompi.models.layers2 import get_params, get_layers, count_params
        self.layers = get_layers(lastlayer = self.output_layer)
        self.params,self.weight_types = get_params(self.layers)
        count_params(self.params, self.verbose)
        self.grads = T.grad(self.cost,self.params)
        
        # To be compiled
        self.compiled_train_fn_list = []
        self.train_iter_fn = None
        self.val_iter_fn = None
        
        # iter related
        self.n_subb = file_batch_size//batch_size
        self.current_t = 0 # current filename pointer in the filename list
        self.last_one_t = False # if pointer is pointing to the last filename in the list
        self.subb_t = 0 # sub-batch index
        
        self.current_v=0
        self.last_one_v=False
        self.subb_v=0
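
A minimal sketch (hypothetical driver loop, not part of this class) of how the counters initialised above are typically advanced: current_t walks the list of batch files, subb_t walks the n_subb sub-batches inside the currently loaded file, and last_one_t flags the final file of the epoch.

n_files, n_subb = 10, 4                  # hypothetical sizes
for current_t in range(n_files):
    last_one_t = (current_t == n_files - 1)
    for subb_t in range(n_subb):
        pass  # run one training iteration on sub-batch subb_t of file current_t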