示例#1
0
文件: model.py 项目: yysherlock/msae
    def train(self):
        """Run the training loop described by this model's config section.

        Every setting is fetched with
        ``self.readField(self.config, self.name, <field>)``. Fields used:
        ``output_directory``, ``show_freq``, ``normalize``, ``train_size``,
        ``train_data``, ``extract_reps`` (with ``train_reps`` and
        ``validation_reps``), ``eval_freq`` (with ``query_size``, ``label``,
        ``query``), ``validation_data``, ``max_epoch`` and ``checkpoint``.

        Side effects: creates ``<output_directory>/<self.name>`` (plus a
        ``vis`` sub-directory when ``show_freq`` > 0), saves the validation
        representations every ``show_freq`` epochs, runs the evaluator every
        ``eval_freq`` epochs, optionally checkpoints and extracts
        representations, and finally calls ``self.saveConfig``.
        """
        outputPrefix = self.readField(self.config, self.name, "output_directory")
        outputDir = os.path.join(outputPrefix, self.name)
        if not os.path.exists(outputDir):
            os.makedirs(outputDir)

        showFreq = int(self.readField(self.config, self.name, "show_freq"))
        # Only a positive show_freq enables dumping; the same flag guards both
        # the directory creation and the per-epoch save so that visDir can
        # never be referenced while undefined.
        doShow = showFreq > 0
        visDir = None
        if doShow:
            visDir = os.path.join(outputDir, 'vis')
            if not os.path.exists(visDir):
                os.mkdir(visDir)

        # Normalize the inputs if they were not normalized beforehand.
        normalize = self.str2bool(self.readField(self.config, self.name, "normalize"))
        trainDataSize = int(self.readField(self.config, self.name, "train_size"))
        # Floor division keeps numBatch an int (range() below needs one) on
        # both Python 2 and Python 3; a trailing partial batch is dropped.
        numBatch = trainDataSize // self.batchsize
        trainDataPath = self.readField(self.config, self.name, "train_data")
        extractReps = self.readField(self.config, self.name, "extract_reps") == "True"
        if extractReps:
            trainRepsPath = self.readField(self.config, self.name, "train_reps")
        else:
            trainRepsPath = None
        trainDataLoader = DataHandler(trainDataPath, trainRepsPath, self.vDim,
                                      self.hDim, self.batchsize, numBatch,
                                      normalize)

        evalFreq = int(self.readField(self.config, self.name, 'eval_freq'))
        doEval = evalFreq != 0
        evaluator = None
        if doEval:
            qsize = int(self.readField(self.config, self.name, "query_size"))
            labelPath = self.readField(self.config, self.name, "label")
            queryPath = self.readField(self.config, self.name, "query")
            label = np.load(labelPath)
            evaluator = Evaluator(queryPath, label,
                                  os.path.join(outputDir, 'perf'), self.name,
                                  query_size=qsize, verbose=self.verbose)

        # The validation set feeds both the periodic evaluation and the
        # periodic 'vis' dump, so load it when either is enabled (previously
        # show_freq without eval_freq hit an undefined validation_data).
        validation_data = None
        if doEval or doShow:
            evalPath = self.readField(self.config, self.name, "validation_data")
            validation_data = gp.garray(np.load(evalPath))
            if normalize:
                validation_data = trainDataLoader.doNormalization(validation_data)

        maxEpoch = int(self.readField(self.config, self.name, "max_epoch"))

        nCommon, nMetric, title = self.getDisplayFields()
        if self.verbose:
            print(title)
        for epoch in range(maxEpoch):
            perf = np.zeros(nMetric)
            trainDataLoader.reset()
            for _ in range(numBatch):
                batch = trainDataLoader.getOneBatch()
                curr = self.trainOneBatch(batch, epoch, computeStat=True)
                perf = self.aggregatePerf(perf, curr)

            epochNum = 1 + epoch
            showNow = doShow and epochNum % showFreq == 0
            evalNow = doEval and epochNum % evalFreq == 0
            if showNow or evalNow:
                # Compute the representations once and share them between
                # the dump and the evaluation.
                validation_code = self.getReps(validation_data)
                if showNow:
                    np.save(os.path.join(visDir, '%dvis' % epochNum),
                            validation_code)
                if evalNow:
                    evaluator.evalSingleModal(validation_code, epoch,
                                              self.name + 'V')
                # Drop the reference so the codes can be freed before the
                # next epoch.
                validation_code = None
            if self.verbose:
                self.printEpochInfo(epoch, perf, nCommon)

        if self.readField(self.config, self.name, "checkpoint") == "True":
            self.doCheckpoint(outputDir)

        if extractReps:
            if doEval:
                validation_reps_path = self.readField(self.config, self.name,
                                                      "validation_reps")
                self.extractValidationReps(validation_data, validation_reps_path)
            self.extractTrainReps(trainDataLoader, numBatch)

        self.saveConfig(outputDir)
示例#2
0
    def train(self):
        """Train the model according to its section of ``self.config``.

        Settings are read through
        ``self.readField(self.config, self.name, <field>)``; the fields
        consulted are ``output_directory``, ``show_freq``, ``normalize``,
        ``train_size``, ``train_data``, ``extract_reps`` (with ``train_reps``
        and ``validation_reps``), ``eval_freq`` (with ``query_size``,
        ``label``, ``query``), ``validation_data``, ``max_epoch`` and
        ``checkpoint``.

        Side effects: the output directory ``<output_directory>/<self.name>``
        is created (with a ``vis`` sub-directory when ``show_freq`` > 0),
        validation representations are saved every ``show_freq`` epochs, the
        evaluator is run every ``eval_freq`` epochs, a checkpoint and the
        extracted representations are written when requested, and the config
        is saved at the end via ``self.saveConfig``.
        """
        outputPrefix = self.readField(self.config, self.name,
                                      "output_directory")
        outputDir = os.path.join(outputPrefix, self.name)
        if not os.path.exists(outputDir):
            os.makedirs(outputDir)

        showFreq = int(self.readField(self.config, self.name, "show_freq"))
        # One flag drives both the 'vis' directory creation and the
        # per-epoch dump, so a negative show_freq can no longer reach an
        # undefined visDir; non-positive values simply disable the dump.
        doShow = showFreq > 0
        visDir = None
        if doShow:
            visDir = os.path.join(outputDir, 'vis')
            if not os.path.exists(visDir):
                os.mkdir(visDir)

        # Normalize the inputs if they were not normalized beforehand.
        normalize = self.str2bool(
            self.readField(self.config, self.name, "normalize"))
        trainDataSize = int(
            self.readField(self.config, self.name, "train_size"))
        # '//' keeps the batch count integral on Python 2 and 3 alike;
        # samples beyond the last full batch are not visited.
        numBatch = trainDataSize // self.batchsize
        trainDataPath = self.readField(self.config, self.name, "train_data")
        extractReps = (self.readField(self.config, self.name,
                                      "extract_reps") == "True")
        trainRepsPath = None
        if extractReps:
            trainRepsPath = self.readField(self.config, self.name,
                                           "train_reps")
        trainDataLoader = DataHandler(trainDataPath, trainRepsPath, self.vDim,
                                      self.hDim, self.batchsize, numBatch,
                                      normalize)

        evalFreq = int(self.readField(self.config, self.name, 'eval_freq'))
        doEval = evalFreq != 0
        evaluator = None
        if doEval:
            qsize = int(self.readField(self.config, self.name, "query_size"))
            labelPath = self.readField(self.config, self.name, "label")
            queryPath = self.readField(self.config, self.name, "query")
            label = np.load(labelPath)
            evaluator = Evaluator(queryPath,
                                  label,
                                  os.path.join(outputDir, 'perf'),
                                  self.name,
                                  query_size=qsize,
                                  verbose=self.verbose)

        # Both the evaluation and the 'vis' dump consume the validation set,
        # so it must be loaded when either one is switched on. Loading it
        # only under eval_freq left validation_data undefined for
        # show_freq-only configurations.
        validation_data = None
        if doEval or doShow:
            evalPath = self.readField(self.config, self.name,
                                      "validation_data")
            validation_data = gp.garray(np.load(evalPath))
            if normalize:
                validation_data = trainDataLoader.doNormalization(
                    validation_data)

        maxEpoch = int(self.readField(self.config, self.name, "max_epoch"))

        nCommon, nMetric, title = self.getDisplayFields()
        if self.verbose:
            print(title)
        for epoch in range(maxEpoch):
            perf = np.zeros(nMetric)
            trainDataLoader.reset()
            for _ in range(numBatch):
                batch = trainDataLoader.getOneBatch()
                curr = self.trainOneBatch(batch, epoch, computeStat=True)
                perf = self.aggregatePerf(perf, curr)

            finishedEpochs = 1 + epoch
            showNow = doShow and finishedEpochs % showFreq == 0
            evalNow = doEval and finishedEpochs % evalFreq == 0
            if showNow or evalNow:
                # A single getReps call serves both consumers.
                validation_code = self.getReps(validation_data)
                if showNow:
                    np.save(os.path.join(visDir, '%dvis' % finishedEpochs),
                            validation_code)
                if evalNow:
                    evaluator.evalSingleModal(validation_code, epoch,
                                              self.name + 'V')
                # Release the codes before the next epoch starts.
                validation_code = None
            if self.verbose:
                self.printEpochInfo(epoch, perf, nCommon)

        if self.readField(self.config, self.name, "checkpoint") == "True":
            self.doCheckpoint(outputDir)

        if extractReps:
            if doEval:
                validation_reps_path = self.readField(self.config, self.name,
                                                      "validation_reps")
                self.extractValidationReps(validation_data,
                                           validation_reps_path)
            self.extractTrainReps(trainDataLoader, numBatch)

        self.saveConfig(outputDir)
示例#3
0
    def train(self):
        """Train the model on one or more column-wise concatenated inputs.

        Settings are read through
        ``self.readField(self.config, self.name, <field>)``. ``train_data``
        may hold several comma-separated paths; one ``DataHandler`` is built
        per path, using the matching entry of the optional comma-separated
        ``train_dims`` option (default: ``[self.vDim]``). Each training batch
        is the ``axis=1`` concatenation of one batch from every loader.

        Other fields used: ``output_directory``, ``show_freq``,
        ``normalize``, ``train_size``, ``extract_reps`` (with ``train_reps``
        and ``validation_reps``), ``eval_freq`` (with ``query_size``,
        ``label``, ``query``), ``validation_data``, ``max_epoch``,
        ``keep_dataloader`` and ``checkpoint``.

        Side effects: creates ``<output_directory>/<self.name>`` (plus
        ``vis`` when ``show_freq`` > 0), saves validation representations
        every ``show_freq`` epochs, evaluates every ``eval_freq`` epochs,
        stores the loaders on ``self.trainDataLoader`` when
        ``keep_dataloader`` is true, optionally checkpoints and extracts
        representations, and finally calls ``self.saveConfig``.

        Raises:
            ValueError: if ``train_dims`` supplies fewer dimensions than
                ``train_data`` supplies files.
        """
        outputPrefix = self.readField(self.config, self.name,
                                      "output_directory")
        outputDir = os.path.join(outputPrefix, self.name)

        if not os.path.exists(outputDir):
            os.makedirs(outputDir)

        showFreq = int(self.readField(self.config, self.name, "show_freq"))
        # The same flag guards the directory creation and the per-epoch dump
        # so visDir is never used while undefined; non-positive values
        # disable the dump.
        doShow = showFreq > 0
        visDir = None
        if doShow:
            visDir = os.path.join(outputDir, 'vis')
            if not os.path.exists(visDir):
                os.mkdir(visDir)

        # Normalize the inputs if they were not normalized beforehand.
        normalize = self.str2bool(
            self.readField(self.config, self.name, "normalize"))
        trainDataSize = int(
            self.readField(self.config, self.name, "train_size"))
        # Floor division keeps numBatch an int on Python 2 and 3; a trailing
        # partial batch is dropped.
        numBatch = trainDataSize // self.batchsize

        extractReps = (self.readField(self.config, self.name,
                                      "extract_reps") == "True")
        trainRepsPath = None
        if extractReps:
            trainRepsPath = self.readField(self.config, self.name,
                                           "train_reps")
        print(trainDataSize)

        #Ehsan: several comma-separated training files, one loader each.
        trainDataPath = self.readField(self.config, self.name,
                                       'train_data').split(',')
        print(trainDataPath)
        trainDataFiles = len(trainDataPath)
        dims = [self.vDim]
        if self.config.has_option(self.name, 'train_dims'):
            dimsstr = self.readField(self.config, self.name,
                                     'train_dims').split(',')
            dims = [int(d) for d in dimsstr]
        if len(dims) < trainDataFiles:
            # Fail with an explicit message instead of a bare IndexError.
            raise ValueError(
                'train_dims provides %d dimension(s) but train_data lists '
                '%d file(s)' % (len(dims), trainDataFiles))
        # NOTE(review): every loader receives the same trainRepsPath --
        # confirm DataHandler copes with a shared path.
        # zip() stops after the last path, so surplus dims are ignored as
        # before.
        trainDataLoader = [
            DataHandler(path, trainRepsPath, dim, self.hDim, self.batchsize,
                        numBatch, normalize)
            for path, dim in zip(trainDataPath, dims)
        ]

        evalFreq = int(self.readField(self.config, self.name, 'eval_freq'))
        doEval = evalFreq != 0
        evaluator = None
        if doEval:
            qsize = int(self.readField(self.config, self.name, "query_size"))
            labelPath = self.readField(self.config, self.name, "label")
            queryPath = self.readField(self.config, self.name, "query")
            label = np.load(labelPath)
            evaluator = Evaluator(queryPath,
                                  label,
                                  os.path.join(outputDir, 'perf'),
                                  self.name,
                                  query_size=qsize,
                                  verbose=self.verbose)

        # The validation set is needed by both the evaluation and the 'vis'
        # dump, so load it when either is enabled.
        validation_data = None
        if doEval or doShow:
            evalPath = self.readField(self.config, self.name,
                                      "validation_data")
            validation_data = gp.garray(np.load(evalPath))
            if normalize:
                # trainDataLoader is a list here, so normalization has to go
                # through the individual loaders.
                if trainDataFiles == 1:
                    validation_data = trainDataLoader[0].doNormalization(
                        validation_data)
                else:
                    # Training batches are the axis=1 concatenation of the
                    # loaders' batches, so normalize each column group with
                    # its own loader and stitch the groups back together.
                    # Assumes the validation columns follow the same layout
                    # and that doNormalization accepts a column slice --
                    # TODO confirm.
                    pieces = []
                    start = 0
                    for loader, dim in zip(trainDataLoader, dims):
                        pieces.append(loader.doNormalization(
                            validation_data[:, start:start + dim]))
                        start += dim
                    validation_data = gp.concatenate(tuple(pieces), axis=1)

        maxEpoch = int(self.readField(self.config, self.name, "max_epoch"))

        nCommon, nMetric, title = self.getDisplayFields()
        if self.verbose:
            print(title)
        for epoch in range(maxEpoch):
            perf = np.zeros(nMetric)
            for loader in trainDataLoader:
                loader.reset()

            for _ in range(numBatch):
                # One batch per loader, joined column-wise into one input.
                batches = [loader.getOneBatch() for loader in trainDataLoader]
                batch = gp.concatenate(tuple(batches), axis=1)
                curr = self.trainOneBatch(batch, epoch, computeStat=True)
                perf = self.aggregatePerf(perf, curr)

            epochNum = 1 + epoch
            showNow = doShow and epochNum % showFreq == 0
            evalNow = doEval and epochNum % evalFreq == 0
            if showNow or evalNow:
                # Compute the representations once for both consumers.
                validation_code = self.getReps(validation_data)
                if showNow:
                    np.save(os.path.join(visDir, '%dvis' % epochNum),
                            validation_code)
                if evalNow:
                    evaluator.evalSingleModal(validation_code, epoch,
                                              self.name + 'V')
                # Release the codes before the next epoch starts.
                validation_code = None
            if self.verbose:
                self.printEpochInfo(epoch, perf, nCommon)

        #Ehsan: optionally keep the loaders on the instance for later reuse.
        # keep_dataloader is treated as optional: a failed read is reported
        # and taken as "do not keep". Only the read is guarded, and only
        # against Exception, so KeyboardInterrupt/SystemExit still propagate.
        try:
            keepDL = self.str2bool(
                self.readField(self.config, self.name, "keep_dataloader"))
        except Exception:
            print('exception occured')
            keepDL = False
        if keepDL:
            print('saving tdl for  %s' % (self.name,))
            self.trainDataLoader = trainDataLoader

        if self.readField(self.config, self.name, "checkpoint") == "True":
            self.doCheckpoint(outputDir)

        if extractReps:
            if doEval:
                validation_reps_path = self.readField(self.config, self.name,
                                                      "validation_reps")
                self.extractValidationReps(validation_data,
                                           validation_reps_path)
            # NOTE(review): this passes the list of loaders -- confirm
            # extractTrainReps accepts a list rather than one DataHandler.
            self.extractTrainReps(trainDataLoader, numBatch)

        self.saveConfig(outputDir)