# Example #1
# 0
    def data_load(self, label):
        """Load the train and test splits for captcha scheme `label`.

        Reads images and text labels from
        `<dataroot>/<cap_scheme>/<label>/{train,test}` and returns them as
        Keras-style `(x_train, y_train), (x_test, y_test)` uint8 arrays.
        Side effects: sets `self.num_train_samples` and
        `self.num_test_sample`.
        """
        data_path = os.path.join(self.opt.dataroot, self.opt.cap_scheme, label)
        # Train size is capped by opt.train_size, test size by 2000.
        self.num_train_samples = min(
            self.opt.train_size,
            len(os.listdir(os.path.join(data_path, 'train'))))
        self.num_test_sample = min(
            2000, len(os.listdir(os.path.join(data_path, 'test'))))

        # The train and test loaders were duplicated verbatim; both now go
        # through one helper.
        x_train, y_train = self._load_split(data_path, label, 'train',
                                            self.num_train_samples)
        x_test, y_test = self._load_split(data_path, label, 'test',
                                          self.num_test_sample)
        return (x_train, y_train), (x_test, y_test)

    def _load_split(self, data_path, label, split, num_samples):
        """Load `num_samples` images and encoded labels for one split.

        Images are read from `<data_path>/<split>/<i>.jpg` (grayscale,
        loadHeight x loadWidth x 1) and text labels from
        `<data_path>/<label>_<split>.txt`, encoded via `self.text2vec`.
        """
        images = np.empty((num_samples, self.opt.loadHeight,
                           self.opt.loadWidth, 1),
                          dtype='uint8')
        encoded = np.empty(
            (num_samples, self.opt.cap_len * self.opt.char_set_len),
            dtype='uint8')
        texts = util.load_label(
            os.path.join(data_path, label + '_' + split + '.txt'))
        for i in range(num_samples):
            img_name = os.path.join(data_path, split, str(i) + '.jpg')
            images[i, :, :, :] = util.load_image(img_name)
            encoded[i, :] = self.text2vec(texts[i])
        return images, encoded
    def __init__(self, opt):
        """Split raw data into Train/Validate/Test sets and load datasets.

        Expects `opt.dir` to contain `data.txt` (one JSON record per line)
        and `label.txt`.  If the split directories under `opt.data_dir` do
        not all exist, the raw records are (re-)split according to
        `opt.ratio` and written out; otherwise the existing split is reused.
        Depending on `opt.mode`, loads train+validate or test datasets.
        """
        self.opt = opt
        # NOTE(review): asserts are stripped under `python -O`; kept as-is
        # to preserve the AssertionError contract for existing callers.
        assert os.path.exists(
            opt.dir + "/data.txt"), "No data.txt found in specified dir"
        assert os.path.exists(
            opt.dir + "/label.txt"), "No label.txt found in specified dir"

        train_dir = opt.data_dir + "/TrainSet/"
        val_dir = opt.data_dir + "/ValidateSet/"
        test_dir = opt.data_dir + "/TestSet/"

        # (Re-)split only when at least one split directory is missing.
        if not all([
                os.path.exists(train_dir),
                os.path.exists(val_dir),
                os.path.exists(test_dir)
        ]):
            # Remove any partial split so the directories are rebuilt cleanly.
            rmdir(train_dir)
            rmdir(val_dir)
            rmdir(test_dir)

            # Split raw records into Train, Validate and Test.
            logging.info("Split raw data to Train, Val and Test")
            ratios = opt.ratio
            dataset = collections.defaultdict(list)
            with open(opt.dir + '/data.txt', encoding="utf-8") as d:
                # Iterate the file lazily instead of d.readlines(), which
                # needlessly materializes the whole file in memory.
                for raw_line in d:
                    record = json.loads(raw_line)
                    # Honor a pre-assigned split type if the record has one.
                    if "type" in record:
                        dataset[record["type"]].append(record)
                        continue
                    # Otherwise assign a split at random per `opt.ratio`
                    # (ratios[0] -> Train, ratios[1] -> Validate, rest Test).
                    rand = random.random()
                    if rand < ratios[0]:
                        data_type = "Train"
                    elif rand < ratios[0] + ratios[1]:
                        data_type = "Validate"
                    else:
                        data_type = "Test"
                    dataset[data_type].append(record)
            # Persist each split to its directory.
            self._WriteDataToFile(dataset["Train"], train_dir)
            self._WriteDataToFile(dataset["Validate"], val_dir)
            self._WriteDataToFile(dataset["Test"], test_dir)

        self.rid2name, self.id2rid, self.rid2id = load_label(opt.dir +
                                                             '/label.txt')
        # NOTE(review): the "- 2" offset presumably skips two bookkeeping
        # entries per attribute in rid2name — confirm against load_label().
        self.num_classes = [len(item) - 2 for item in self.rid2name]

        # Load the dataset(s) appropriate for the current mode.
        if opt.mode == "Train":
            logging.info("Load Train Dataset...")
            self.train_set = BaseDataset(self.opt, "TrainSet", self.rid2id)
            logging.info("Load Validate Dataset...")
            self.val_set = BaseDataset(self.opt, "ValidateSet", self.rid2id)
        else:
            # Force single-sample, single-thread loading for testing.
            self.opt.batch_size = 1
            self.opt.load_thread = 1
            logging.info("Load Test Dataset...")
            self.test_set = BaseDataset(self.opt, "TestSet", self.rid2id)
def main():
    """Classify every record in `classify_dir/data.txt` and write results.

    Loads a trained model per the parsed options, runs single-sample
    inference over each JSON line of data.txt, and appends per-attribute
    probability dicts under the "classify_res" key, writing the augmented
    records to `<classify_dir>/<name>/classify_res_data.txt`.
    """
    # parse options
    op = Options()
    opt = op.parse()

    # special setting: deterministic, one-sample-at-a-time inference
    opt.shuffle = False
    opt.batch_size = 1
    opt.load_thread = 1

    # initialize the working dir for this classification run
    test_dir = os.path.join(opt.classify_dir, opt.name)
    opt.model_dir = opt.dir + "/trainer_" + opt.name + "/Train/"
    if not os.path.exists(test_dir):
        os.mkdir(test_dir)

    # save options to disk for reproducibility
    opt2file(opt, os.path.join(test_dir, "opt.txt"))

    # log setting: mirror messages to a deploy log file and the console
    log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    formatter = logging.Formatter(log_format)
    fh = logging.FileHandler(test_dir + "/deploy.log", 'a')
    fh.setFormatter(formatter)
    ch = logging.StreamHandler()
    ch.setFormatter(formatter)
    logging.getLogger().addHandler(fh)
    logging.getLogger().addHandler(ch)
    logging.getLogger().setLevel(logging.INFO)

    # load label mappings; fall back to <dir>/label.txt
    if opt.label_file == "":
        opt.label_file = opt.dir + "/label.txt"
    rid2name, id2rid, rid2id = load_label(opt.label_file)
    # NOTE(review): the "- 2" offset presumably skips two bookkeeping
    # entries per attribute in rid2name — confirm against load_label().
    num_classes = [len(rid2name[index]) - 2 for index in range(len(rid2name))]

    # load input transformer
    transformer = get_transformer(opt)

    # load model in inference mode
    model = load_model(opt, num_classes)
    model.eval()

    # use cuda
    if opt.cuda:
        model = model.cuda(opt.devices[0])
        cudnn.benchmark = True

    # Context managers guarantee both files are closed even if inference
    # raises mid-run (the original leaked the output handle on error).
    with open(test_dir + "/classify_res_data.txt", 'w') as result_file, \
            open(opt.classify_dir + "/data.txt") as data:
        for num, line in enumerate(data):
            logging.info(str(num + 1))
            line = json.loads(line)
            input_tensor = load_image(line["image_file"], line["box"], opt,
                                      transformer)
            input_tensor = input_tensor.unsqueeze(0)
            if opt.cuda:
                input_tensor = input_tensor.cuda(opt.devices[0])
            # NOTE(review): Variable(volatile=True) is a no-op on modern
            # PyTorch; torch.no_grad() is the current equivalent — confirm
            # the targeted torch version before migrating.
            outputs = model(Variable(input_tensor, volatile=True))
            if not isinstance(outputs, list):
                outputs = [outputs]
            line["classify_res"] = list()
            for index, out in enumerate(outputs):
                out = out.cpu()
                softmax = F.softmax(out, dim=1).data.squeeze()
                # Sort class probabilities in descending order.
                probs, ids = softmax.sort(0, True)
                classify_res = {}
                for i in range(len(probs)):
                    classify_res[rid2name[index][id2rid[index][
                        ids[i]]]] = probs[i]
                classify_res["max_score"] = probs[0]
                classify_res["best_label"] = rid2name[index][id2rid[index][
                    ids[0]]]
                line["classify_res"].append(classify_res)
            result_file.write(json.dumps(line, separators=(',', ':')) + '\n')
    logging.info("classification done")
def main():
    """Run single-sample classification over `classify_dir/data.txt`.

    Loads the trained model selected by the parsed options, classifies
    each JSON record, attaches per-attribute probability dicts under
    "classify_res", and writes the augmented records to
    `<classify_dir>/<name>/classify_res_data.txt`.
    """
    # parse options
    op = Options()
    opt = op.parse()

    # special setting: no shuffling, one sample and one loader thread
    opt.shuffle = False
    opt.batch_size = 1
    opt.load_thread = 1

    # initialize the working dir for this run
    test_dir = os.path.join(opt.classify_dir, opt.name)
    opt.model_dir = opt.dir + "/trainer_" + opt.name + "/Train/"
    if not os.path.exists(test_dir):
        os.mkdir(test_dir)

    # save options to disk
    opt2file(opt, os.path.join(test_dir, "opt.txt"))

    # log setting: file handler plus console handler
    log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    formatter = logging.Formatter(log_format)
    fh = logging.FileHandler(test_dir + "/deploy.log", 'a')
    fh.setFormatter(formatter)
    ch = logging.StreamHandler()
    ch.setFormatter(formatter)
    logging.getLogger().addHandler(fh)
    logging.getLogger().addHandler(ch)
    logging.getLogger().setLevel(logging.INFO)

    # load label; default to <dir>/label.txt when unset
    if opt.label_file == "":
        opt.label_file = opt.dir + "/label.txt"
    rid2name, id2rid, rid2id = load_label(opt.label_file)
    # NOTE(review): "- 2" presumably skips two bookkeeping entries per
    # attribute — confirm against load_label().
    num_classes = [len(rid2name[index]) - 2 for index in range(len(rid2name))]

    # load transformer
    transformer = get_transformer(opt)

    # load model in eval mode
    model = load_model(opt, num_classes)
    model.eval()

    # use cuda
    if opt.cuda:
        model = model.cuda(opt.devices[0])
        cudnn.benchmark = True

    # `with` closes both files even on an exception mid-loop; the original
    # opened the result file without a context manager and leaked it.
    with open(test_dir + "/classify_res_data.txt", 'w') as result_file, \
            open(opt.classify_dir + "/data.txt") as data:
        for num, line in enumerate(data):
            logging.info(str(num + 1))
            line = json.loads(line)
            input_tensor = load_image(line["image_file"], line["box"], opt,
                                      transformer)
            input_tensor = input_tensor.unsqueeze(0)
            if opt.cuda:
                input_tensor = input_tensor.cuda(opt.devices[0])
            # NOTE(review): Variable(volatile=True) is deprecated and a
            # no-op on modern PyTorch; torch.no_grad() is the current
            # equivalent — confirm the targeted torch version first.
            outputs = model(Variable(input_tensor, volatile=True))
            if not isinstance(outputs, list):
                outputs = [outputs]
            line["classify_res"] = list()
            for index, out in enumerate(outputs):
                out = out.cpu()
                softmax = F.softmax(out, dim=1).data.squeeze()
                # Descending sort: probs[0] is the best class.
                probs, ids = softmax.sort(0, True)
                classify_res = {}
                for i in range(len(probs)):
                    classify_res[rid2name[index][id2rid[index][
                        ids[i]]]] = probs[i]
                classify_res["max_score"] = probs[0]
                classify_res["best_label"] = rid2name[index][id2rid[index][
                    ids[0]]]
                line["classify_res"].append(classify_res)
            result_file.write(json.dumps(line, separators=(',', ':')) + '\n')
    logging.info("classification done")