import os

import numpy as np

# `util` is a project-local module providing load_image() and load_label().


def data_load(self, label):
    data_path = os.path.join(self.opt.dataroot, self.opt.cap_scheme, label)
    self.num_train_samples = min(
        self.opt.train_size,
        len(os.listdir(os.path.join(data_path, 'train'))))
    # cap the test set at 2000 samples
    self.num_test_sample = min(
        2000, len(os.listdir(os.path.join(data_path, 'test'))))

    # load the training set: grayscale images and one-hot encoded labels
    x_train = np.empty(
        (self.num_train_samples, self.opt.loadHeight, self.opt.loadWidth, 1),
        dtype='uint8')
    y_train = np.empty(
        (self.num_train_samples, self.opt.cap_len * self.opt.char_set_len),
        dtype='uint8')
    train_labels = util.load_label(
        os.path.join(data_path, label + '_train.txt'))
    for i in range(self.num_train_samples):
        img_name = os.path.join(data_path, 'train', str(i) + '.jpg')
        x_train[i, :, :, :] = util.load_image(img_name)
        y_train[i, :] = self.text2vec(train_labels[i])

    # load the testing set
    x_test = np.empty(
        (self.num_test_sample, self.opt.loadHeight, self.opt.loadWidth, 1),
        dtype='uint8')
    y_test = np.empty(
        (self.num_test_sample, self.opt.cap_len * self.opt.char_set_len),
        dtype='uint8')
    test_labels = util.load_label(
        os.path.join(data_path, label + '_test.txt'))
    for i in range(self.num_test_sample):
        img_name = os.path.join(data_path, 'test', str(i) + '.jpg')
        x_test[i, :, :, :] = util.load_image(img_name)
        y_test[i, :] = self.text2vec(test_labels[i])

    return (x_train, y_train), (x_test, y_test)
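# data_load() relies on self.text2vec() to one-hot encode each captcha
# string into a flat vector of length cap_len * char_set_len. That helper is
# not shown here; the sketch below is a minimal assumed implementation, not
# the project's actual code, and `self.opt.char_set` (the ordered alphabet)
# is a hypothetical attribute used only for illustration.
def text2vec(self, text):
    vec = np.zeros(self.opt.cap_len * self.opt.char_set_len, dtype='uint8')
    for pos, ch in enumerate(text):
        # each character occupies its own char_set_len-wide slot
        idx = pos * self.opt.char_set_len + self.opt.char_set.index(ch)
        vec[idx] = 1
    return vec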
import collections
import json
import logging
import os
import random

# Project-local helpers assumed importable in this module: rmdir(),
# load_label(), and the BaseDataset class.


def __init__(self, opt):
    self.opt = opt
    assert os.path.exists(
        opt.dir + "/data.txt"), "No data.txt found in specified dir"
    assert os.path.exists(
        opt.dir + "/label.txt"), "No label.txt found in specified dir"

    train_dir = opt.data_dir + "/TrainSet/"
    val_dir = opt.data_dir + "/ValidateSet/"
    test_dir = opt.data_dir + "/TestSet/"

    # split the raw data unless all three split directories already exist
    if not all([
            os.path.exists(train_dir),
            os.path.exists(val_dir),
            os.path.exists(test_dir)
    ]):
        # remove any partial split directories before re-splitting
        rmdir(train_dir)
        rmdir(val_dir)
        rmdir(test_dir)

        # split data into Train, Validate and Test
        logging.info("Split raw data to Train, Val and Test")
        ratios = opt.ratio
        dataset = collections.defaultdict(list)
        with open(opt.dir + '/data.txt', encoding="utf-8") as d:
            for line in d:
                line = json.loads(line)
                # if the record already carries a split type, keep it
                if "type" in line:
                    dataset[line["type"]].append(line)
                    continue
                # otherwise assign a split at random according to `ratios`
                rand = random.random()
                if rand < ratios[0]:
                    data_type = "Train"
                elif rand < ratios[0] + ratios[1]:
                    data_type = "Validate"
                else:
                    data_type = "Test"
                dataset[data_type].append(line)

        # write each split back to disk
        self._WriteDataToFile(dataset["Train"], train_dir)
        self._WriteDataToFile(dataset["Validate"], val_dir)
        self._WriteDataToFile(dataset["Test"], test_dir)

    self.rid2name, self.id2rid, self.rid2id = load_label(opt.dir +
                                                         '/label.txt')
    self.num_classes = [len(item) - 2 for item in self.rid2name]

    # load the dataset(s) needed for the current mode
    if opt.mode == "Train":
        logging.info("Load Train Dataset...")
        self.train_set = BaseDataset(self.opt, "TrainSet", self.rid2id)
        logging.info("Load Validate Dataset...")
        self.val_set = BaseDataset(self.opt, "ValidateSet", self.rid2id)
    else:
        # force batch_size and load_thread to 1 for testing
        self.opt.batch_size = 1
        self.opt.load_thread = 1
        logging.info("Load Test Dataset...")
        self.test_set = BaseDataset(self.opt, "TestSet", self.rid2id)
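# _WriteDataToFile() is referenced above but not defined in this file. Below
# is a minimal sketch of what it is assumed to do (write one JSON record per
# line to a data.txt inside the split directory); the real project helper
# may differ.
def _WriteDataToFile(self, records, dst_dir):
    os.makedirs(dst_dir, exist_ok=True)
    with open(os.path.join(dst_dir, "data.txt"), 'w', encoding="utf-8") as f:
        for record in records:
            f.write(json.dumps(record, separators=(',', ':')) + '\n')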
import json
import logging
import os

import torch
import torch.backends.cudnn as cudnn
import torch.nn.functional as F

# Project-local helpers assumed importable: Options, opt2file, load_label,
# get_transformer, load_model, load_image.


def main():
    # parse options
    op = Options()
    opt = op.parse()

    # special settings for deployment: no shuffling, single-sample batches
    opt.shuffle = False
    opt.batch_size = 1
    opt.load_thread = 1

    # initialize the working dir for this run
    test_dir = os.path.join(opt.classify_dir, opt.name)
    opt.model_dir = opt.dir + "/trainer_" + opt.name + "/Train/"
    if not os.path.exists(test_dir):
        os.mkdir(test_dir)

    # save options to disk
    opt2file(opt, os.path.join(test_dir, "opt.txt"))

    # log to both a file and the console
    log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    formatter = logging.Formatter(log_format)
    fh = logging.FileHandler(test_dir + "/deploy.log", 'a')
    fh.setFormatter(formatter)
    ch = logging.StreamHandler()
    ch.setFormatter(formatter)
    logging.getLogger().addHandler(fh)
    logging.getLogger().addHandler(ch)
    logging.getLogger().setLevel(logging.INFO)

    # load labels
    if opt.label_file == "":
        opt.label_file = opt.dir + "/label.txt"
    rid2name, id2rid, rid2id = load_label(opt.label_file)
    num_classes = [len(rid2name[index]) - 2 for index in range(len(rid2name))]

    # load the input transformer and the model
    transformer = get_transformer(opt)
    model = load_model(opt, num_classes)
    model.eval()

    # use cuda if requested
    if opt.cuda:
        model = model.cuda(opt.devices[0])
        cudnn.benchmark = True

    with open(test_dir + "/classify_res_data.txt", 'w') as res_file, \
            open(opt.classify_dir + "/data.txt") as data:
        for num, line in enumerate(data):
            logging.info(str(num + 1))
            line = json.loads(line)
            input_tensor = load_image(line["image_file"], line["box"], opt,
                                      transformer)
            input_tensor = input_tensor.unsqueeze(0)
            if opt.cuda:
                input_tensor = input_tensor.cuda(opt.devices[0])
            # torch.no_grad() replaces the deprecated Variable(volatile=True)
            with torch.no_grad():
                outputs = model(input_tensor)
            if not isinstance(outputs, list):
                outputs = [outputs]
            line["classify_res"] = list()
            for index, out in enumerate(outputs):
                out = out.cpu()
                softmax = F.softmax(out, dim=1).data.squeeze()
                probs, ids = softmax.sort(0, descending=True)
                classify_res = {}
                for i in range(len(probs)):
                    # .item() converts 0-dim tensors to plain Python numbers
                    # so the result is JSON-serializable
                    name = rid2name[index][id2rid[index][ids[i].item()]]
                    classify_res[name] = probs[i].item()
                classify_res["max_score"] = probs[0].item()
                classify_res["best_label"] = rid2name[index][id2rid[index][
                    ids[0].item()]]
                line["classify_res"].append(classify_res)
            res_file.write(json.dumps(line, separators=(',', ':')) + '\n')
    logging.info("classification done")
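# Each line written to classify_res_data.txt is the original record plus a
# "classify_res" list holding, per model head, the full per-label
# probabilities along with "max_score" and "best_label". A minimal reader
# for that file, assuming the output path used above, might look like this:
def print_best_labels(res_path):
    with open(res_path) as f:
        for record in map(json.loads, f):
            for head_idx, res in enumerate(record["classify_res"]):
                print(record["image_file"], head_idx,
                      res["best_label"], res["max_score"])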