def negset_create_patch(inputdir, outputpath, typefile, img_resize=None):
    # patchsize, rand, get_boundingbox and get_path_files come from module scope
    subdir = "negative/"
    count_num = 0
    files = get_path_files(inputdir + typefile + "/" + subdir)
    rand.seed(time.time())  # seed once; reseeding per patch can repeat values
    for j, eachfile in enumerate(files):
        filepre, fileext = splitext(eachfile)
        imgfile = inputdir + typefile + "/" + subdir + eachfile
        img = cv2.imread(imgfile)
        if img is None:  # cv2.imread returns None on failure
            print("failed to read image %s" % imgfile)
            continue
        if img_resize is not None:
            img = cv2.resize(img, img_resize)
        for i in range(30):  # cut 30 random patches per image
            rand_x = rand.randint(0, img.shape[1] - patchsize)
            rand_y = rand.randint(0, img.shape[0] - patchsize)
            p = (rand_x, rand_y)
            print(p)
            x1, y1, x2, y2 = get_boundingbox(p, patchsize, img.shape[1], img.shape[0])
            roiImg = img[y1:y2, x1:x2]  # NumPy array slicing selects the ROI
            filename = outputpath + typefile + "/" + subdir + filepre + str(i) + fileext
            cv2.imwrite(filename, roiImg)
            print('finished write:', filename)
            count_num += 1
    print("created negative patch:", count_num)
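# A minimal driver sketch for negset_create_patch, assuming patchsize is the
# module-level patch size the function reads and that the directory layout
# inputdir/typefile/negative/ exists; all paths and sizes here are hypothetical.
patchsize = 64
negset_create_patch("data/", "patches/", "train", img_resize=(640, 480))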
def train_dict(args):
    """Fit a Dictionary with training data."""
    # Create a dictionary
    model_dict = Dictionary(vocab_size=args.vocab_size,
                            min_freq=args.min_freq,
                            max_freq=args.max_freq)

    # Files to train
    files = get_path_files(args.dict_train_path)

    # Batch generator
    train_gen = read_files_batched(files,
                                   file_batch_size=args.file_batch_size,
                                   file_batch_shuffle=False,
                                   return_mode='array')

    # Fit dictionary in batches
    for docs in train_gen:
        tokens = [doc_to_tokens(doc) for doc in docs.flatten()]
        model_dict.fit_batch(tokens, prune_every_n=args.prune_every_n)

    # Save dict
    model_dict.lock()
    model_dict.save(args.dict_save_path)
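# A sketch of driving train_dict, assuming only the attribute names the
# function reads; argparse.Namespace stands in for a parsed command line and
# every value below is a hypothetical placeholder.
from argparse import Namespace

dict_args = Namespace(vocab_size=30000, min_freq=2, max_freq=1000000,
                      dict_train_path='data/train/',
                      file_batch_size=8192, prune_every_n=200,
                      dict_save_path='model/dict.pkl')
train_dict(dict_args)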
def nonlabels_to_densemap(inputdir, outputdir, heatmap_size):
    files = get_path_files(inputdir)
    print(len(files))
    count_num = 0
    for eachfile in files:
        filepre, fileext = splitext(eachfile)
        # Unlabeled images get an all-zero density map
        output = np.zeros((heatmap_size[0], heatmap_size[1]), dtype=np.float32)
        print('*', end=' ')
        np.savetxt(outputdir + filepre + '.csv', output, delimiter=',')
        count_num += 1
    print("created image densemap csv:", count_num)
def org_to_roiimg(inpath, outpath):
    files = get_path_files(inpath)
    print(len(files))
    count_num = 0
    for i, file in enumerate(files):
        image = cv2.imread(inpath + file)
        if image is None:  # cv2.imread returns None on failure
            print("failed to read file %s" % file)
            continue
        roi_img = cut_roi(image)
        count_num += 1
        print(i, "create:", outpath + file)
        cv2.imwrite(outpath + file, roi_img)
    return count_num
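# Hypothetical one-off call for org_to_roiimg; cut_roi is assumed to live in
# the same module. Trailing slashes matter because paths are concatenated.
n = org_to_roiimg("images/raw/", "images/roi/")
print("wrote %d ROI images" % n)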
def disp_imgroi(inpath):
    files = get_path_files(inpath)
    print(len(files))
    for file in files:
        image = cv2.imread(inpath + file, 0)  # read as grayscale
        if image is None:
            print("failed to read file %s" % file)
            continue
        y = get_roi_startY(image)
        print(y, file)
        imgbk = image.copy()
        # draw the detected ROI band above the start row y
        cv2.rectangle(imgbk, (0, y - 140), (image.shape[1], y), (0, 255, 0), 2, 8, 0)
        cv2.imshow("image", imgbk)
        cv2.waitKey()
def _load_confs(self):
    """
    Load configurations.
    :return: config dict {key: domain, value: config}
    """
    prefix = os.path.split(os.path.abspath(__file__))[0]
    path = os.sep.join([prefix, "configs"])
    files = get_path_files(path)
    config = dict()
    for f in files:
        obj = load_json_file(f)
        domain = obj["domain"]
        config[domain] = obj["conf"]
    return config
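# _load_confs reads one JSON file per domain from a "configs" folder next to
# this module; only the "domain" and "conf" keys are required by the loader.
# A sketch of writing such a file, with hypothetical names and values:
import json
import os

os.makedirs("configs", exist_ok=True)
with open(os.path.join("configs", "example.json"), "w") as f:
    json.dump({"domain": "example.com", "conf": {"timeout": 30}}, f)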
def test_get_path_files():
    print(utils.get_path_files('.', patten='.*txt', type=3))
    print(utils.get_path_files())
def decode_model(args):
    # Model
    model = Seq2Seq(model_dir=args.model_dir)

    # Files to be decoded
    files = get_path_files(args.test_data_path)

    # Batch generators
    # File batches
    file_gen = read_files_cycled(filenames=files,
                                 max_file_pool_size=args.max_file_pool_size,
                                 file_batch_size=args.file_batch_size,
                                 file_batch_shuffle=False)

    # Decode batches
    decode_gen = rebatch(file_gen,
                         in_batch_size_limit=args.file_batch_size * args.max_file_pool_size,
                         out_batch_size=args.batch_size,
                         shuffle=False,
                         flatten=True)

    # Decode
    write_target = False
    for batch_nb, batch in enumerate(decode_gen):
        print('Batch number {}'.format(batch_nb))

        if batch_nb == 0:
            # Number of columns in batch
            n_cols = len(batch[0])
            if n_cols == 1:
                source_docs = batch
            elif n_cols == 2:
                source_docs, target_docs = zip(*batch)
                write_target = True
            else:
                raise ValueError("Number of columns found %d not in [1,2]" % n_cols)

            # Output file handle and headers
            create_folder(os.path.dirname(args.decoded_data_path))
            fout = open(args.decoded_data_path, 'w', encoding='utf8')
            fout.write('source\t')
            if write_target:
                fout.write('target\t')
            fout.write("\t".join([str(k) for k in range(args.beam_width)]))
            fout.write('\n')

        if write_target:
            source_docs, target_docs = zip(*batch)
        else:
            source_docs = batch

        # Get decoded documents: list of lists, with beams as elements
        decoded_docs = model.decode(source_docs)

        # Write beams to file
        for i in range(len(decoded_docs)):
            fout.write(source_docs[i] + '\t')
            if write_target:
                fout.write(target_docs[i] + '\t')
            for k in range(args.beam_width):
                decoded_doc = decoded_docs[i][k]
                fout.write(decoded_doc + '\t')
            fout.write('\n')

    fout.close()
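# A sketch of invoking decode_model; every value below is a hypothetical
# stand-in for the real command-line flags the function reads.
from argparse import Namespace

decode_args = Namespace(model_dir='model/', test_data_path='data/test/',
                        max_file_pool_size=4, file_batch_size=1024,
                        batch_size=32, beam_width=5,
                        decoded_data_path='out/decoded.tsv')
decode_model(decode_args)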
def train_model(args):
    """Train sequence to sequence model with training files."""
    # Parse model parameters
    model_params = dict([(arg, val) for arg, val in vars(args).items()
                         if arg in get_Seq2Seq_model_param_names()])

    # Model
    model = Seq2Seq(model_dir=args.model_dir,
                    dict_path=args.dict_path,
                    **model_params)

    # Train
    if args.train_data_path is not None:

        # Train files
        files = get_path_files(args.train_data_path)
        if args.shuffle_files:
            np.random.shuffle(files)

        # Batch generators
        # File batches
        file_gen = read_files_cycled(
            filenames=files,
            max_file_pool_size=args.max_file_pool_size,
            file_batch_size=args.file_batch_size,
            file_batch_shuffle=False)

        # Train batches
        train_gen = rebatch(
            file_gen,
            in_batch_size_limit=args.file_batch_size * args.max_file_pool_size,
            out_batch_size=args.batch_size,
            shuffle=args.shuffle_file_batches,
            flatten=True)

        if args.validation_data_path is not None:
            valid_data = read_file(args.validation_data_path,
                                   nrows=args.validate_n_rows)
            valid_source_docs, valid_target_docs = zip(*valid_data)

        # Train
        start = time.perf_counter()  # time.clock() was removed in Python 3.8
        for batch_nb, batch in enumerate(train_gen):
            source_docs, target_docs = zip(*batch)
            loss, global_step = model.train(
                source_docs, target_docs,
                dropout_rate=args.dropout_rate,
                optimizer=args.optimizer,
                learning_rate=args.learning_rate,
                max_gradient_norm=args.max_gradient_norm,
                max_seq_len=args.max_seq_len,
                save_every_n_batch=args.save_every_n_batch)

            # Print progress
            end = time.perf_counter()
            samples = global_step * args.batch_size
            print('[{}] Training step: {} - Samples: {} - Loss: {:<.3f} - Time {:<.3f}'
                  .format(str(datetime.now()), global_step, samples, loss,
                          round(end - start, 3)))
            start = end

            # Validation
            if args.validation_data_path is not None:
                if batch_nb % args.validate_every_n_batch == 0 and batch_nb > 0:
                    loss, global_step = model.eval(valid_source_docs,
                                                   valid_target_docs)
                    end = time.perf_counter()
                    print('[{}] Validation step: {} - Samples: {} - Loss: {:<.3f} - Time {:<.3f}'
                          .format(str(datetime.now()), global_step, samples, loss,
                                  round(end - start, 3)))
                    start = end
    else:
        print('Model created, but no training files were provided!')
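# A sketch of the training entry point with hypothetical values; any attribute
# whose name appears in get_Seq2Seq_model_param_names() would be forwarded to
# the Seq2Seq constructor, while the rest drive the training loop.
from argparse import Namespace

train_args = Namespace(
    model_dir='model/', dict_path='model/dict.pkl',
    train_data_path='data/train/', shuffle_files=True,
    max_file_pool_size=4, file_batch_size=1024, batch_size=32,
    shuffle_file_batches=True,
    validation_data_path='data/valid.tsv', validate_n_rows=1000,
    validate_every_n_batch=100, dropout_rate=0.2, optimizer='adam',
    learning_rate=0.001, max_gradient_norm=5.0, max_seq_len=100,
    save_every_n_batch=500)
train_model(train_args)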