def negset_create_patch(inputdir,outputpath,typefile,img_resize=None):
    subdir="negative/"
    count_num=0
    files=get_path_files(inputdir+typefile+"/"+subdir)
    for j,eachfile in enumerate(files):
        filepre,fileext=splitext(eachfile)
        imgfile=inputdir+typefile+"/"+subdir+eachfile
        #if "AApPkxlfOdiMjMs" not in imgfile:
        #    continue
        img=cv2.imread(imgfile)
        if img is None:
            print("failed to read image file %s" % imgfile)
            continue
        if img_resize is not None:
            img=cv2.resize(img,img_resize)
        rand.seed(time.time())  # seed once per file instead of once per patch
        for i in range(30):
            rand_x=rand.randint(0,img.shape[1]-patchsize)
            rand_y=rand.randint(0,img.shape[0]-patchsize)
            p=(rand_x,rand_y) 
            print(p)
            x1,y1,x2,y2=get_boundingbox(p,patchsize,img.shape[1],img.shape[0])
            #cv2.rectangle(img, (x1,y1),(x2,y2),(0, 255, 0),1)
            roiImg = img[y1:y2,x1:x2]  # use numpy array slicing to set the ROI region
            filename= outputpath+typefile+"/"+subdir+filepre+str(i)+fileext
            cv2.imwrite(filename,roiImg)
            print('finished write:',filename)
            count_num+=1
    print("created negative pathch:",count_num)
Example 2
def train_dict(args):
    """Fit a Dictionary with training data."""

    # Create a dictionary
    model_dict = Dictionary(vocab_size=args.vocab_size,
                            min_freq=args.min_freq,
                            max_freq=args.max_freq)

    # Files to train
    files = get_path_files(args.dict_train_path)

    # Batch generator
    train_gen = read_files_batched(files,
                                   file_batch_size=args.file_batch_size,
                                   file_batch_shuffle=False,
                                   return_mode='array')

    # Fit dictionary in batches
    for docs in train_gen:
        tokens = [doc_to_tokens(doc) for doc in docs.flatten()]
        model_dict.fit_batch(tokens, prune_every_n=args.prune_every_n)

    # Save dict
    model_dict.lock()
    model_dict.save(args.dict_save_path)
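A minimal sketch of how train_dict might be driven without a command line; the field names mirror the args.* attributes read above, while the concrete values and paths are placeholders:

from argparse import Namespace

# Hypothetical args object; every field matches an args.* attribute used in train_dict.
args = Namespace(vocab_size=30000, min_freq=2, max_freq=10000,
                 dict_train_path="data/dict_train/", file_batch_size=10000,
                 prune_every_n=50, dict_save_path="models/dictionary.pkl")
train_dict(args)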
def nonlabels_to_densemap(inputdir, outputdir, heatmap_size):
    files = get_path_files(inputdir)
    print(len(files))
    count_num = 0
    for eachfile in files:
        vspoints = []
        filepre, fileext = splitext(eachfile)
        output = np.zeros((heatmap_size[0], heatmap_size[1]), dtype=np.float32)
        print('*', end=' ')
        np.savetxt(outputdir + filepre + '.csv', output, delimiter=',')
        count_num += 1
    print("create image densemap csv:", count_num)
Example 4
def org_to_roiimg(inpath, outpath):
    files = get_path_files(inpath)
    print(len(files))
    count_num = 0
    for i, file in enumerate(files):
        image = cv2.imread(inpath + file)
        if image is None:
            print("failed to read %s file" % file)
            continue
        roi_img = cut_roi(image)
        count_num += 1
        print(i, "create:", outpath + file)
        cv2.imwrite(outpath + file, roi_img)
    return count_num
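A short usage sketch; the paths are placeholders, and note that inpath and outpath are concatenated with the file names directly, so both should end with a path separator:

# Hypothetical call: crops every readable image in data/org/ with cut_roi()
# and writes the result under data/roi/ with the same file name.
n = org_to_roiimg("data/org/", "data/roi/")
print("cropped images:", n)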
Example 5
def disp_imgroi(inpath):
    files = get_path_files(inpath)
    print(len(files))
    for file in files:
        image = cv2.imread(inpath + file, 0)
        if image is None:
            print("failed to read file", file)
            continue
        y = get_roi_startY(image)
        print(y, file)
        imgbk = image.copy()
        cv2.rectangle(imgbk, (0, y - 140), (image.shape[1], y), (0, 255, 0), 2,
                      8, 0)
        cv2.imshow("image", imgbk)
        cv2.waitKey()
Example 6
    def _load_confs(self):
        """
        加载配置
        :return: 配置字典 {key:domain,value:config}
        """
        prefix = os.path.split(os.path.abspath(__file__))[0]
        path = os.sep.join([prefix, "configs"])
        files = get_path_files(path)
        config = dict()

        for f in files:
            obj = load_json_file(f)
            domain = obj["domain"]
            config[domain] = obj["conf"]
        return config
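Based on the keys read above, each JSON file under configs/ is expected to provide a "domain" string and a "conf" object; the concrete fields inside "conf" below are placeholders:

{
    "domain": "example.com",
    "conf": {"timeout": 30, "retries": 3}
}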
Example 7
def test_get_path_files():
    print(utils.get_path_files('.', patten='.*txt', type=3))
    print(utils.get_path_files())
Example 8
def decode_model(args):

    # Model
    model = Seq2Seq(model_dir=args.model_dir)

    # Files to be decoded
    files = get_path_files(args.test_data_path)

    # Batch generator
    # File batches
    file_gen = read_files_cycled(filenames=files,
                                 max_file_pool_size=args.max_file_pool_size,
                                 file_batch_size=args.file_batch_size,
                                 file_batch_shuffle=False)

    # Decode batches
    decode_gen = rebatch(file_gen,
                         in_batch_size_limit=args.file_batch_size *
                         args.max_file_pool_size,
                         out_batch_size=args.batch_size,
                         shuffle=False,
                         flatten=True)

    # Decode
    write_target = False
    for batch_nb, batch in enumerate(decode_gen):

        print('Batch number {}'.format(batch_nb))

        if batch_nb == 0:
            # Number of columns in batch
            n_cols = len(batch[0])

            if n_cols == 1:
                source_docs = batch
            elif n_cols == 2:
                source_docs, target_docs = zip(*batch)
                write_target = True
            else:
                raise ValueError("Number of columns found (%d) is not in [1, 2]"
                                 % n_cols)

            # Output file handle and headers
            create_folder(os.path.dirname(args.decoded_data_path))
            fout = open(args.decoded_data_path, 'w', encoding='utf8')
            fout.write('source\t')
            if write_target:
                fout.write('target\t')
            fout.write("\t".join([str(k) for k in range(args.beam_width)]))
            fout.write('\n')

        if write_target:
            source_docs, target_docs = zip(*batch)
        else:
            source_docs = batch

        # Get decoded documents: list of lists, with beams as elements
        decoded_docs = model.decode(source_docs)

        # Write beams to file
        for i in range(len(decoded_docs)):
            fout.write(source_docs[i] + '\t')
            if write_target:
                fout.write(target_docs[i] + '\t')
            for k in range(args.beam_width):
                decoded_doc = decoded_docs[i][k]
                fout.write(decoded_doc + '\t')

            fout.write('\n')

    fout.close()
Example 9
def train_model(args):
    """Train sequence to sequence model with training files."""

    # Parse model parameters
    model_params = dict([(arg, val) for arg, val in vars(args).items()
                         if arg in get_Seq2Seq_model_param_names()])

    # Model
    model = Seq2Seq(model_dir=args.model_dir,
                    dict_path=args.dict_path,
                    **model_params)

    # Train
    if args.train_data_path is not None:

        # Train files
        files = get_path_files(args.train_data_path)

        if args.shuffle_files:
            np.random.shuffle(files)

        # Batch generators
        # File batches
        file_gen = read_files_cycled(
            filenames=files,
            max_file_pool_size=args.max_file_pool_size,
            file_batch_size=args.file_batch_size,
            file_batch_shuffle=False)

        # Train batches
        train_gen = rebatch(file_gen,
                            in_batch_size_limit=args.file_batch_size *
                            args.max_file_pool_size,
                            out_batch_size=args.batch_size,
                            shuffle=args.shuffle_file_batches,
                            flatten=True)

        if args.validation_data_path is not None:
            valid_data = read_file(args.validation_data_path,
                                   nrows=args.validate_n_rows)
            valid_source_docs, valid_target_docs = zip(*valid_data)

        # Train
        start = time.perf_counter()
        for batch_nb, batch in enumerate(train_gen):
            source_docs, target_docs = zip(*batch)
            loss, global_step = model.train(
                source_docs,
                target_docs,
                dropout_rate=args.dropout_rate,
                optimizer=args.optimizer,
                learning_rate=args.learning_rate,
                max_gradient_norm=args.max_gradient_norm,
                max_seq_len=args.max_seq_len,
                save_every_n_batch=args.save_every_n_batch)

            # Print progress
            end = time.perf_counter()
            samples = global_step * args.batch_size
            print('[{}] Training step: {} - Samples: {} - Loss: {:<.3f} - Time {:<.3f}'\
                .format(str(datetime.now()), global_step, samples, loss, round(end-start,3)))
            start = end

            # Validation
            if args.validation_data_path is not None:
                if batch_nb % args.validate_every_n_batch == 0 and batch_nb > 0:
                    loss, global_step = model.eval(valid_source_docs,
                                                   valid_target_docs)
                    end = time.perf_counter()
                    print('[{}] Validation step: {} - Samples: {} - Loss: {:<.3f} - Time {:<.3f}'\
                        .format(str(datetime.now()), global_step, samples, loss, round(end-start,3)))
                    start = end

    else:
        print('Model created, but no training files were provided!')