import os
import pickle

import tensorflow as tf

import config
import data_process


def main():
    tf.logging.set_verbosity(tf.logging.INFO)

    options = config.basic_config()

    # Make sure the output directories exist before training starts.
    tf.gfile.MakeDirs(options.model_save_dir)
    tf.gfile.MakeDirs(options.tokenized_data_dir)
    tf.gfile.MakeDirs(options.log_dir)

    # Tokenize the raw corpus and copy its vocabulary statistics
    # into the run options.
    data_model = data_process.create_train_data(options.raw_data_dir)
    options.vocab_size = data_model.vocab_size
    options.eos_id = data_model.eos_id
    log_steps = 200
    do_train = True

    session_config = tf.ConfigProto(allow_soft_placement=True,
                                    log_device_placement=False)
    session_config.gpu_options.allow_growth = True
    run_config = tf.estimator.RunConfig(
        model_dir=options.model_save_dir,
        log_step_count_steps=log_steps,
        session_config=session_config)

    # model_fn_builder and train_input_fn are defined elsewhere in this module.
    model_fn = model_fn_builder(options)
    estimator = tf.estimator.Estimator(model_fn=model_fn,
                                       model_dir=options.model_save_dir,
                                       config=run_config)
    if do_train:
        # Persist the options next to the checkpoints so inference code
        # can rebuild the same configuration later.
        option_file = os.path.join(options.model_save_dir, 'options.pkl')
        with open(option_file, 'wb') as f:
            pickle.dump(options.__dict__, f, -1)
        tf.logging.info("*** options ***")
        for key in options.__dict__:
            tf.logging.info("\t{}: {}".format(key, options.__dict__[key]))
        tf_record_file = './data/sample.tf_record'
        estimator.train(input_fn=train_input_fn(tf_record_file, options))
def create_train_data(data_dir):
    from utils import Tokenizer, get_logger
    from config import basic_config
    logger = get_logger('log', './log/log.txt')
    t = Tokenizer(logger)
    # Data is the project's preprocessing class, imported at module level.
    # Tokenize the raw corpus and write it out as a TFRecord file.
    model = Data.pre_process_data(data_dir, t, basic_config(), logger)
    model.create_tf_record_file(model.sample_file)
    return model
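
The main() above hands Estimator.train a callable produced by train_input_fn, which must build a tf.data.Dataset from the TFRecord file. A minimal sketch under stated assumptions: the records carry a variable-length int64 input_ids feature and options exposes a batch_size (both hypothetical, inferred from the surrounding code).

def train_input_fn(tf_record_file, options):
    # Hypothetical sketch; the real feature spec lives in data_process.
    def input_fn():
        def _parse(record):
            features = {'input_ids': tf.VarLenFeature(tf.int64)}
            parsed = tf.parse_single_example(record, features)
            return tf.sparse.to_dense(parsed['input_ids'])

        dataset = tf.data.TFRecordDataset(tf_record_file)
        dataset = dataset.map(_parse).shuffle(1000)
        # options.batch_size is an assumption; pad each batch to its
        # longest sequence.
        dataset = dataset.padded_batch(options.batch_size, padded_shapes=[None])
        return dataset

    return input_fn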
Example #3
    def _pre_process(self):
        self.model_file = os.path.join(self.model_dir, 'model.ckpt')
        self.meta_file = os.path.join(self.model_dir, 'model.ckpt.meta')
        # var.pkl stores the exported tensor names plus the training config.
        var_file = os.path.join(self.model_dir, 'var.pkl')
        with open(var_file, 'rb') as f:
            self.var, self.config = pickle.load(f)
        # Overlay the saved values on the defaults so missing keys
        # fall back to basic_config().
        basic_config = config.basic_config()
        basic_config.__dict__.update(self.config)
        self.config = basic_config
        vocab_file = './data/vocab.txt'
        self.data_tools = Data(vocab_file, None, basic_config, logging)
        self.tokenizer = Tokenizer(logging)
Example #4
    def load_data(self):
        cfg = basic_config(self.newDir)

        if self.method == 'faster':
            cfg.faster_rcnn_config()
        elif self.method == 'fast':
            cfg.fast_rcnn_config()
        elif self.method == 'normal':
            cfg.rcnn_config()
        cfg = mcfg.rcnn_hoi_classes(cfg)
        cfg = set_config(cfg)
        cfg.get_args()
        cfg.dataset = 'HICO'
        cfg.update_paths()

        trainMeta = utils.load_dict(cfg.data_path + 'train')
        testMeta = utils.load_dict(cfg.data_path + 'test')

        trainGTMeta = utils.load_dict(cfg.data_path + 'train_GT')
        testGTMeta = utils.load_dict(cfg.data_path + 'test_GT')
        labels = utils.load_dict(cfg.data_path + 'labels')
        class_mapping = utils.load_dict(cfg.data_path + 'class_mapping')

        if cfg.max_classes is not None:
            # Reduce data to include only max_classes number of different classes
            _, counts = utils.getLabelStats(trainGTMeta, labels)
            reduced_idxs = utils.getReducedIdxs(counts, cfg.max_classes,
                                                labels)
            trainGTMeta = utils.reduceData(trainGTMeta, reduced_idxs)
            testGTMeta = utils.reduceData(testGTMeta, reduced_idxs)
            trainMeta = utils.reduceData(trainMeta, reduced_idxs)
            testMeta = utils.reduceData(testMeta, reduced_idxs)
            labels = utils.idxs2labels(reduced_idxs, labels)

        cfg.nb_classes = len(labels)
        cfg.set_class_weights(labels, trainGTMeta)
        _, valMeta = utils.splitData(list(trainMeta.keys()), trainMeta)
        self.cfg = cfg

        if cfg.move:
            self.move_data()

        print('Data:', cfg.data_path)
        print('Path:', cfg.my_results_path)

        self.labels = labels
        self.class_mapping = class_mapping
        self.trainMeta = trainMeta
        self.valMeta = valMeta
        self.testMeta = testMeta
        self.trainGTMeta = trainGTMeta
        self.testGTMeta = testGTMeta
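
utils.load_dict is called throughout with a bare path prefix; a plausible shape for it, assuming it unpickles a dict saved under <path>.pkl (hypothetical, inferred only from usage):

import pickle

def load_dict(path):
    # Hypothetical helper: unpickle a dict saved as <path>.pkl.
    with open(path + '.pkl', 'rb') as f:
        return pickle.load(f)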
Example #5
def release_model(**kwargs):
    release_dir = kwargs.get('release_dir', './release')
    restore_dir = kwargs.get('restore_dir', './out')
    if not os.path.isdir(release_dir):
        print("Create release dir: {}".format(release_dir))
        os.mkdir(release_dir)
    for file in glob.glob(os.path.join(release_dir, '*')):
        print("Remove previous file: {}".format(file))
        os.remove(file)
    # Model and variable files written by the release.
    release_model_file = os.path.join(release_dir, 'model.ckpt')
    release_var_file = os.path.join(release_dir, 'var.pkl')
    # Files to restore from: a specific step if given, else the
    # latest checkpoint in restore_dir.
    restore_step = kwargs.get('steps')
    if restore_step:
        restore_model_file = os.path.join(restore_dir, 'model.ckpt-{}'.format(restore_step))
    else:
        restore_model_file = tf.train.get_checkpoint_state(restore_dir).model_checkpoint_path
    restore_var_file = os.path.join(restore_dir, 'options.pkl')
    with open(restore_var_file, 'rb') as f:
        options = pickle.load(f)
        basic_config = config.basic_config()
        basic_config.__dict__.update(options)
        basic_config.beam_size = 2
    g = tf.Graph()
    with g.as_default():
        sess_config = tf.ConfigProto()
        sess_config.gpu_options.allow_growth = True
        with tf.Session(config=sess_config) as sess:
            input_ids = tf.placeholder(tf.int64, [None, None], name='input_ids')
            with tf.variable_scope('model'):
                # Rebuild the model in inference mode (training=False).
                model = transformer.Transformer(basic_config, False)
                out_res = model(input_ids, eos_id=basic_config.eos_id)
            top_decoded_ids = out_res['outputs']
            scores = out_res['scores']
            # Restore the training checkpoint, then re-save it under the
            # release directory together with the exported tensor names.
            saver = tf.train.Saver()
            saver.restore(sess, restore_model_file)
            saver.save(sess, release_model_file)
            _vars = {'input_ids': input_ids.name, 'decode_ids': top_decoded_ids.name, 'scores': scores.name}
            with open(release_var_file, 'wb') as f:
                pickle.dump((_vars, options), f, -1)
            print("Done!")
Example #6
def create_train_data(data_dir):
    from utils import Tokenizer, get_logger
    from config import basic_config
    logger = get_logger('log', './log/log.txt')
    t = Tokenizer(logger)
    model = Data.pre_process_data(data_dir, t, basic_config(), logger)
    model.create_tf_record_file(model.sample_file)
    return model
Example #7
    def load_data(self):
        cfg = basic_config(self.newDir)
        cfg = set_config(cfg)
        cfg.get_args()
        cfg.update_paths()

        trainMeta = utils.load_dict(cfg.data_path + 'train')
        testMeta = utils.load_dict(cfg.data_path + 'test')

        trainGTMeta = utils.load_dict(cfg.data_path + 'train_GT')
        testGTMeta = utils.load_dict(cfg.data_path + 'test_GT')
        labels = utils.load_dict(cfg.data_path + 'labels')

        if cfg.max_classes is not None:
            # Reduce data to include only max_classes number of different classes
            _, counts = utils.getLabelStats(trainGTMeta, labels)
            trainGTMeta, reduced_idxs = utils.reduceTrainData(
                trainGTMeta, counts, cfg.max_classes)
            testGTMeta = utils.reduceTestData(testGTMeta, reduced_idxs)
            trainMeta = utils.reduceTestData(trainMeta, reduced_idxs)
            testMeta = utils.reduceTestData(testMeta, reduced_idxs)
            labels = utils.idxs2labels(reduced_idxs, labels)

        cfg.nb_classes = len(labels)
        cfg.set_class_weights(labels, trainGTMeta)
        _, valMeta = utils.splitData(list(trainMeta.keys()), trainMeta)
        self.cfg = cfg

        if cfg.move:
            self.move_data()

        print('Path:', cfg.my_results_path)

        self.labels = labels
        self.trainMeta = trainMeta
        self.valMeta = valMeta
        self.testMeta = testMeta
        self.trainGTMeta = trainGTMeta
        self.testGTMeta = testGTMeta
Example #8
                relID += 1
        new_imagesMeta[imageID] = {
            'imageName': imageMeta['imageName'],
            'rels': rels
        }
    return new_imagesMeta

if __name__ == "__main__":
    #    metaData = sio.loadmat(url + 'anno.mat', struct_as_record=False, squeeze_me=True)
    bbData = sio.loadmat(url + 'anno_bbox.mat',
                         struct_as_record=False,
                         squeeze_me=True)
    #    actions = bbData['list_action']
    #    trainYMatrix = metaData['anno_train']
    bbDataTrain = bbData['bbox_train']
    cfg = basic_config()
    cfg = set_config(cfg)
    cfg.dataset = 'HICO'
    cfg.get_data_path()
    cfg.get_results_paths()
    labels = utils.load_dict(cfg.data_path + 'labels')
    print("Extract meta data")
    tmpTrainMeta = extractMetaData(bbDataTrain)
    print("Combine similar BBs")
    newTrainMeta = combineSimilarBBs(tmpTrainMeta, labels, 0.4)
    newTrainMetaID = list(newTrainMeta.keys())
    newTrainMetaID.sort()
    #    imagesID = imagesID[6490:7000]
    #    images = pp.loadImages(imagesID, imagesMeta, url+"images/train2015/")
    #    [dataXP, dataXB, dataY, dataMeta] = pp.getData(imagesID, imagesMeta, images, (224,244))
    #    trainYMatrix = pp.getMatrixLabels(len(actions), dataY)
Example #9
def main():
    config.basic_config(app='pantastic',
                        config_spec='configspec.ini',
                        description=__description__,
                        version=__version__)
    logging.basicConfig(filename=config.setting['log_file'],
                        level=config.LOG_LEVELS[config.setting['log_level']])
    logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))

    ignore_cards = []
    if config.setting['ignore_cards'] != '':
        with open(config.setting['ignore_cards'], 'r') as ignore_cards_handle:
            ignore_cards = ignore_cards_handle.read().splitlines()

    ignore_iins = []
    if config.setting['ignore_iins'] != '':
        with open(config.setting['ignore_iins'], 'r') as ignore_iins_handle:
            ignore_iins = ignore_iins_handle.read().splitlines()

    ignore_industries = []
    if config.setting['ignore_industries'] != '':
        with open(config.setting['ignore_industries'],
                  'r') as ignore_industries_handle:
            ignore_industries = ignore_industries_handle.read().splitlines()

    ignore_paths = []
    if config.setting['ignore_paths'] != '':
        with open(config.setting['ignore_paths'], 'r') as ignore_paths_handle:
            ignore_paths = ignore_paths_handle.read().splitlines()

    ignore_file_extensions = []
    if config.setting['ignore_file_extensions'] != '':
        with open(config.setting['ignore_file_extensions'],
                  'r') as ignore_file_extensions_handle:
            ignore_file_extensions = ignore_file_extensions_handle.read(
            ).splitlines()

    if not config.setting['verbose'] and config.setting['output'] == '':
        logging.error(
            'No output type specified, either set an output file with --output or turn verbose mode on'
        )
        return EXIT_PARAM_ERROR

    pan_manager = Pantastic(
        ignore_cards=ignore_cards,
        ignore_iins=ignore_iins,
        ignore_industries=ignore_industries,
        include_deprecated=config.setting['include_deprecated'],
        minimum_digits=config.setting['minimum_digits'],
        maximum_digits=config.setting['maximum_digits'],
        cards_per_file=config.setting['cards_per_file'],
        ignore_file_extensions=ignore_file_extensions,
        unmask_card_number=config.setting['unmask_card_number'],
        max_group_count=config.setting['max_group_count'],
        max_group_distance=config.setting['max_group_distance'],
        output=config.setting['output'],
        ignore_paths=ignore_paths,
        verbose=config.setting['verbose'])

    if config.setting['dir'] != '':
        pan_manager.scan_location(config.setting['dir'])

    if config.setting['file'] != '':
        pan_manager.scan_file_with_output(config.setting['file'])

    return EXIT_OK
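
The five ignore-list reads above repeat one open/read/splitlines pattern; a compact helper (hypothetical) that would collapse them:

def read_list_setting(key):
    # Return the file's lines for a configured path, or [] when unset.
    path = config.setting[key]
    if path == '':
        return []
    with open(path, 'r') as handle:
        return handle.read().splitlines()

ignore_cards = read_list_setting('ignore_cards')
ignore_iins = read_list_setting('ignore_iins')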
Example #10
"""
@author: aag14
"""
import sys

sys.path.append('../../../')
sys.path.append('../../shared/')
sys.path.append('../models/')
sys.path.append('../filters/')
sys.path.append('../data/')

from config import basic_config
import groundtruths
import numpy as np

cfg = basic_config(False)
cfg.nb_hoi_rois = 16
cfg.nb_hoi_classes = 4
cfg.hoi_max_overlap = 0.5
cfg.hoi_min_overlap = 0.1
cfg.nb_hoi_positives = 2
cfg.nb_hoi_negatives1 = 6
cfg.nb_hoi_negatives2 = 8
cfg.rpn_stride = 16

imageDims = {'shape': (100, 100, 3), 'scale': [1, 1]}

hbboxes = [{
    'xmin': 0,
    'xmax': 20,
    'ymin': 0,