Exemplo n.º 1
0
def main(args):
    """Train, test and finally delete a GNN model for the requested dataset.

    ``args`` must expose ``gpu``, ``dataset``, ``data_path``, ``config_fpath``,
    ``lr``, ``n_epochs``, ``weight_decay`` and ``batch_size``.

    Raises:
        ValueError: if ``args.dataset`` is not one of the handled datasets.
    """
    # A negative GPU index selects CPU execution.
    use_cuda = not args.gpu < 0
    if use_cuda:
        torch.cuda.set_device(args.gpu)

    save_path = create_default_path()
    print('\n*** Set default saving/loading path to:', save_path)

    # Pick the loader function and the task type that go with the dataset.
    if args.dataset in (AIFB, MUTAG):
        loader_module = importlib.import_module(MODULE.format('dglrgcn'))
        load_fn = loader_module.load_dglrgcn
        task_mode = NODE_CLASSIFICATION
    elif args.dataset in (MUTAGENICITY, PTC_MR, PTC_MM, PTC_FR, PTC_FM):
        loader_module = importlib.import_module(MODULE.format('dortmund'))
        load_fn = loader_module.load_dortmund
        task_mode = GRAPH_CLASSIFICATION
    else:
        raise ValueError('Unable to load dataset', args.dataset)

    data = load_fn(args.data_path)
    if use_cuda:
        data = to_cuda(data)

    print_graph_stats(data[GRAPH])

    # Only the first entry of the parsed configuration is used below.
    model_config = read_params(args.config_fpath, verbose=True)[0]

    # create GNN model
    # NOTE: this instance is not handed to App below; App.train/App.test only
    # receive the data, the configuration and the save path.
    n_rels = data[N_RELS] if N_RELS in data else None
    n_entities = data[N_ENTITIES] if N_ENTITIES in data else None
    model = Model(g=data[GRAPH],
                  config_params=model_config,
                  n_classes=data[N_CLASSES],
                  n_rels=n_rels,
                  n_entities=n_entities,
                  is_cuda=use_cuda,
                  mode=task_mode)
    if use_cuda:
        model.cuda()

    # 1. Training
    app = App()
    learning_config = dict(
        lr=args.lr,
        n_epochs=args.n_epochs,
        weight_decay=args.weight_decay,
        batch_size=args.batch_size,
        cuda=use_cuda,
    )
    print('\n*** Start training ***\n')
    app.train(data, model_config, learning_config, save_path, mode=task_mode)

    # 2. Testing
    print('\n*** Start testing ***\n')
    app.test(data, save_path, mode=task_mode)

    # 3. Delete model
    remove_model(save_path)
Exemplo n.º 2
0
    def load_args(self,
                  report_dir_name=None,
                  report_dir_path=None,
                  report_file_name=None):
        """Read the parameters at ``CONFIG_PATH`` and prepare them for prediction.

        The three report arguments are stored on the instance unchanged.
        ``report_dir_name`` is additionally formatted into the JSON and pickle
        input folder paths (so ``None`` yields a folder literally named
        ``None``).

        Sets ``self.args``, ``self.odir`` and ``self.model_config``.

        Raises:
            KeyError: if the loaded parameters lack one of the keys that are
                read or deleted here (assuming ``read_params`` returns a dict).
        """
        self.args = read_params(CONFIG_PATH, verbose=False)

        self.report_dir_name = report_dir_name
        self.report_dir_path = report_dir_path
        self.report_file_name = report_file_name

        self.args['config_fpath'] = CONFIG_PATH

        self.args['from_pickle'] = False

        # At initialization, task_ids = None.
        # set folders to these:
        root = cf.__ROOT__
        self.args["input_report_folder"] = root + '/api_tasks/data_report'
        self.args["input_data_folder"] = (
            root + '/api_tasks/data_json/{}'.format(report_dir_name))
        self.args["input_pickle_folder"] = (
            root + '/api_tasks/data_pickle/{}'.format(report_dir_name))

        # These paths are stored relative to the project root in the config.
        for key in ("mapping_path", "train_embedding_path", "vocab_path"):
            self.args[key] = root + '/' + self.args[key]

        self.args["prepare_word_embedding"] = True
        self.args["train_embedder"] = False

        self.args["reverse_edge"] = __REVERSE_EDGE__

        del self.args['train_list_file']
        del self.args['test_list_file']

        self.args['do_draw'] = __DO_DRAW__

        # for predict
        config_fpath = self.args['config_fpath']
        fconfig_name = config_fpath.split('/')[-1]
        # Cut the file name off the END of the path. Splitting on the file
        # name would cut at its first occurrence, truncating the directory
        # whenever the same text also appears earlier in the path, and would
        # raise ValueError for a path ending in '/'.
        self.odir = config_fpath[:len(config_fpath) - len(fconfig_name)]

        self.model_config = self.args['model_configs']

        # odir keeps its trailing '/', so this path contains '//' as before.
        self.args['checkpoint_file'] = self.odir + '/checkpoint'
Exemplo n.º 3
0
def prepare_files(task_ids=None, cuda=True):
    """Build the run arguments from ``CONFIG_PATH`` and load the data files.

    Args:
        task_ids: optional iterable of task identifiers. When given, they are
            converted to strings, joined with '-' to name the JSON/pickle
            input folders, and passed on to ``PrepareData.load_data_files``.
        cuda: the loaded data is passed through ``to_cuda`` only when this is
            exactly ``True``.

    Returns:
        A ``(data, args)`` tuple; ``data`` is ``None`` when nothing was loaded.
    """
    args = read_params(CONFIG_PATH, verbose=False)
    root = cf.__ROOT__

    args['config_fpath'] = CONFIG_PATH

    if task_ids is not None:
        task_ids = list(map(str, task_ids))
        batch_folder = '-'.join(task_ids)
        args["input_report_folder"] = root + '/api_tasks/data_report'
        args["input_data_folder"] = (
            root + '/api_tasks/data_json/{}'.format(batch_folder))
        args["input_pickle_folder"] = (
            root + '/api_tasks/data_pickle/{}'.format(batch_folder))

    # Anchor the configured relative paths at the project root.
    for path_key in ("mapping_path", "train_embedding_path", "vocab_path"):
        args[path_key] = root + '/' + args[path_key]

    args["graph_viz_dir"] = root + '/data/graphviz'

    for source_flag in ("from_pickle", "from_report_folder", "from_data_json"):
        args[source_flag] = False

    args["prepare_word_embedding"] = True
    args["train_embedder"] = False

    args["reverse_edge"] = __REVERSE_EDGE__

    for list_key in ('train_list_file', 'test_list_file'):
        del args[list_key]

    args['do_draw'] = __DO_DRAW__

    data = PrepareData(args).load_data_files(task_ids)
    if data is None:
        return None, args

    if cuda is True:
        data = to_cuda(data)
    return data, args
Exemplo n.º 4
0
    def load_args(self, task_ids=None):
        """Read the parameters at ``CONFIG_PATH`` and prepare them for prediction.

        Args:
            task_ids: optional iterable of task identifiers. When given, they
                are converted to strings and joined with '-' to name the JSON
                and pickle input folders.

        Sets ``self.args`` and ``self.model_config``.

        Raises:
            KeyError: if the loaded parameters lack one of the keys that are
                read or deleted here (assuming ``read_params`` returns a dict).
        """
        self.args = read_params(CONFIG_PATH, verbose=False)
        root = cf.__ROOT__

        self.args['config_fpath'] = CONFIG_PATH

        if task_ids is not None:
            task_ids = [str(tid) for tid in task_ids]
            batch_task_name = '-'.join(task_ids)
            self.args["input_report_folder"] = root + '/api_tasks/data_report'
            self.args["input_data_folder"] = (
                root + '/api_tasks/data_json/{}'.format(batch_task_name))
            self.args["input_pickle_folder"] = (
                root + '/api_tasks/data_pickle/{}'.format(batch_task_name))

        # These paths are stored relative to the project root in the config.
        for key in ("mapping_path", "train_embedding_path", "vocab_path"):
            self.args[key] = root + '/' + self.args[key]

        self.args["graph_viz_dir"] = root + '/data/graphviz'

        self.args["from_pickle"] = False
        self.args["from_report_folder"] = False
        self.args["from_data_json"] = False

        self.args["prepare_word_embedding"] = True
        self.args["train_embedder"] = False

        self.args["reverse_edge"] = __REVERSE_EDGE__

        del self.args['train_list_file']
        del self.args['test_list_file']

        self.args['do_draw'] = __DO_DRAW__

        # for predict
        config_fpath = self.args['config_fpath']
        fconfig_name = config_fpath.split('/')[-1]
        # Cut the file name off the END of the path. Splitting on the file
        # name would cut at its first occurrence, truncating the directory
        # whenever the same text also appears earlier in the path, and would
        # raise ValueError for a path ending in '/'.
        odir = config_fpath[:len(config_fpath) - len(fconfig_name)]

        self.model_config = self.args['model_configs']

        # odir keeps its trailing '/', so this path contains '//' as before.
        self.args['checkpoint_file'] = odir + '/checkpoint'
Exemplo n.º 5
0
def run_app_2(args, data, cuda):
    """Evaluate a saved model on ``data`` using the files in ``args.out_dir``.

    The model configuration is read from
    ``<out_dir>/config_edGNN_graph_class.json``. When ``args.checkpoint_file``
    is ``None`` it is set (on ``args`` itself) to ``<out_dir>/checkpoint``.
    """
    out_dir = args.out_dir
    model_config_path = out_dir + '/config_edGNN_graph_class.json'
    print('*** Load model from', model_config_path)
    model_configs = read_params(model_config_path, verbose=True)

    # Default to the checkpoint stored alongside the configuration.
    if args.checkpoint_file is None:
        args.checkpoint_file = out_dir + '/checkpoint'

    print('\n*** Start testing ***\n')

    tester = App(
        data,
        model_config=model_configs[0],
        learning_config={'cuda': cuda},
        pretrained_weight=args.checkpoint_file,
        early_stopping=True,
        patience=20,
        json_path=args.input_data_file,
        vocab_path=args.vocab_path,
        mapping_path=args.mapping_path,
    )
    tester.test_on_data(args.checkpoint_file)
Exemplo n.º 6
0
    # parser.add_argument("-o", "--out_dir", default=None)

    # Turn the parsed namespace into a plain dict so that it can be merged
    # key-by-key into the configuration below.
    args = vars(parser.parse_args())
    print('args', args)

    # if args.train_tfidf and not args.from_report_folder:
    #     raise AssertionError('Train TF-IDF (-tf) cannot be on when processing from report folder (-fr) is off')

    # A negative GPU index selects CPU execution; otherwise the given device
    # becomes the current CUDA device.
    if args['gpu'] < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args['gpu'])
    print('cuda', cuda)

    config_params = read_params(args['config_fpath'], verbose=False)
    # combine arguments from config file and args
    # (command-line values overwrite same-named entries from the config file)
    for key in args:
        config_params[key] = args[key]

    # Reject the combination "prepend_vocab present but falsy" together with
    # a falsy vocab_path.
    if 'prepend_vocab' in config_params and not config_params[
            'prepend_vocab'] and not config_params['vocab_path']:
        raise AssertionError(
            'prepend_vocab (-pv) cannot be off when no vocab_path is parsed (-v)'
        )

    # prep_path is the directory of the config file, unless the config path
    # contains 'models/configs', in which case it is None.
    if 'models/configs' not in config_params['config_fpath']:
        config_params['prep_path'] = os.path.dirname(
            config_params['config_fpath'])
    else:
        config_params['prep_path'] = None
Exemplo n.º 7
0
def run_app(args, data, cuda):
    """Train or test a model on ``data`` depending on ``args.action``.

    * ``"train"``: creates ``output/<timestamp>``, copies the configuration
      file and two source files into it, then trains and tests.
    * ``"test"``: reads the configuration from ``args.out_dir`` and tests the
      checkpoint (``<out_dir>/checkpoint`` when none is given; the default is
      written back to ``args.checkpoint_file``).

    Any other action only prints the configuration path.
    """
    # print_graph_stats(data[GRAPH])

    print('*** Load model from', args.config_fpath)

    ###########################
    # 1. Training
    ###########################
    if args.action == "train":
        timestamp = time.strftime("%Y-%m-%d_%H-%M-%S")

        model_configs = read_params(args.config_fpath, verbose=True)

        run_dir = 'output/' + timestamp
        # default_path = create_default_path(odir+'/checkpoints')
        save_path = create_default_path(run_dir)
        print('\n*** Set default saving/loading path to:', save_path)

        train_settings = dict(
            lr=args.lr,
            epochs=args.epochs,
            weight_decay=args.weight_decay,
            batch_size=args.batch_size,
            cuda=cuda,
        )
        trainer = App(
            data,
            model_config=model_configs[0],
            learning_config=train_settings,
            pretrained_weight=args.checkpoint_file,
            early_stopping=True,
            patience=20,
            json_path=args.input_data_file,
            vocab_path=args.vocab_path,
            mapping_path=args.mapping_path,
            odir=run_dir,
        )
        print('\n*** Start training ***\n')

        # save config to output
        config_name = args.config_fpath.rsplit('/', 1)[-1]
        shutil.copy(src=args.config_fpath, dst=run_dir + '/' + config_name)
        shutil.copy(src='utils/prep_data.py', dst=run_dir + '/prep_data.py')
        shutil.copy(src='models/model_edgnn.py',
                    dst=run_dir + '/model_edgnn.py')

        # train
        trainer.train(
            save_path,
            k_fold=args.k_fold,
            train_list_file=args.train_list_file,
            test_list_file=args.test_list_file,
        )
        trainer.test(save_path)
        # remove_model(default_path)

    ###########################
    # 2. Testing
    ###########################
    # if args.action == "test" and args.checkpoint_file is not None:
    if args.action == "test":
        print('\n*** Start testing ***\n')
        # odir = 'output/2020-01-14_15-04-01'
        run_dir = args.out_dir

        model_configs = read_params(
            run_dir + '/config_edGNN_graph_class.json', verbose=True)

        # Default to the checkpoint stored alongside the configuration.
        if args.checkpoint_file is None:
            args.checkpoint_file = run_dir + '/checkpoint'

        tester = App(
            data,
            model_config=model_configs[0],
            learning_config={'cuda': cuda},
            pretrained_weight=args.checkpoint_file,
            early_stopping=True,
            patience=20,
            json_path=args.input_data_file,
            vocab_path=args.vocab_path,
            mapping_path=args.mapping_path,
            odir=run_dir,
        )
        tester.test(args.checkpoint_file)