Example #1
def npzs2ftkcht(dir_path='.'):
	data, labels = [[] for i in range(2)]
	for file in fs.listf(dir_path):
		if file.endswith(".npz"):
			npzf = io.read_npz(os.path.join(dir_path, file))
			dim_vals = npzf['dim_vals'].tolist()['featfilt__k']
			vals, val_ids = np.array(dim_vals.keys()), np.array(dim_vals.values())
			sorted_idx = vals.argsort()
			data.append((vals[sorted_idx], npzf['score_avg_cube'][val_ids[sorted_idx]]))
			labels.append(os.path.splitext(file)[0])
	plot.plot_ftnum(data, labels, marker=True)
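For reference, fs, io and plot here are project helper modules (bionlp.util.fs is imported the same way in Example #6), not standard-library packages. A minimal sketch of the .npz layout this function expects, assuming io.read_npz behaves like numpy.load with pickling enabled (hypothetical file name and values):

import os
import numpy as np

# 'dim_vals' maps each tried feature number to its index in 'score_avg_cube'
np.savez(os.path.join('.', 'demo_clf.npz'),
         dim_vals=np.array({'featfilt__k': {10: 0, 20: 1, 30: 2}}),
         score_avg_cube=np.array([0.78, 0.83, 0.81]))
# npzs2ftkcht('.') would then plot the scores against the sorted feature numbers.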
Example #2
def npzs2yaml(dir_path='.', mdl_t='Classifier'):
    pw = io.param_writer(os.path.join(dir_path, 'mdlcfg'))
    for file in fs.listf(dir_path):
        if file.endswith(".npz"):
            fpath = os.path.join(dir_path, file)
            params = io.read_npz(fpath)['best_params'].tolist()
            for k in params.keys():
                if (type(params[k]) == np.ndarray):
                    params[k] = params[k].tolist()
                if (isinstance(params[k], np.generic)):
                    params[k] = np.asscalar(params[k])
            pw(mdl_t, file, params)
    pw(None, None, None, True)
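The inner loop only has to coerce NumPy containers and scalars into plain Python objects before they are written out as YAML. A self-contained sketch of the same idea, using .item() because np.asscalar was removed in NumPy 1.23:

import numpy as np

def to_native(params):
    # Return a copy of params with numpy arrays/scalars replaced by plain Python types
    out = {}
    for k, v in params.items():
        if isinstance(v, np.ndarray):
            v = v.tolist()
        elif isinstance(v, np.generic):
            v = v.item()
        out[k] = v
    return out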
Example #3
def avgfeatw(dir_path='.'):
	df_list = []
	for file in fs.listf(dir_path):
		if file.endswith(".npz"):
			df_list.append(io.read_df(os.path.join(dir_path, file), with_idx=True))
	feat_w_mt = pd.concat([df.loc[:,'Importance Mean'] for df in df_list], axis=1, join_axes=[df_list[0].index]).astype('float').values
	feat_w_avg = feat_w_mt.mean(axis=1)
	feat_w_std = feat_w_mt.std(axis=1)
	sorted_idx = np.argsort(feat_w_avg, axis=-1)[::-1]
	sorted_feat_w = np.column_stack((df_list[0].loc[:,'Feature Name'].values[sorted_idx], feat_w_avg[sorted_idx], feat_w_std[sorted_idx]))
	feat_w_df = pd.DataFrame(sorted_feat_w, index=df_list[0].index.values[sorted_idx], columns=['Feature Name', 'Importance Mean', 'Importance Std'])
	feat_w_df.to_excel(os.path.join(dir_path, 'featw.xlsx'))
	io.write_df(feat_w_df, os.path.join(dir_path, 'featw'), with_idx=True)
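Note that the join_axes argument of pd.concat was deprecated in pandas 0.25 and removed in 1.0; on a current pandas the same alignment can be expressed with reindex, e.g. as a drop-in replacement for the concatenation above:

feat_w_mt = pd.concat([df.loc[:, 'Importance Mean'] for df in df_list],
                      axis=1).reindex(df_list[0].index).astype('float').values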
Example #4
def add_d2v(n_components=100, win_size=8, min_t=5, mdl_fname='d2v.mdl'):
	from gensim.parsing.preprocessing import preprocess_string
	from gensim.models.doc2vec import TaggedDocument, Doc2Vec
	def read_files(fpaths, code='ascii'):
		for fpath in fpaths:
			try:
				yield TaggedDocument(words=preprocess_string('\n'.join(fs.read_file(fpath, code))), tags=[os.path.splitext(os.path.basename(fpath))[0]])
			except Exception as e:
				continue
	def read_prcsed_files(fpaths, code='ascii'):
		for fpath in fpaths:
			try:
				words = []
				for line in fs.read_file(fpath, code):
					if (line == '~~~'):
						continue
					if (line == '.	.	.' or line == '~~~	~~~' or line == ',	,	,'):
						continue
					items = line.split()
					if (len(items) < 3): # Skip the unrecognized words
						continue
					words.append(items[2].lower())
				yield TaggedDocument(words=words, tags=[os.path.splitext(os.path.basename(fpath))[0]])
			except Exception as e:
				continue
	mdl_fpath = os.path.join(spdr.DATA_PATH, mdl_fname)
	if (os.path.exists(mdl_fpath)):
		model = Doc2Vec.load(mdl_fpath)
	else:
		# model = Doc2Vec(read_files(fs.listf(spdr.ABS_PATH, full_path=True)), size=n_components, window=8, min_count=5, workers=opts.np)
		model = Doc2Vec(read_prcsed_files(fs.listf(os.path.join(spdr.DATA_PATH, 'lem'), full_path=True)), size=n_components, window=win_size, min_count=min_t, workers=opts.np)
		model.save(os.path.join(spdr.DATA_PATH, mdl_fname))
		
	X, Y = spdr.get_data(None, ft_type=opts.type, max_df=ast.literal_eval(opts.maxdf), min_df=ast.literal_eval(opts.mindf), from_file=True, fmt=opts.fmt, spfmt=opts.spfmt)
	# Map the index of original matrix to that of the paragraph vectors
	d2v_idx = [model.docvecs.index_to_doctag(i).rstrip('.lem') for i in range(model.docvecs.count)]
	mms = MinMaxScaler()
	d2v_X = pd.DataFrame(mms.fit_transform(model.docvecs[range(model.docvecs.count)]), index=d2v_idx, columns=['d2v_%i' % i for i in range(model.docvecs[0].shape[0])])
	# d2v_X = pd.DataFrame(model.docvecs[range(model.docvecs.count)], index=d2v_idx, columns=['d2v_%i' % i for i in range(model.docvecs[0].shape[0])])
	new_X = pd.concat([X, d2v_X], axis=1, join_axes=[X.index])
	print 'The size of data has been changed from %s to %s.' % (X.shape, new_X.shape)
	if (opts.fmt == 'npz'):
		io.write_df(d2v_X, os.path.join(spdr.DATA_PATH, 'd2v_X.npz'), with_idx=True, sparse_fmt=opts.spfmt, compress=True)
		io.write_df(new_X, os.path.join(spdr.DATA_PATH, 'cmb_d2v_X.npz'), with_idx=True, sparse_fmt=opts.spfmt, compress=True)
	else:
		d2v_X.to_csv(os.path.join(spdr.DATA_PATH, 'd2v_X.csv'), encoding='utf8')
		new_X.to_csv(os.path.join(spdr.DATA_PATH, 'cmb_d2v_X.csv'), encoding='utf8')
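This snippet targets Python 2 with gensim 3.x: in gensim 4 the size argument became vector_size, model.docvecs became model.dv, and index_to_doctag was dropped in favour of the index_to_key list. Under those assumptions, an equivalent training call and vector lookup would look roughly like:

from gensim.models.doc2vec import Doc2Vec

model = Doc2Vec(read_prcsed_files(fs.listf(os.path.join(spdr.DATA_PATH, 'lem'), full_path=True)),
                vector_size=n_components, window=win_size, min_count=min_t, workers=opts.np)
doc_tags = list(model.dv.index_to_key)   # replaces model.docvecs.index_to_doctag(i)
doc_vecs = model.dv[doc_tags]            # replaces model.docvecs[range(model.docvecs.count)]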
Example #5
def get_featsets(feat_sets, label_num, labels=[]):
	feat_files = fs.listf(os.path.join(FEATS_PATH, 'byLabel'))
	featset_patn = re.compile(r'\s\d')
	if (len(labels) != 0):
		feature_sets = [[] for i in range(label_num)]
	else:
		feature_sets = []
	feat_stat = {}
	# Every feature set
	for fset in feat_sets:
		fs_list = []
		fs_stat = []
		# Every matched file
		for f in fnmatch.filter(feat_files, fset+'*'):
			ft_per_lb = []
			for line in fs.read_file(os.path.join(FEATS_PATH, 'byLabel', f), code='utf8'):
				feat_match = featset_patn.search(line)
				if (not feat_match):
					continue
				# Deal with different types of features
				if (fset == 'parse'):
					feature = line[:feat_match.start()].strip(' []').replace('\'', '').replace(', ', ',')
				else:
					feature = line[:feat_match.start()].strip(' ')
				ft_per_lb.append(feature)
			fs_per_lb = set(ft_per_lb)
			fs_list.append(fs_per_lb)
			fs_stat.append(len(fs_per_lb))
		
		# If the number of feature-set files is not equal to that of labels, combine the redundance into the last label
		if (len(fs_list) > label_num):
			fs_list[label_num-1].update(set.union(*fs_list[label_num:]))
			fs_stat[label_num-1] = sum(fs_stat[label_num-1:])
			del fs_list[label_num:]
			del fs_stat[label_num:]
			
		if (len(labels) != 0):
			for i in range(len(feature_sets)):
				feature_sets[i].append(fs_list[i])
		else:
			feature_sets.append(set.union(*fs_list))
		feat_stat[fset] = fs_stat
	return feature_sets, feat_stat
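The \s\d pattern assumes every line in a byLabel feature file is a feature string followed by whitespace and a numeric count; the exact layout is an assumption, but a line consumed by this function would be parsed like this (hypothetical content):

import re

featset_patn = re.compile(r'\s\d')
line = 'protein kinase\t12'                 # hypothetical ngram feature-file line
m = featset_patn.search(line)
print(line[:m.start()].strip(' '))          # -> 'protein kinase'
# For fset == 'parse' the feature part is a printed Python list, e.g. "['nsubj', 'dobj']\t5"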
Example #6
def multi_clf(dev_id=None):
    '''Train multiple classifiers and use them to predict multiple sets of labels'''
    import inflect
    from bionlp.util import fs
    iflteng = inflect.engine()

    logging.info('### Multi Classifier Head Mode ###')
    # Prepare model related meta data
    mdl_name = args.model.lower().replace(' ', '_')
    common_cfg = cfgr('validate', 'common')
    pr = io.param_reader(os.path.join(PAR_DIR, 'etc', '%s.yaml' % common_cfg.setdefault('mdl_cfg', 'mdlcfg')))
    config_kwargs = dict([(k, v) for k, v in args.__dict__.items() if not k.startswith('_') and k not in set(['dataset', 'model', 'template']) and v is not None and not callable(v)])
    config = Configurable(args.task, mdl_name, common_cfg=common_cfg, wsdir=PAR_DIR, **config_kwargs)
    params = pr('LM', config.lm_params) if mdl_name != 'none' else {}
    use_gpu = dev_id is not None
    tokenizer = config.tknzr.from_pretrained(params['pretrained_vocab_path'] if 'pretrained_vocab_path' in params else config.lm_mdl_name) if config.tknzr else None
    task_type = config.task_type
    _adjust_encoder(tokenizer, config)
    special_tknids_args = dict(zip(special_tkns[0], special_tknids))
    task_trsfm_kwargs = dict(list(zip(special_tkns[0], special_tknids))+[('model',args.model), ('sentsim_func', args.sentsim_func), ('seqlen',args.maxlen)])
    # Prepare task related meta data.
    task_path, task_dstype, task_cols, task_trsfm, task_extparms = args.input if args.input and os.path.isdir(os.path.join(DATA_PATH, args.input)) else config.task_path, config.task_ds, config.task_col, config.task_trsfm, config.task_ext_params
    trsfms = (task_trsfm[0] if len(task_trsfm) > 0 else [])
    # trsfms_kwargs = ([] if args.model in LM_EMBED_MDL_MAP else ([{'seqlen':args.maxlen, 'xpad_val':task_extparms.setdefault('xpad_val', 0), 'ypad_val':task_extparms.setdefault('ypad_val', None)}] if TASK_TYPE_MAP[args.task]=='nmt' else [{'seqlen':args.maxlen, 'trimlbs':task_extparms.setdefault('trimlbs', False), 'special_tkns':special_tknids_args}, task_trsfm_kwargs, {'seqlen':args.maxlen, 'xpad_val':task_extparms.setdefault('xpad_val', 0), 'ypad_val':task_extparms.setdefault('ypad_val', None)}])) + (task_trsfm[1] if len(task_trsfm) >= 2 else [{}] * len(task_trsfm[0]))
    trsfms_kwargs = ([] if hasattr(config, 'embed_type') and config.embed_type else ([{'seqlen':args.maxlen, 'xpad_val':task_extparms.setdefault('xpad_val', 0), 'ypad_val':task_extparms.setdefault('ypad_val', None)}] if config.task_type=='nmt' else [{'seqlen':args.maxlen, 'trimlbs':task_extparms.setdefault('trimlbs', False), 'required_special_tkns':['start_tknids', 'clf_tknids', 'delim_tknids'] if task_type in ['entlmnt', 'sentsim'] and (task_extparms.setdefault('sentsim_func', None) is None or not mdl_name.startswith('bert')) else ['start_tknids', 'clf_tknids'], 'special_tkns':special_tknids_args}, task_trsfm_kwargs, {'seqlen':args.maxlen, 'xpad_val':task_extparms.setdefault('xpad_val', 0), 'ypad_val':task_extparms.setdefault('ypad_val', None)}])) + (task_trsfm[1] if len(task_trsfm) >= 2 else [{}] * len(task_trsfm[0]))
    ds_kwargs = {'sampw':args.sample_weights, 'sampfrac':args.sampfrac}
    if task_type == 'nmt':
        ds_kwargs.update({'lb_coding':task_extparms.setdefault('lb_coding', 'IOB')})
    elif task_type == 'entlmnt':
        ds_kwargs.update(dict((k, task_extparms[k]) for k in ['origlb', 'lbtxt', 'neglbs', 'reflb'] if k in task_extparms))
    elif task_type == 'sentsim':
        ds_kwargs.update({'ynormfunc':task_extparms.setdefault('ynormfunc', None)})
    global_all_binlb = {}

    ext_params = dict([(k, getattr(args, k)) if hasattr(args, k) else (k, v) for k, v in config.clf_ext_params.items()])
    if hasattr(config, 'embed_type') and config.embed_type: ext_params['embed_type'] = config.embed_type
    task_params = dict([(k, getattr(args, k)) if hasattr(args, k) and getattr(args, k) is not None else (k, v) for k, v in task_extparms.setdefault('mdlcfg', {}).items()])
    logging.info('Classifier hyper-parameters: %s' % ext_params)
    logging.info('Classifier task-related parameters: %s' % task_params)
    orig_epochs = mltclf_epochs = args.epochs
    elapsed_mltclf_epochs, args.epochs = 0, 1
    if (args.resume):
        # Load model
        clf, prv_optimizer, resume, chckpnt = load_model(args.resume)
        if args.refresh:
            logging.info('Refreshing and saving the model with newest code...')
            try:
                save_model(clf, prv_optimizer, '%s_%s.pth' % (args.task, args.model))
            except Exception as e:
                logging.warning(e)
        elapsed_mltclf_epochs, all_binlb = chckpnt.setdefault('mltclf_epochs', 0), clf.binlb
        # Update parameters
        clf.update_params(task_params=task_params, **ext_params)
        if (use_gpu): clf = _handle_model(clf, dev_id=dev_id, distrb=args.distrb)
        # optmzr_cls = OPTMZR_MAP.setdefault(args.model.split('_')[0], (torch.optim.Adam, {}, None))
        optmzr_cls = config.optmzr if config.optmzr else (torch.optim.Adam, {}, None)
        optimizer = optmzr_cls[0](clf.parameters(), lr=args.lr, weight_decay=args.wdecay, **optmzr_cls[1]) if args.optim == 'adam' else torch.optim.SGD(clf.parameters(), lr=args.lr, momentum=0.9)
        if prv_optimizer: optimizer.load_state_dict(prv_optimizer.state_dict())
        training_steps = int(len(train_ds) / args.bsize) if hasattr(train_ds, '__len__') else args.trainsteps
        scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=args.wrmprop, num_training_steps=training_steps) if not args.noschdlr and len(optmzr_cls) > 2 and optmzr_cls[2] and optmzr_cls[2] == 'linwarm' else None
        logging.info((optimizer, scheduler))
    else:
        # Build model
        lm_model = gen_mdl(mdl_name, config, pretrained=True if type(args.pretrained) is str and args.pretrained.lower() == 'true' else args.pretrained, use_gpu=use_gpu, distrb=args.distrb, dev_id=dev_id) if mdl_name != 'none' else None
        clf = gen_clf(args.model, config, args.encoder, lm_model=lm_model, mlt_trnsfmr=True if task_type in ['entlmnt', 'sentsim'] and task_params.setdefault('sentsim_func', None) is not None else False, task_params=task_params, use_gpu=use_gpu, distrb=args.distrb, dev_id=dev_id, **ext_params)
        # optmzr_cls = OPTMZR_MAP.setdefault(args.model.split('_')[0], (torch.optim.Adam, {}, None))
        optmzr_cls = config.optmzr if config.optmzr else (torch.optim.Adam, {}, None)
        optimizer = optmzr_cls[0](clf.parameters(), lr=args.lr, weight_decay=args.wdecay, **optmzr_cls[1]) if args.optim == 'adam' else torch.optim.SGD(clf.parameters(), lr=args.lr, momentum=0.9)
        training_steps = int(len(train_ds) / args.bsize) if hasattr(train_ds, '__len__') else args.trainsteps
        scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=args.wrmprop, num_training_steps=training_steps) if not args.noschdlr and len(optmzr_cls) > 2 and optmzr_cls[2] and optmzr_cls[2] == 'linwarm' else None
        logging.info((optimizer, scheduler))

    # Prepare data
    logging.info('Dataset path: %s' % os.path.join(DATA_PATH, task_path))
    num_clfs = min([len(fs.listf(os.path.join(DATA_PATH, task_path), pattern=r'%s_\d.csv' % x)) for x in ['train', 'dev', 'test']])
    for epoch in range(elapsed_mltclf_epochs, mltclf_epochs):
        logging.info('Global %i epoch(s)...' % epoch)
        clf.reset_global_binlb()
        all_binlb = {}
        for i in range(num_clfs):
            logging.info('Training on the %s sub-dataset...' % iflteng.ordinal(i+1))
            train_ds = task_dstype(os.path.join(DATA_PATH, task_path, 'train_%i.%s' % (i, args.fmt)), task_cols['X'], task_cols['y'], config.encode_func, tokenizer, config, sep='\t', index_col=task_cols['index'], binlb=task_extparms['binlb'] if 'binlb' in task_extparms else None, transforms=trsfms, transforms_kwargs=trsfms_kwargs, mltl=task_extparms.setdefault('mltl', False), **ds_kwargs)
            new_lbs = [k for k in train_ds.binlb.keys() if k not in all_binlb]
            all_binlb.update(dict([(k, v) for k, v in zip(new_lbs, range(len(all_binlb), len(all_binlb)+len(new_lbs)))]))
            if mdl_name.startswith('bert'): train_ds = MaskedLMIterDataset(train_ds) if isinstance(train_ds, BaseIterDataset) else MaskedLMDataset(train_ds)
            lb_trsfm = [x['get_lb'] for x in task_trsfm[1] if 'get_lb' in x]
            if (not args.weight_class or task_type == 'sentsim'):
                class_count = None
            elif len(lb_trsfm) > 0:
                lb_df = train_ds.df[task_cols['y']].apply(lb_trsfm[0])
                class_count = np.array([[1 if lb in y else 0 for lb in train_ds.binlb.keys()] for y in lb_df]).sum(axis=0)
            else:
                lb_df = train_ds.df[task_cols['y']]
                binlb = task_extparms['binlb'] if 'binlb' in task_extparms and type(task_extparms['binlb']) is not str else train_ds.binlb
                class_count = lb_df.value_counts()[binlb.keys()].values
            if (class_count is None):
                class_weights = None
                sampler = None
            else:
                class_weights = torch.Tensor(1.0 / class_count)
                class_weights /= class_weights.sum()
                class_weights *= (args.clswfac[min(len(args.clswfac)-1, i)] if type(args.clswfac) is list else args.clswfac)
                sampler = WeightedRandomSampler(weights=class_weights, num_samples=args.bsize, replacement=True)
                if type(dev_id) is list: class_weights = class_weights.repeat(len(dev_id))
            train_loader = DataLoader(train_ds, batch_size=args.bsize, shuffle=False, sampler=None, num_workers=args.np, drop_last=args.droplast)

            dev_ds = task_dstype(os.path.join(DATA_PATH, task_path, 'dev_%i.%s' % (i, args.fmt)), task_cols['X'], task_cols['y'], config.encode_func, tokenizer, config, sep='\t', index_col=task_cols['index'], binlb=task_extparms['binlb'] if 'binlb' in task_extparms and type(task_extparms['binlb']) is not str else all_binlb, transforms=trsfms, transforms_kwargs=trsfms_kwargs, mltl=task_extparms.setdefault('mltl', False), **ds_kwargs)
            if mdl_name.startswith('bert'): dev_ds = MaskedLMIterDataset(dev_ds) if isinstance(dev_ds, BaseIterDataset) else MaskedLMDataset(dev_ds)
            dev_loader = DataLoader(dev_ds, batch_size=args.bsize, shuffle=False, num_workers=args.np)
            test_ds = task_dstype(os.path.join(DATA_PATH, task_path, 'test_%i.%s' % (i, args.fmt)), task_cols['X'], task_cols['y'], config.encode_func, tokenizer, config, sep='\t', index_col=task_cols['index'], binlb=task_extparms['binlb'] if 'binlb' in task_extparms and type(task_extparms['binlb']) is not str else all_binlb, transforms=trsfms, transforms_kwargs=trsfms_kwargs, mltl=task_extparms.setdefault('mltl', False), **ds_kwargs)
            if mdl_name.startswith('bert'): test_ds = MaskedLMIterDataset(test_ds) if isinstance(test_ds, BaseIterDataset) else MaskedLMDataset(test_ds)
            test_loader = DataLoader(test_ds, batch_size=args.bsize, shuffle=False, num_workers=args.np)
            logging.debug(('binlb', train_ds.binlb, dev_ds.binlb, test_ds.binlb))

            # Adjust the model
            clf.get_linear(binlb=train_ds.binlb, idx=i)

            # Training on splitted datasets
            train(clf, optimizer, train_loader, config, special_tknids_args, scheduler=scheduler, pad_val=(task_extparms.setdefault('xpad_val', 0), train_ds.binlb[task_extparms.setdefault('ypad_val', 0)]) if task_type=='nmt' else task_extparms.setdefault('xpad_val', 0), weights=class_weights, lmcoef=args.lmcoef, clipmaxn=args.clipmaxn, epochs=args.epochs, earlystop=args.earlystop, earlystop_delta=args.es_delta, earlystop_patience=args.es_patience, task_type=task_type, task_name=args.task, mdl_name=args.model, use_gpu=use_gpu, devq=dev_id, resume=resume if args.resume else {}, chckpnt_kwargs=dict(mltclf_epochs=epoch))

            # Adjust the model
            clf_trnsfmr = MultiClfTransformer(clf)
            clf_trnsfmr.merge_linear(num_linear=i+1)
            clf.linear = _handle_model(clf.linear, dev_id=dev_id, distrb=args.distrb)

            # Evaluating on the accumulated dev and test sets
            eval(clf, dev_loader, config, dev_ds.binlbr, special_tknids_args, pad_val=(task_extparms.setdefault('xpad_val', 0), train_ds.binlb[task_extparms.setdefault('ypad_val', 0)]) if task_type=='nmt' else task_extparms.setdefault('xpad_val', 0), task_type=task_type, task_name=args.task, ds_name='dev', mdl_name=args.model, use_gpu=use_gpu, ignored_label=task_extparms.setdefault('ignored_label', None))
            eval(clf, test_loader, config, test_ds.binlbr, special_tknids_args, pad_val=(task_extparms.setdefault('xpad_val', 0), train_ds.binlb[task_extparms.setdefault('ypad_val', 0)]) if task_type=='nmt' else task_extparms.setdefault('xpad_val', 0), task_type=task_type, task_name=args.task, ds_name='test', mdl_name=args.model, use_gpu=use_gpu, ignored_label=task_extparms.setdefault('ignored_label', None))
        global_all_binlb.update(all_binlb)
        # clf.binlb = all_binlb
        # clf.binlbr = dict([(v, k) for k, v in all_binlb.items()])
    else:
        if orig_epochs > 0:
            try:
                save_model(clf, optimizer, '%s_%s.pth' % (args.task, args.model), devq=dev_id, distrb=args.distrb)
            except Exception as e:
                logging.warning(e)
    args.epochs = orig_epochs

    if args.noeval: return
    dev_ds = task_dstype(os.path.join(DATA_PATH, task_path, 'dev.%s' % args.fmt), task_cols['X'], task_cols['y'], config.encode_func, tokenizer, config, sep='\t', index_col=task_cols['index'], binlb=task_extparms['binlb'] if 'binlb' in task_extparms and type(task_extparms['binlb']) is not str else all_binlb, transforms=trsfms, transforms_kwargs=trsfms_kwargs, mltl=task_extparms.setdefault('mltl', False), **ds_kwargs)
    if mdl_name.startswith('bert'): dev_ds = MaskedLMIterDataset(dev_ds) if isinstance(dev_ds, BaseIterDataset) else MaskedLMDataset(dev_ds)
    dev_loader = DataLoader(dev_ds, batch_size=args.bsize, shuffle=False, num_workers=args.np)
    test_ds = task_dstype(os.path.join(DATA_PATH, task_path, 'test.%s' % args.fmt), task_cols['X'], task_cols['y'], config.encode_func, tokenizer, config, sep='\t', index_col=task_cols['index'], binlb=task_extparms['binlb'] if 'binlb' in task_extparms and type(task_extparms['binlb']) is not str else all_binlb, transforms=trsfms, transforms_kwargs=trsfms_kwargs, mltl=task_extparms.setdefault('mltl', False), **ds_kwargs)
    if mdl_name.startswith('bert'): test_ds = MaskedLMIterDataset(test_ds) if isinstance(test_ds, BaseIterDataset) else MaskedLMDataset(test_ds)
    test_loader = DataLoader(test_ds, batch_size=args.bsize, shuffle=False, num_workers=args.np)

    # Evaluation
    eval(clf, dev_loader, config, dev_ds.binlbr, special_tknids_args, pad_val=(task_extparms.setdefault('xpad_val', 0), train_ds.binlb[task_extparms.setdefault('ypad_val', 0)]) if task_type=='nmt' else task_extparms.setdefault('xpad_val', 0), task_type=task_type, task_name=args.task, ds_name='dev', mdl_name=args.model, use_gpu=use_gpu, ignored_label=task_extparms.setdefault('ignored_label', None))
    if args.traindev: train(clf, optimizer, dev_loader, config, special_tknids_args, scheduler=scheduler, pad_val=(task_extparms.setdefault('xpad_val', 0), train_ds.binlb[task_extparms.setdefault('ypad_val', 0)]) if task_type=='nmt' else task_extparms.setdefault('xpad_val', 0), weights=class_weights, lmcoef=args.lmcoef, clipmaxn=args.clipmaxn, epochs=orig_epochs, earlystop=args.earlystop, earlystop_delta=args.es_delta, earlystop_patience=args.es_patience, task_type=task_type, task_name=args.task, mdl_name=args.model, use_gpu=use_gpu, devq=dev_id)
    eval(clf, test_loader, config, test_ds.binlbr, special_tknids_args, pad_val=(task_extparms.setdefault('xpad_val', 0), train_ds.binlb[task_extparms.setdefault('ypad_val', 0)]) if task_type=='nmt' else task_extparms.setdefault('xpad_val', 0), task_type=task_type, task_name=args.task, ds_name='test', mdl_name=args.model, use_gpu=use_gpu, ignored_label=task_extparms.setdefault('ignored_label', None))
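For reference, WeightedRandomSampler draws dataset indices in proportion to per-sample weights, so the inverse-frequency class weights computed above are normally expanded to one weight per training example before being handed to the DataLoader. A minimal, project-independent sketch of that standard pattern:

import torch
from torch.utils.data import DataLoader, TensorDataset, WeightedRandomSampler

y = torch.tensor([0] * 90 + [1] * 10)             # toy labels: 90 of class 0, 10 of class 1
x = torch.randn(len(y), 4)
class_weights = 1.0 / torch.bincount(y).float()   # inverse class frequency
sample_weights = class_weights[y]                 # one weight per sample
sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(y), replacement=True)
loader = DataLoader(TensorDataset(x, y), batch_size=16, sampler=sampler)
# Batches drawn from loader are now roughly class-balanced.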
Example #7
def get_fsnames():
	return [f.split('.')[0] for f in fs.listf(FEATS_PATH)]
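fs.listf is a helper from the project's bionlp.util.fs module (see the import in Example #6); assuming it simply lists the file names in a directory, a plain-stdlib equivalent of this function would be:

import os

def get_fsnames(feats_path=FEATS_PATH):
    # Hypothetical stand-in built on os.listdir instead of the project's fs.listf
    return [f.split('.')[0] for f in os.listdir(feats_path) if os.path.isfile(os.path.join(feats_path, f))]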