def pivot_selection_timed(*args, **kwargs):
    """Call ``pivot_selection`` and measure how long the call takes.

    Every positional and keyword argument is forwarded unchanged to
    ``pivot_selection``; its result is unpacked as exactly two values.
    A header line and the elapsed time are printed to stdout.

    Returns:
        A 3-tuple ``(s_pivots, t_pivots, pivot_time)``: the two values
        produced by ``pivot_selection`` followed by the elapsed time,
        computed as the difference between two ``time()`` readings.
    """
    print('Pivot selection')

    # Bracket only the selection call itself with the two clock readings.
    started_at = time()
    s_pivots, t_pivots = pivot_selection(*args, **kwargs)
    pivot_time = time() - started_at

    print(f'\t[pivot selection took {pivot_time:.3f} seconds]')
    return (s_pivots, t_pivots, pivot_time)
mds_home= '../datasets/MDS' dataset_home='../datasets/Webis-CLS-10' nfolds=5 outfile = './DCI.varpivot.dat' if exists(outfile): rperf = Result.load(outfile, False) else: rperf = Result(['dataset', 'task', 'method', 'fold', 'npivots', 'acc', 'dci_time', 'svm_time']) pivot_range = [10,25,50,100,250,500,1000,1500,2000,2500,5000] for source, target, fold, taskname in MDS_task_generator(abspath(mds_home), nfolds=nfolds): s_pivots, t_pivots = pivot_selection(max(pivot_range), source.X, source.y, source.U, target.U, source.V, target.V, phi=1, cross=True) for npivots in pivot_range: for dcf in ['cosine','linear']: dci = DCI(dcf=dcf, unify=False, post='normal') acc, dci_time, svm_time, _ = DCIclassify(source, target, s_pivots[:npivots], t_pivots[:npivots], dci, optimize=True) rperf.add(dataset='MDS', task=taskname, method=str(dci), fold=fold, npivots=npivots, acc=acc, dci_time=dci_time, svm_time=svm_time) rperf.pivot(index=['dataset', 'task','npivots'], values=['acc', 'dci_time', 'svm_time']) rperf.dump(outfile) for source, target, oracle, taskname in WebisCLS10_task_generator(abspath(dataset_home)): s_pivots, t_pivots = pivot_selection(max(pivot_range), source.X, source.y, source.U, target.U, source.V, target.V, oracle=oracle, phi=30, cross=False) for npivots in pivot_range:
dataset_home = '../datasets/Webis-CLS-10'

# Result is constructed with the list of column names used by this run.
rperf = Result([
    'dataset',
    'task',
    'method',
    'acc',
    'pivot_t',
    'dci_t',
    'svm_t',
    'test_t',
])

# NOTE(review): npivots, dcf and optimize are not bound anywhere in this
# fragment; they are expected to be defined earlier in the file.
task_iter = WebisCLS10_crossdomain_crosslingual_task_generator(
    os.path.abspath(dataset_home)
)
for source, target, oracle, taskname in task_iter:
    # Pivot selection, bracketed by two clock readings so that its
    # duration can be reported separately from the later steps.
    tinit = time()
    s_pivots, t_pivots = pivot_selection(
        npivots,
        source.X,
        source.y,
        source.U,
        target.U,
        source.V,
        target.V,
        oracle=oracle,
        phi=30,
        show=min(10, npivots),  # never ask to show more than npivots entries
        cross=True,
    )
    pivot_time = time() - tinit
    print('pivot selection took {:.3f} seconds'.format(pivot_time))

    # Build the DCI object and run the classification step on the pivots
    # selected above; DCIclassify returns four values, unpacked here.
    dci = DCI(dcf=dcf, unify=True, post='normal')
    acc, dci_time, svm_time, test_time = DCIclassify(
        source,
        target,
        s_pivots,
        t_pivots,
        dci,
        optimize=optimize,
    )