def test_score():
    """Smoke-test the task, distance and score steps on generated data.

    Every file is created under ``test_items``; that directory is removed
    when the test finishes, whether it passed or failed.
    """
    try:
        if not os.path.exists('test_items'):
            os.makedirs('test_items')
        item_file = 'test_items/data.item'
        feature_file = 'test_items/data.features'
        distance_file = 'test_items/data.distance'
        scorefilename = 'test_items/data.score'
        taskfilename = 'test_items/data.abx'
        items.generate_db_and_feat(3, 3, 1, item_file, 2, 3, feature_file)
        task = ABXpy.task.Task(item_file, 'c0', 'c1', 'c2')
        # NOTE(review): no output path is passed here, yet the next steps
        # read ``taskfilename``; presumably generate_triplets defaults to
        # that path -- confirm.
        task.generate_triplets()
        distances.compute_distances(
            feature_file, '/features/', taskfilename, distance_file,
            dtw_cosine_distance, normalized=True, n_cpu=3)
        score.score(taskfilename, distance_file, scorefilename)
    finally:
        # Best-effort cleanup. ``ignore_errors`` keeps a missing directory
        # from masking the real test outcome without a bare ``except``,
        # which would also swallow KeyboardInterrupt.
        shutil.rmtree('test_items', ignore_errors=True)
def test_frozen_analyze():
    """Compare a fresh run against the results of a previously "frozen" run.

    The pipeline is run on the frozen item and feature files, and the score
    and analysis files it produces are compared with the frozen references,
    asserting that the behaviour of the code did not change.
    """
    # Output paths are plain constants, so define them before the ``try``:
    # the cleanup below can then always refer to them.
    distance_file = 'test_items/data.distance'
    scorefilename = 'test_items/data.score'
    taskfilename = 'test_items/data.abx'
    analyzefilename = 'test_items/data.csv'
    try:
        if not os.path.exists('test_items'):
            os.makedirs('test_items')
        item_file = frozen_file('item')
        feature_file = frozen_file('features')
        task = ABXpy.task.Task(item_file, 'c0', 'c1', 'c2')
        task.generate_triplets(taskfilename)
        distances.compute_distances(feature_file, '/features/', taskfilename,
                                    distance_file, dtw_cosine_distance)
        score.score(taskfilename, distance_file, scorefilename)
        analyze.analyze(taskfilename, scorefilename, analyzefilename)
        # NOTE(review): the task and distance files are not compared with
        # their frozen counterparts -- confirm whether these checks should
        # be re-enabled:
        # assert items.h5cmp(taskfilename, frozen_file('abx'))
        # assert items.h5cmp(distance_file, frozen_file('distance'))
        assert items.h5cmp(scorefilename, frozen_file('score'))
        assert items.cmp(analyzefilename, frozen_file('csv'))
    finally:
        # Remove each output independently: with a single shared ``try`` the
        # first missing file would leave all the later ones on disk.
        for path in (taskfilename, distance_file, scorefilename,
                     analyzefilename):
            try:
                os.remove(path)
            except OSError:
                pass
def test_multiple_across():
    """Check stats and triplets for a task given a list of 'across' columns.

    Warnings are silenced for the whole body of the test.
    """
    items.generate_testitems(2, 3, name='data.item')
    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            task = ABXpy.task.Task('data.item', 'c0', ['c1', 'c2'])
            stats = task.stats
            assert stats['nb_blocks'] == 8
            assert stats['nb_triplets'] == 8
            assert stats['nb_by_levels'] == 1
            task.generate_triplets()
            # The context manager closes the HDF5 file even when an
            # assertion fails, so the cleanup below can delete it.
            with h5py.File('data.abx', 'r') as f:
                triplets_block = get_triplets(f, '0')
                triplets = np.array([[0, 1, 6], [1, 0, 7], [2, 3, 4],
                                     [3, 2, 5], [4, 5, 2], [5, 4, 3],
                                     [6, 7, 0], [7, 6, 1]])
                assert tables_equivalent(triplets, triplets_block)
    finally:
        # Remove each file independently so that a missing task file does
        # not leave the item file behind.
        for path in ('data.abx', 'data.item'):
            try:
                os.remove(path)
            except OSError:
                pass
def test_threshold_analyze():
    """Run the pipeline with a triplet threshold and check the analysis file.

    The last column of the tab-separated analysis output (header row
    skipped) must be equal to the threshold on every row.
    """
    try:
        if not os.path.exists('test_items'):
            os.makedirs('test_items')
        item_file = 'test_items/data.item'
        feature_file = 'test_items/data.features'
        distance_file = 'test_items/data.distance'
        scorefilename = 'test_items/data.score'
        taskfilename = 'test_items/data.abx'
        analyzefilename = 'test_items/data.csv'
        threshold = 2
        items.generate_db_and_feat(3, 3, 1, item_file, 2, 3, feature_file)
        task = ABXpy.task.Task(item_file, 'c0', 'c1', 'c2')
        task.generate_triplets(taskfilename, threshold=threshold)
        distances.compute_distances(
            feature_file, '/features/', taskfilename, distance_file,
            dtw_cosine_distance, normalized=True, n_cpu=1)
        score.score(taskfilename, distance_file, scorefilename)
        analyze.analyze(taskfilename, scorefilename, analyzefilename)
        number_triplets = np.loadtxt(analyzefilename, dtype=int,
                                     delimiter='\t', skiprows=1,
                                     usecols=[-1])
        assert np.all(number_triplets == threshold)
    finally:
        # Best-effort cleanup without a bare ``except``, which would also
        # swallow KeyboardInterrupt.
        shutil.rmtree('test_items', ignore_errors=True)
def test_basic():
    """Check stats, triplets and pairs of a basic on/across/by task."""
    items.generate_testitems(2, 3, name='data.item')
    try:
        task = ABXpy.task.Task('data.item', 'c0', 'c1', 'c2')
        stats = task.stats
        assert stats['nb_blocks'] == 8, "incorrect stats: number of blocks"
        assert stats['nb_triplets'] == 8
        assert stats['nb_by_levels'] == 2
        task.generate_triplets()
        # The context manager closes the HDF5 file even when an assertion
        # fails, so the cleanup below can delete it.
        with h5py.File('data.abx', 'r') as f:
            # The blocks are read through get_triplets only; the earlier
            # manual slicing of f['triplets'] was overwritten before use.
            triplets_block0 = get_triplets(f, '0')
            triplets_block1 = get_triplets(f, '1')
            triplets = np.array([[0, 1, 2], [1, 0, 3], [2, 3, 0], [3, 2, 1]])
            assert tables_equivalent(triplets, triplets_block0), error_triplets
            assert tables_equivalent(triplets, triplets_block1), error_triplets
            pairs = [2, 6, 7, 3, 8, 12, 13, 9]
            pairs_block0 = get_pairs(f, '0')
            pairs_block1 = get_pairs(f, '1')
            assert (set(pairs) == set(pairs_block0[:, 0])), error_pairs
            assert (set(pairs) == set(pairs_block1[:, 0])), error_pairs
    finally:
        # Remove each file independently so that a missing task file does
        # not leave the item file behind.
        for path in ('data.abx', 'data.item'):
            try:
                os.remove(path)
            except OSError:
                pass
def fullrun():
    """Run the full example pipeline inside ``example_items``.

    Output files left over from a previous run are deleted first; then the
    items and features are generated and the task, distance, score and
    analysis steps are run in that order.
    """
    if not os.path.exists('example_items'):
        os.makedirs('example_items')

    item_path = 'example_items/data.item'
    feature_path = 'example_items/data.features'
    distance_path = 'example_items/data.distance'
    score_path = 'example_items/data.score'
    task_path = 'example_items/data.abx'
    csv_path = 'example_items/data.csv'

    # deleting pre-existing files (a file that is not there is simply
    # skipped)
    leftovers = (item_path, feature_path, distance_path,
                 score_path, task_path, csv_path)
    for leftover in leftovers:
        try:
            os.remove(leftover)
        except OSError:
            continue

    # running the evaluation
    items.generate_db_and_feat(3, 3, 5, item_path, 2, 2, feature_path)
    abx_task = ABXpy.task.Task(item_path, 'c0', across='c1', by='c2')
    abx_task.generate_triplets(task_path)
    distances.compute_distances(feature_path, '/features/', task_path,
                                distance_path, dtw_cosine_distance,
                                normalized=True, n_cpu=1)
    score.score(task_path, distance_path, score_path)
    analyze.analyze(task_path, score_path, csv_path)
def test_filter():
    """Check stats, triplets and pairs of a task built with a filter on c3."""
    items.generate_testitems(2, 4, name='data.item')
    try:
        task = ABXpy.task.Task('data.item', 'c0', 'c1', 'c2',
                               filters=["[attr == 0 for attr in c3]"])
        stats = task.stats
        assert stats['nb_blocks'] == 8, "incorrect stats: number of blocks"
        assert stats['nb_triplets'] == 8
        assert stats['nb_by_levels'] == 2
        task.generate_triplets(output='data.abx')
        # The context manager closes the HDF5 file even when an assertion
        # fails, so the cleanup below can delete it.
        with h5py.File('data.abx', 'r') as f:
            triplets_block0 = get_triplets(f, '0')
            triplets_block1 = get_triplets(f, '1')
            triplets = np.array([[0, 1, 2], [1, 0, 3], [2, 3, 0], [3, 2, 1]])
            assert tables_equivalent(triplets, triplets_block0), error_triplets
            assert tables_equivalent(triplets, triplets_block1), error_triplets
            pairs = [2, 6, 7, 3, 8, 12, 13, 9]
            pairs_block0 = get_pairs(f, '0')
            pairs_block1 = get_pairs(f, '1')
            assert (set(pairs) == set(pairs_block0[:, 0])), error_pairs
            assert (set(pairs) == set(pairs_block1[:, 0])), error_pairs
    finally:
        # Remove each file independently so that a missing task file does
        # not leave the item file behind.
        for path in ('data.abx', 'data.item'):
            try:
                os.remove(path)
            except OSError:
                pass
def test_analyze():
    """Smoke-test the pipeline up to and including the analysis step.

    All files are written under ``test_items``, which is removed afterwards
    regardless of the outcome.
    """
    workdir = 'test_items'
    try:
        if not os.path.exists(workdir):
            os.makedirs(workdir)

        paths = {
            'item': 'test_items/data.item',
            'features': 'test_items/data.features',
            'distance': 'test_items/data.distance',
            'score': 'test_items/data.score',
            'task': 'test_items/data.abx',
            'csv': 'test_items/data.csv',
        }

        items.generate_db_and_feat(3, 3, 1, paths['item'], 2, 3,
                                   paths['features'])
        abx_task = ABXpy.task.Task(paths['item'], 'c0', 'c1', 'c2')
        abx_task.generate_triplets(paths['task'])
        distances.compute_distances(
            paths['features'], '/features/', paths['task'],
            paths['distance'], dtw_cosine_distance,
            normalized=True, n_cpu=1)
        score.score(paths['task'], paths['distance'], paths['score'])
        analyze.analyze(paths['task'], paths['score'], paths['csv'])
    finally:
        shutil.rmtree(workdir, ignore_errors=True)
def test_score():
    """Smoke-test the task, distance and score steps on generated data.

    Every file is created under ``test_items``; that directory is removed
    when the test finishes, whether it passed or failed.
    """
    try:
        if not os.path.exists('test_items'):
            os.makedirs('test_items')
        item_file = 'test_items/data.item'
        feature_file = 'test_items/data.features'
        distance_file = 'test_items/data.distance'
        scorefilename = 'test_items/data.score'
        taskfilename = 'test_items/data.abx'
        items.generate_db_and_feat(3, 3, 1, item_file, 2, 3, feature_file)
        task = ABXpy.task.Task(item_file, 'c0', 'c1', 'c2')
        # NOTE(review): no output path is passed here, yet the next steps
        # read ``taskfilename``; presumably generate_triplets defaults to
        # that path -- confirm.
        task.generate_triplets()
        distances.compute_distances(
            feature_file, '/features/', taskfilename, distance_file,
            dtw_cosine_distance, normalized=True, n_cpu=3)
        score.score(taskfilename, distance_file, scorefilename)
    finally:
        # Best-effort cleanup. ``ignore_errors`` keeps a missing directory
        # from masking the real test outcome without a bare ``except``,
        # which would also swallow KeyboardInterrupt.
        shutil.rmtree('test_items', ignore_errors=True)
def test_threshold_analyze():
    """Run the pipeline with a triplet threshold and check the analysis file.

    The last column of the tab-separated analysis output (header row
    skipped) must be equal to the threshold on every row.
    """
    workdir = 'test_items'
    try:
        if not os.path.exists(workdir):
            os.makedirs(workdir)

        threshold = 2
        item_path = 'test_items/data.item'
        feature_path = 'test_items/data.features'
        distance_path = 'test_items/data.distance'
        score_path = 'test_items/data.score'
        task_path = 'test_items/data.abx'
        csv_path = 'test_items/data.csv'

        items.generate_db_and_feat(3, 3, 1, item_path, 2, 3, feature_path)
        abx_task = ABXpy.task.Task(item_path, 'c0', 'c1', 'c2')
        abx_task.generate_triplets(task_path, threshold=threshold)
        distances.compute_distances(
            feature_path, '/features/', task_path, distance_path,
            dtw_cosine_distance, normalized=True, n_cpu=1)
        score.score(task_path, distance_path, score_path)
        analyze.analyze(task_path, score_path, csv_path)

        # Only the last column of the analysis output is loaded.
        triplet_counts = np.loadtxt(csv_path, dtype=int, delimiter='\t',
                                    skiprows=1, usecols=[-1])
        assert np.all(triplet_counts == threshold)
    finally:
        shutil.rmtree(workdir, ignore_errors=True)
def test_frozen_analyze():
    """Compare a fresh run against the results of a previously "frozen" run.

    The pipeline is run on the frozen item and feature files and the
    resulting analysis file is compared with the frozen one, asserting that
    the behaviour of the code did not change.
    """
    workdir = 'test_items'
    try:
        if not os.path.exists(workdir):
            os.makedirs(workdir)

        frozen_items = frozen_file('item')
        frozen_features = frozen_file('features')
        distance_path = 'test_items/data.distance'
        score_path = 'test_items/data.score'
        task_path = 'test_items/data.abx'
        csv_path = 'test_items/data.csv'

        abx_task = ABXpy.task.Task(frozen_items, 'c0', 'c1', 'c2')
        abx_task.generate_triplets(task_path)
        distances.compute_distances(
            frozen_features, '/features/', task_path, distance_path,
            dtw_cosine_distance, normalized=True, n_cpu=1)
        score.score(task_path, distance_path, score_path)
        analyze.analyze(task_path, score_path, csv_path)

        # Only the final csv is compared; the intermediate HDF5 comparisons
        # below are disabled.
        # assert items.h5cmp(taskfilename, frozen_file('abx'))
        # assert items.h5cmp(distance_file, frozen_file('distance'))
        # assert items.h5cmp(scorefilename, frozen_file('score'))
        assert items.csv_cmp(csv_path, frozen_file('csv'))
    finally:
        shutil.rmtree(workdir, ignore_errors=True)
def test_analyze():
    """Smoke-test the pipeline up to and including the analysis step.

    The files written under ``test_items`` are deleted afterwards; the
    directory itself is left in place.
    """
    # Paths are plain constants, so define them before the ``try``: the
    # cleanup below can then always refer to them.
    item_file = 'test_items/data.item'
    feature_file = 'test_items/data.features'
    distance_file = 'test_items/data.distance'
    scorefilename = 'test_items/data.score'
    taskfilename = 'test_items/data.abx'
    analyzefilename = 'test_items/data.csv'
    try:
        if not os.path.exists('test_items'):
            os.makedirs('test_items')
        items.generate_db_and_feat(3, 3, 1, item_file, 2, 3, feature_file)
        task = ABXpy.task.Task(item_file, 'c0', 'c1', 'c2')
        task.generate_triplets(taskfilename)
        distances.compute_distances(feature_file, '/features/', taskfilename,
                                    distance_file, dtw_cosine_distance)
        score.score(taskfilename, distance_file, scorefilename)
        analyze.analyze(taskfilename, scorefilename, analyzefilename)
    finally:
        # Remove each file independently: with a single shared ``try`` the
        # first missing file would leave all the later ones on disk.
        for path in (item_file, feature_file, taskfilename, distance_file,
                     scorefilename, analyzefilename):
            try:
                os.remove(path)
            except OSError:
                pass
def test_analyze(itemfile, featurefile, args, taskfile=None, distance=None,
                 distancefile=None, scorefile=None, analyzefile=None,
                 filename=None):
    """Run the task, distance, score and analysis steps for one configuration.

    Parameters
    ----------
    itemfile, featurefile
        Input files handed to ``ABXpy.task.Task`` and
        ``distances.compute_distances``.
    args
        Source from which ``get_arg`` extracts 'on', 'across', 'by',
        'filters' and 'reg'.
    taskfile, distancefile, scorefile, analyzefile
        Output paths. Each one that is not given is derived from
        ``filename`` with the extension '.abx', '.distance', '.score' or
        '.csv' respectively.
    distance
        Distance function; ``dtw_cosine_distance`` when not given.
    filename
        Common stem of the derived output paths. When not given it is built
        from the names of the input files and the on/across/by values,
        joined with underscores.

    Raises
    ------
    AssertionError
        If no 'on' argument is found.
    """
    on = get_arg('on', args)
    # The two literals are concatenated, so the first must end with a space.
    assert on, ("The 'on' argument was not found, this argument is mandatory "
                "for the task")
    across = get_arg('across', args)
    by = get_arg('by', args)
    filters = get_arg('filters', args)
    reg = get_arg('reg', args)

    if not filename:
        filename = '_'.join(
            filter(None, [
                get_name(itemfile),
                get_name(featurefile),
                str(on),
                str(across),
                str(by)
            ]))
    # ``taskfile`` used to stay None when omitted, which broke every step
    # after the triplet generation; derive it like the other outputs.
    if not taskfile:
        taskfile = filename + '.abx'
    if not distancefile:
        distancefile = filename + '.distance'
    if not scorefile:
        scorefile = filename + '.score'
    if not analyzefile:
        analyzefile = filename + '.csv'

    task = ABXpy.task.Task(itemfile, on, across, by, filters, reg,
                           features=featurefile)
    # Write the triplets to the very file that the following steps read.
    task.generate_triplets(taskfile)
    if not distance:
        distance = dtw_cosine_distance
    distances.compute_distances(featurefile, '/features/', taskfile,
                                distancefile, distance)
    score.score(taskfile, distancefile, scorefile)
    # NOTE(review): the (score, task) argument order is kept as found;
    # confirm it matches the signature of analyze.analyze.
    analyze.analyze(scorefile, taskfile, analyzefile)
def test_no_across():
    """Check the stats of a task without 'across', then generate triplets."""
    items.generate_testitems(2, 3, name='data.item')
    try:
        task = ABXpy.task.Task('data.item', 'c0', None, 'c2')
        stats = task.stats
        assert stats['nb_blocks'] == 8
        assert stats['nb_triplets'] == 16
        assert stats['nb_by_levels'] == 2
        task.generate_triplets()
    finally:
        # Remove each file independently: if an assertion fails before the
        # task file exists, the item file must still be deleted.
        for path in ('data.abx', 'data.item'):
            try:
                os.remove(path)
            except OSError:
                pass
def test_multiple_bys():
    """Check the stats of a task with three 'by' columns, then generate triplets."""
    items.generate_testitems(3, 4, name='data.item')
    try:
        task = ABXpy.task.Task('data.item', 'c0', None, ['c1', 'c2', 'c3'])
        stats = task.stats
        assert stats['nb_blocks'] == 81
        assert stats['nb_triplets'] == 0
        assert stats['nb_by_levels'] == 27
        task.generate_triplets()
    finally:
        # Remove each file independently: if an assertion fails before the
        # task file exists, the item file must still be deleted.
        for path in ('data.abx', 'data.item'):
            try:
                os.remove(path)
            except OSError:
                pass
def test_multiple_bys():
    """Check the stats of a task with three 'by' columns, then generate triplets.

    Warnings raised during the triplet generation are silenced.
    """
    items.generate_testitems(3, 4, name='data.item')
    try:
        task = ABXpy.task.Task('data.item', 'c0', None, ['c1', 'c2', 'c3'])
        stats = task.stats
        assert stats['nb_blocks'] == 81
        assert stats['nb_triplets'] == 0
        assert stats['nb_by_levels'] == 27
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            task.generate_triplets()
    finally:
        # Remove each file independently: if an assertion fails before the
        # task file exists, the item file must still be deleted.
        for path in ('data.abx', 'data.item'):
            try:
                os.remove(path)
            except OSError:
                pass
def test_sampling_task():
    """Generate triplets twice with different ``sample`` values.

    The first run uses ``sample=0.2`` and the second ``sample=200``; the
    task file is deleted between the two runs. Nothing is asserted: the
    test only checks that both runs complete.
    """
    items.generate_testitems(4, 6, name='data.item')
    try:
        task = ABXpy.task.Task('data.item', 'c0', 'c1', ['c2', 'c3'])
        # print() with a single string argument gives the same output on
        # Python 2 and Python 3; the statement form is a syntax error on 3.
        print("stats computed")
        task.generate_triplets(sample=0.2)
        print("first sample")
        os.remove('data.abx')
        task.generate_triplets(sample=200)
        print("second sample")
    finally:
        # Remove each file independently so that a missing task file does
        # not leave the item file behind.
        for path in ('data.abx', 'data.item'):
            try:
                os.remove(path)
            except OSError:
                pass
def fullrun():
    """Run the full example pipeline inside ``example_items``.

    Generates the items and features, then runs the task, distance, score
    and analysis steps in that order.
    """
    if not os.path.exists('example_items'):
        os.makedirs('example_items')

    item_path = 'example_items/data.item'
    feature_path = 'example_items/data.features'
    distance_path = 'example_items/data.distance'
    score_path = 'example_items/data.score'
    task_path = 'example_items/data.abx'
    csv_path = 'example_items/data.csv'

    items.generate_db_and_feat(3, 3, 1, item_path, 2, 2, feature_path)

    abx_task = ABXpy.task.Task(item_path, 'c0', 'c1', 'c2',
                               features=feature_path)
    # No output path is passed; the following steps read ``task_path``.
    abx_task.generate_triplets()

    distances.compute_distances(feature_path, '/features/', task_path,
                                distance_path, dtw_cosine_distance)
    score.score(task_path, distance_path, score_path)
    analyze.analyze(score_path, task_path, csv_path)
def test_filter_on_B():
    """Check stats and triplets of a task built with a filter on ``c1_B``."""
    items.generate_testitems(2, 2, name='data.item')
    try:
        task = ABXpy.task.Task('data.item', 'c0',
                               filters=["[attr == 0 for attr in c1_B]"])
        stats = task.stats
        assert stats['nb_blocks'] == 4, "incorrect stats: number of blocks"
        assert stats['nb_triplets'] == 4
        assert stats['nb_by_levels'] == 1
        task.generate_triplets()
        # The context manager closes the HDF5 file even when an assertion
        # fails, so the cleanup below can delete it.
        with h5py.File('data.abx', 'r') as f:
            triplets_block0 = get_triplets(f, '0')
            triplets = np.array([[0, 1, 2], [1, 0, 3], [2, 1, 0], [3, 0, 1]])
            assert tables_equivalent(triplets, triplets_block0), error_triplets
    finally:
        # Remove each file independently so that a missing task file does
        # not leave the item file behind.
        for path in ('data.abx', 'data.item'):
            try:
                os.remove(path)
            except OSError:
                pass
def test_filter_on_C():
    """Check stats and triplets of a task built with a filter on ``c1_X``."""
    items.generate_testitems(2, 2, name='data.item')
    try:
        task = ABXpy.task.Task('data.item', 'c0',
                               filters=["[attr == 0 for attr in c1_X]"])
        stats = task.stats
        assert stats['nb_blocks'] == 4, "incorrect stats: number of blocks"
        assert stats['nb_triplets'] == 4
        assert stats['nb_by_levels'] == 1
        task.generate_triplets()
        # The context manager closes the HDF5 file even when an assertion
        # fails, so the cleanup below can delete it.
        with h5py.File('data.abx', 'r') as f:
            triplets_block0 = get_triplets(f, '0')
            triplets = np.array([[2, 1, 0], [2, 3, 0], [3, 0, 1], [3, 2, 1]])
            assert tables_equivalent(triplets, triplets_block0), error_triplets
    finally:
        # Remove each file independently so that a missing task file does
        # not leave the item file behind.
        for path in ('data.abx', 'data.item'):
            try:
                os.remove(path)
            except OSError:
                pass
def memorizable_abx(data_file, on, across, by, njobs, tmpdir=None,
                    distance=cosine_distance, item_features_hash='0'):
    '''Wrap the ABXpy functions and compute the scores.

    Parameters
    ----------
    data_file
        Common path stem; '.item' and '.features' are appended to locate
        the inputs, and '.distance', '.score', '.abx' and '.csv' to name
        the outputs.
    on, across, by
        Passed to ``ABXpy.task.Task``.
    njobs
        Passed to ``distances.compute_distances`` as ``n_cpu``.
    tmpdir
        Passed to ``task.generate_triplets``.
    distance
        Distance function passed to ``distances.compute_distances``.
    item_features_hash
        Not used in the body; presumably part of a memoization key --
        TODO confirm against the caller.

    Returns
    -------
    The text content of the '.csv' analysis file, which is kept on disk.

    Raises
    ------
    ValueError
        If the item file or the feature file does not exist.
    '''
    item_file = '{}.item'.format(data_file)
    feature_file = '{}.features'.format(data_file)
    if not os.path.isfile(item_file) or not os.path.isfile(feature_file):
        raise ValueError('item_file or feature_file doesnt exist')

    distance_file = '{}.distance'.format(data_file)
    score_file = '{}.score'.format(data_file)
    task_file = '{}.abx'.format(data_file)
    analyze_file = '{}.csv'.format(data_file)

    # clean up before computing ABX. An explicit loop is required: on
    # Python 3 ``map`` is lazy, so ``map(os.remove, ...)`` on its own never
    # deletes anything.
    for stale in (distance_file, score_file, task_file, analyze_file):
        if os.path.exists(stale):
            os.remove(stale)

    # running the evaluation
    task = ABXpy.task.Task(item_file, on, across=across, by=by, verbose=False)
    task.generate_triplets(task_file, tmpdir=tmpdir)
    distances.compute_distances(feature_file, '/features/', task_file,
                                distance_file, distance, normalized=True,
                                n_cpu=njobs)
    score.score(task_file, distance_file, score_file)
    analyze.analyze(task_file, score_file, analyze_file)

    # keep only the ABX scores: drop the intermediate files
    for intermediate in (distance_file, score_file, task_file):
        if os.path.exists(intermediate):
            os.remove(intermediate)

    # ``with`` closes the handle deterministically instead of leaking it.
    with open(analyze_file, 'r') as handle:
        analyze_data = handle.read()
    return analyze_data
def test_analyze(
    itemfile,
    featurefile,
    args,
    taskfile=None,
    distance=None,
    distancefile=None,
    scorefile=None,
    analyzefile=None,
    filename=None,
):
    """Run the task, distance, score and analysis steps for one configuration.

    Parameters
    ----------
    itemfile, featurefile
        Input files handed to ``ABXpy.task.Task`` and
        ``distances.compute_distances``.
    args
        Source from which ``get_arg`` extracts "on", "across", "by",
        "filters" and "reg".
    taskfile, distancefile, scorefile, analyzefile
        Output paths. Each one that is not given is derived from
        ``filename`` with the extension ".abx", ".distance", ".score" or
        ".csv" respectively.
    distance
        Distance function; ``dtw_cosine_distance`` when not given.
    filename
        Common stem of the derived output paths. When not given it is built
        from the names of the input files and the on/across/by values,
        joined with underscores.

    Raises
    ------
    AssertionError
        If no "on" argument is found.
    """
    on = get_arg("on", args)
    # The two literals are concatenated, so the first must end with a space.
    assert on, "The 'on' argument was not found, this argument is mandatory " "for the task"
    # Every lookup reads from ``args``, exactly like the "on" lookup above;
    # these four used to be called without it.
    across = get_arg("across", args)
    by = get_arg("by", args)
    filters = get_arg("filters", args)
    reg = get_arg("reg", args)

    if not filename:
        filename = "_".join(filter(None, [get_name(itemfile), get_name(featurefile), str(on), str(across), str(by)]))
    # ``taskfile`` used to stay None when omitted, which broke every step
    # after the triplet generation; derive it like the other outputs.
    if not taskfile:
        taskfile = filename + ".abx"
    if not distancefile:
        distancefile = filename + ".distance"
    if not scorefile:
        scorefile = filename + ".score"
    if not analyzefile:
        analyzefile = filename + ".csv"

    task = ABXpy.task.Task(itemfile, on, across, by, filters, reg, features=featurefile)
    # Write the triplets to the very file that the following steps read.
    task.generate_triplets(taskfile)
    if not distance:
        distance = dtw_cosine_distance
    distances.compute_distances(featurefile, "/features/", taskfile, distancefile, distance)
    score.score(taskfile, distancefile, scorefile)
    # NOTE(review): the (score, task) argument order is kept as found;
    # confirm it matches the signature of analyze.analyze.
    analyze.analyze(scorefile, taskfile, analyzefile)
def fullrun():
    """Run the ABX pipeline: task, distances, score, analysis and average.

    The configuration is read from names defined outside this function
    (``ON``, ``ACROSS``, ``BY``, ``input_folder``, ``feature``,
    ``distance``, ``NB_CPU``, ``out_res``, ``ponderate``). The task,
    distance, score and analysis steps are each skipped when their output
    file already exists.

    Raises
    ------
    ValueError
        If the distances must be computed and ``distance`` is neither
        'cosine' nor 'kl'.
    """
    # Name of the run, built from the first two characters of each column.
    if isinstance(BY, list):
        out = ('/' + 'on_' + ON[0:2] + '_ac_' + ACROSS[0:2] + '_by_' +
               BY[0][0:2] + '_' + BY[1][0:2])
    else:
        out = '/' + 'on_' + ON[0:2] + '_ac_' + ACROSS[0:2] + '_by_' + BY[0:2]
    output_folder = input_folder + out

    print("the input folder is " + input_folder + "\n")
    print("the ABX task id done :" + out + "\n")
    print(feature)

    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    item_file = input_folder + '/' + ON + '.item'
    feature_file = input_folder + '/' + feature
    distance_file = output_folder + '/' + out + '.distance'
    scorefilename = output_folder + '/' + out + '.score'
    taskfilename = output_folder + '/' + out + '.abx'
    analyzefilename = output_folder + '/' + out + '.csv'
    statsfilename = output_folder + '/' + out + '.stats'

    # running the evaluation:
    if not os.path.exists(taskfilename):
        # "na" marks a column that is not used for the task.
        if ACROSS == "na" and BY != "na":
            task = ABXpy.task.Task(item_file, ON, by=BY)
        elif BY == "na" and ACROSS != "na":
            task = ABXpy.task.Task(item_file, ON, across=ACROSS)
        elif ACROSS == "na" and BY == "na":
            task = ABXpy.task.Task(item_file, ON)
        else:
            task = ABXpy.task.Task(item_file, ON, across=ACROSS, by=BY)
        task.generate_triplets(taskfilename)
        # The statistics are optional: report a failure instead of silently
        # swallowing every exception (including KeyboardInterrupt).
        try:
            task.print_stats(statsfilename)
        except Exception as err:
            print("could not write the task statistics: " + str(err))
    print("the abx task file is created")

    print("number of cpu used is " + str(NB_CPU))
    if not os.path.exists(distance_file):
        if distance == 'cosine':
            distance_function = dtw_cosine_distance
        elif distance == 'kl':
            distance_function = dtw_kl_divergence
        else:
            raise ValueError('distance must be either cosine or kl')
        distances.compute_distances(feature_file, '/features/', taskfilename,
                                    distance_file, distance_function,
                                    normalized=True, n_cpu=NB_CPU)
        # The placeholder is now interpolated; it used to be printed
        # verbatim as "%(distance)s".
        print("%s distance has been computed" % distance)
    else:
        print("distance has already been computed")

    if not os.path.exists(scorefilename):
        score.score(taskfilename, distance_file, scorefilename)
    print("Score is computed")

    if not os.path.exists(analyzefilename):
        analyze.analyze(taskfilename, scorefilename, analyzefilename)
    print("Raw results are available in the csv file !")

    eval_abx.avg(analyzefilename, out_res, ON, ACROSS, ponderate)
    print('evaluation done')