def test_analyze():
    """Smoke-test the whole chain: items, task, distances, score, analyze.

    Every artefact is written below ``test_items``; that directory is
    removed at the end whether or not the pipeline succeeded.
    """
    try:
        workdir_missing = not os.path.exists('test_items')
        if workdir_missing:
            os.makedirs('test_items')

        # Files in the order the pipeline produces them.
        item_path = 'test_items/data.item'
        feature_path = 'test_items/data.features'
        task_path = 'test_items/data.abx'
        distance_path = 'test_items/data.distance'
        score_path = 'test_items/data.score'
        csv_path = 'test_items/data.csv'

        items.generate_db_and_feat(3, 3, 1, item_path, 2, 3, feature_path)

        abx_task = ABXpy.task.Task(item_path, 'c0', 'c1', 'c2')
        abx_task.generate_triplets(task_path)

        distances.compute_distances(
            feature_path, '/features/', task_path, distance_path,
            dtw_cosine_distance, normalized=True, n_cpu=1)
        score.score(task_path, distance_path, score_path)
        analyze.analyze(task_path, score_path, csv_path)
    finally:
        shutil.rmtree('test_items', ignore_errors=True)
def test_frozen_analyze():
    """Regression test against a previously "frozen" run.

    The item and feature inputs come from ``frozen_file``; the pipeline is
    re-run on them and the resulting csv must match the frozen csv,
    showing that the code did not change in behaviour.
    """
    try:
        workdir_missing = not os.path.exists('test_items')
        if workdir_missing:
            os.makedirs('test_items')

        # Frozen inputs.
        frozen_items = frozen_file('item')
        frozen_features = frozen_file('features')

        # Freshly generated outputs.
        distance_path = 'test_items/data.distance'
        score_path = 'test_items/data.score'
        task_path = 'test_items/data.abx'
        csv_path = 'test_items/data.csv'

        abx_task = ABXpy.task.Task(frozen_items, 'c0', 'c1', 'c2')
        abx_task.generate_triplets(task_path)

        distances.compute_distances(
            frozen_features, '/features/', task_path, distance_path,
            dtw_cosine_distance, normalized=True, n_cpu=1)
        score.score(task_path, distance_path, score_path)
        analyze.analyze(task_path, score_path, csv_path)

        # Only the final csv is compared; the h5 comparisons of the task,
        # distance and score files are disabled:
        # assert items.h5cmp(task_path, frozen_file('abx'))
        # assert items.h5cmp(distance_path, frozen_file('distance'))
        # assert items.h5cmp(score_path, frozen_file('score'))
        assert items.csv_cmp(csv_path, frozen_file('csv'))
    finally:
        shutil.rmtree('test_items', ignore_errors=True)
def test_threshold_analyze():
    """Check that every row of the analyze output reports ``threshold`` triplets.

    Triplets are generated with ``threshold=2``; after the full pipeline the
    last column of the tab-separated csv must equal that threshold on every
    row.
    """
    try:
        workdir_missing = not os.path.exists('test_items')
        if workdir_missing:
            os.makedirs('test_items')

        threshold = 2
        item_path = 'test_items/data.item'
        feature_path = 'test_items/data.features'
        task_path = 'test_items/data.abx'
        distance_path = 'test_items/data.distance'
        score_path = 'test_items/data.score'
        csv_path = 'test_items/data.csv'

        items.generate_db_and_feat(3, 3, 1, item_path, 2, 3, feature_path)

        abx_task = ABXpy.task.Task(item_path, 'c0', 'c1', 'c2')
        abx_task.generate_triplets(task_path, threshold=threshold)

        distances.compute_distances(
            feature_path, '/features/', task_path, distance_path,
            dtw_cosine_distance, normalized=True, n_cpu=1)
        score.score(task_path, distance_path, score_path)
        analyze.analyze(task_path, score_path, csv_path)

        # Skip the header row and read only the last column.
        triplet_counts = np.loadtxt(
            csv_path, dtype=int, delimiter='\t', skiprows=1, usecols=[-1])
        assert np.all(triplet_counts == threshold)
    finally:
        shutil.rmtree('test_items', ignore_errors=True)
def test_analyze():
    """Smoke-test the whole chain: items, task, distances, score, analyze.

    The generated files are removed afterwards. The ``test_items``
    directory itself is left in place.
    """
    # Bound before the try block so the cleanup can always refer to them,
    # even if creating the directory fails.
    item_file = 'test_items/data.item'
    feature_file = 'test_items/data.features'
    distance_file = 'test_items/data.distance'
    scorefilename = 'test_items/data.score'
    taskfilename = 'test_items/data.abx'
    analyzefilename = 'test_items/data.csv'
    try:
        if not os.path.exists('test_items'):
            os.makedirs('test_items')
        items.generate_db_and_feat(3, 3, 1, item_file, 2, 3, feature_file)
        task = ABXpy.task.Task(item_file, 'c0', 'c1', 'c2')
        task.generate_triplets(taskfilename)
        distances.compute_distances(feature_file, '/features/', taskfilename,
                                    distance_file, dtw_cosine_distance)
        score.score(taskfilename, distance_file, scorefilename)
        analyze.analyze(taskfilename, scorefilename, analyzefilename)
    finally:
        # Best-effort cleanup: remove each file independently so that one
        # missing file does not prevent the others from being deleted, and
        # swallow only filesystem errors.
        for path in (item_file, feature_file, taskfilename,
                     distance_file, scorefilename, analyzefilename):
            try:
                os.remove(path)
            except OSError:
                pass
def test_score():
    """Smoke-test the pipeline up to the scoring step.

    Items and features are generated under ``test_items``, triplets are
    generated with the task's default output file, then distances (3 CPUs)
    and scores are computed. The directory is removed afterwards.
    """
    try:
        if not os.path.exists('test_items'):
            os.makedirs('test_items')
        item_file = 'test_items/data.item'
        feature_file = 'test_items/data.features'
        distance_file = 'test_items/data.distance'
        scorefilename = 'test_items/data.score'
        taskfilename = 'test_items/data.abx'
        items.generate_db_and_feat(3, 3, 1, item_file, 2, 3, feature_file)
        task = ABXpy.task.Task(item_file, 'c0', 'c1', 'c2')
        # NOTE(review): no output path is passed here; presumably the default
        # output resolves to ``taskfilename`` -- confirm against ABXpy.
        task.generate_triplets()
        distances.compute_distances(
            feature_file, '/features/', taskfilename, distance_file,
            dtw_cosine_distance, normalized=True, n_cpu=3)
        score.score(taskfilename, distance_file, scorefilename)
    finally:
        # Best-effort cleanup that ignores removal failures without a bare
        # ``except`` (which would also swallow KeyboardInterrupt).
        shutil.rmtree('test_items', ignore_errors=True)
def evaluate_abx(args):
    """Run the ABX evaluation into ``args.out_dir`` and print the average.

    Each stage (feature conversion, distances, scores, analysis) is skipped
    when its output file already exists in the output directory. Reads
    ``args.out_dir``, ``args.feature_dir``, ``args.task_path`` and
    ``args.task_type``.
    """
    workdir = Path(args.out_dir)
    workdir.mkdir(parents=True, exist_ok=True)

    features_h5 = workdir / "features.features"
    distances_h5 = workdir / "data.distance"
    scores_h5 = workdir / "data.score"
    results_csv = workdir / "data.csv"

    # Stage 1: convert the raw features.
    if not features_h5.exists():
        convert(args.feature_dir, h5_filename=str(features_h5))

    # Stage 2: pairwise distances for the task.
    if not distances_h5.exists():
        distances.compute_distances(
            str(features_h5),
            "features",
            str(args.task_path),
            str(distances_h5),
            dtw_cosine_distance,
            normalized=True,
            n_cpu=6,
        )

    # Stage 3: ABX scores.
    if not scores_h5.exists():
        score.score(str(args.task_path), str(distances_h5), str(scores_h5))

    # Stage 4: collapse the scores into a csv.
    if not results_csv.exists():
        analyze.analyze(str(args.task_path), str(scores_h5), str(results_csv))

    mean_abx = average_abx(str(results_csv), args.task_type)
    print("average abx: {:.3f}".format(mean_abx))
def test_frozen_analyze():
    """Regression test against a previously "frozen" run.

    Frozen analyze compares the results of a previously "frozen" run with a
    new one, asserting that the code did not change in behaviour. The score
    file and the analyze output are compared; the generated files are
    removed afterwards.
    """
    # Bound before the try block so the cleanup can always refer to them.
    distance_file = 'test_items/data.distance'
    scorefilename = 'test_items/data.score'
    taskfilename = 'test_items/data.abx'
    analyzefilename = 'test_items/data.csv'
    try:
        if not os.path.exists('test_items'):
            os.makedirs('test_items')
        item_file = frozen_file('item')
        feature_file = frozen_file('features')
        task = ABXpy.task.Task(item_file, 'c0', 'c1', 'c2')
        task.generate_triplets(taskfilename)
        distances.compute_distances(feature_file, '/features/', taskfilename,
                                    distance_file, dtw_cosine_distance)
        score.score(taskfilename, distance_file, scorefilename)
        analyze.analyze(taskfilename, scorefilename, analyzefilename)
        # The task and distance files are not compared:
        # assert items.h5cmp(taskfilename, frozen_file('abx'))
        # assert items.h5cmp(distance_file, frozen_file('distance'))
        assert items.h5cmp(scorefilename, frozen_file('score'))
        assert items.cmp(analyzefilename, frozen_file('csv'))
    finally:
        # Best-effort cleanup of the generated files only (the frozen inputs
        # are kept). Each file is removed independently so one missing file
        # does not leave the others behind; only filesystem errors are
        # swallowed.
        for path in (taskfilename, distance_file, scorefilename,
                     analyzefilename):
            try:
                os.remove(path)
            except OSError:
                pass
def test_score():
    """Smoke-test the pipeline up to the scoring step.

    Items and features are generated under ``test_items``, triplets are
    generated with the task's default output file, then distances (3 CPUs)
    and scores are computed. The directory is removed afterwards.
    """
    try:
        if not os.path.exists('test_items'):
            os.makedirs('test_items')
        item_file = 'test_items/data.item'
        feature_file = 'test_items/data.features'
        distance_file = 'test_items/data.distance'
        scorefilename = 'test_items/data.score'
        taskfilename = 'test_items/data.abx'
        items.generate_db_and_feat(3, 3, 1, item_file, 2, 3, feature_file)
        task = ABXpy.task.Task(item_file, 'c0', 'c1', 'c2')
        # NOTE(review): no output path is passed here; presumably the default
        # output resolves to ``taskfilename`` -- confirm against ABXpy.
        task.generate_triplets()
        distances.compute_distances(feature_file, '/features/', taskfilename,
                                    distance_file, dtw_cosine_distance,
                                    normalized=True, n_cpu=3)
        score.score(taskfilename, distance_file, scorefilename)
    finally:
        # Best-effort cleanup that ignores removal failures without a bare
        # ``except`` (which would also swallow KeyboardInterrupt).
        shutil.rmtree('test_items', ignore_errors=True)
def test_threshold_analyze():
    """Check that every row of the analyze output reports ``threshold`` triplets.

    Triplets are generated with ``threshold=2``; after the full pipeline the
    last column of the tab-separated csv must equal that threshold on every
    row. The ``test_items`` directory is removed afterwards.
    """
    try:
        if not os.path.exists('test_items'):
            os.makedirs('test_items')
        item_file = 'test_items/data.item'
        feature_file = 'test_items/data.features'
        distance_file = 'test_items/data.distance'
        scorefilename = 'test_items/data.score'
        taskfilename = 'test_items/data.abx'
        analyzefilename = 'test_items/data.csv'
        threshold = 2
        items.generate_db_and_feat(3, 3, 1, item_file, 2, 3, feature_file)
        task = ABXpy.task.Task(item_file, 'c0', 'c1', 'c2')
        task.generate_triplets(taskfilename, threshold=threshold)
        distances.compute_distances(
            feature_file, '/features/', taskfilename, distance_file,
            dtw_cosine_distance, normalized=True, n_cpu=1)
        score.score(taskfilename, distance_file, scorefilename)
        analyze.analyze(taskfilename, scorefilename, analyzefilename)
        # Skip the header row and read only the last column.
        number_triplets = np.loadtxt(analyzefilename, dtype=int,
                                     delimiter='\t', skiprows=1,
                                     usecols=[-1])
        assert np.all(number_triplets == threshold)
    finally:
        # Best-effort cleanup that ignores removal failures without a bare
        # ``except`` (which would also swallow KeyboardInterrupt).
        shutil.rmtree('test_items', ignore_errors=True)
def fullrun():
    """Run the complete example pipeline inside ``example_items``.

    Output files left over from an earlier run are deleted first; then
    items and features are generated, the task is built, and distances,
    scores and the analysis csv are computed.
    """
    output_dir_missing = not os.path.exists('example_items')
    if output_dir_missing:
        os.makedirs('example_items')

    item_path = 'example_items/data.item'
    feature_path = 'example_items/data.features'
    distance_path = 'example_items/data.distance'
    score_path = 'example_items/data.score'
    task_path = 'example_items/data.abx'
    csv_path = 'example_items/data.csv'

    # Start from a clean slate; files that do not exist are simply skipped.
    leftovers = (item_path, feature_path, distance_path,
                 score_path, task_path, csv_path)
    for leftover in leftovers:
        try:
            os.remove(leftover)
        except OSError:
            pass

    # Evaluation proper.
    items.generate_db_and_feat(3, 3, 5, item_path, 2, 2, feature_path)

    abx_task = ABXpy.task.Task(item_path, 'c0', across='c1', by='c2')
    abx_task.generate_triplets(task_path)

    distances.compute_distances(
        feature_path, '/features/', task_path, distance_path,
        dtw_cosine_distance, normalized=True, n_cpu=1)
    score.score(task_path, distance_path, score_path)
    analyze.analyze(task_path, score_path, csv_path)
def test_analyze(itemfile, featurefile, args, taskfile=None, distance=None,
                 distancefile=None, scorefile=None, analyzefile=None,
                 filename=None):
    """Run task generation, distances, score and analyze for the given files.

    Parameters
    ----------
    itemfile, featurefile :
        Item and feature files handed to ``ABXpy.task.Task`` and
        ``compute_distances``.
    args :
        Source from which ``get_arg`` extracts 'on' (mandatory), 'across',
        'by', 'filters' and 'reg'.
    taskfile, distancefile, scorefile, analyzefile :
        Output paths. Each one that is not given is derived from
        ``filename`` with the extensions '.abx', '.distance', '.score' and
        '.csv' respectively.
    distance :
        Distance function; defaults to ``dtw_cosine_distance``.
    filename :
        Common stem for derived output paths. Defaults to the item name,
        feature name, on, across and by joined with '_'.

    Raises
    ------
    AssertionError
        If the 'on' argument cannot be found in ``args``.
    """
    on = get_arg('on', args)
    assert on, ("The 'on' argument was not found, this argument is mandatory "
                "for the task")
    across = get_arg('across', args)
    by = get_arg('by', args)
    filters = get_arg('filters', args)
    reg = get_arg('reg', args)

    if not filename:
        filename = '_'.join(
            filter(None, [
                get_name(itemfile),
                get_name(featurefile),
                str(on),
                str(across),
                str(by)
            ]))
    # The task file gets a derived default like the other outputs, so that
    # ``None`` is never handed to the distance/score/analyze steps.
    if not taskfile:
        taskfile = filename + '.abx'
    if not distancefile:
        distancefile = filename + '.distance'
    if not scorefile:
        scorefile = filename + '.score'
    if not analyzefile:
        analyzefile = filename + '.csv'
    if not distance:
        distance = dtw_cosine_distance

    task = ABXpy.task.Task(itemfile, on, across, by, filters, reg,
                           features=featurefile)
    # Write the triplets to the very file the following steps read.
    task.generate_triplets(taskfile)

    distances.compute_distances(featurefile, '/features/', taskfile,
                                distancefile, distance)
    score.score(taskfile, distancefile, scorefile)
    # NOTE(review): argument order (score file first, task file second) is
    # kept from the original call -- confirm it matches the signature of
    # ``analyze.analyze`` in the ABXpy version in use.
    analyze.analyze(scorefile, taskfile, analyzefile)
def _abx(features_path, temp_dir, task, task_type, load_fun, distance,
         normalized, njobs, log):
    """Run the ABX pipeline and return the averaged score.

    Parameters
    ----------
    features_path :
        Input features, converted to ``<temp_dir>/features.h5`` unless that
        file already exists.
    temp_dir :
        Directory receiving the features, distance, score and analyze files.
    task :
        Task file passed to the distance, score and analyze steps.
    task_type :
        Used to name the intermediate files and passed to ``_average``.
    load_fun :
        Loader forwarded to ``convert``.
    distance :
        One of 'cosine', 'KL' or 'levenshtein'.
    normalized :
        Forwarded to ``compute_distances``.
    njobs :
        Number of numexpr threads and of CPUs for the distance computation.
    log :
        Logger used for debug messages.

    Returns
    -------
    The value returned by ``_average`` for the analyze file.

    Raises
    ------
    KeyError
        If ``distance`` is not one of the supported names.
    """
    dist2fun = {
        'cosine': default_distance,
        'KL': dtw_kl_distance,
        'levenshtein': edit_distance
    }

    # convert
    log.debug('loading features ...')
    features = os.path.join(temp_dir, 'features.h5')
    if not os.path.isfile(features):
        convert(features_path, h5_filename=features, load=load_fun)

    # avoid annoying log message
    numexpr.set_num_threads(njobs)

    log.debug('computing %s distances ...', distance)
    distance_file = os.path.join(temp_dir, 'distance_{}.h5'.format(task_type))

    # ABX distances prints some messages we do not want to display. Silence
    # stdout only for the duration of the call, close the devnull handle
    # afterwards and restore whatever stream was installed before (which may
    # itself be a redirection), even if the computation raises.
    previous_stdout = sys.stdout
    with open(os.devnull, 'w') as devnull:
        sys.stdout = devnull
        try:
            with warnings.catch_warnings():
                # inhibit some useless warnings about complex to float
                # conversion
                warnings.filterwarnings("ignore", category=np.ComplexWarning)
                ABXpy.distances.distances.compute_distances(
                    features, 'features', task, distance_file,
                    dist2fun[distance], normalized, n_cpu=njobs)
        finally:
            sys.stdout = previous_stdout

    log.debug('computing abx score ...')
    # score
    score_file = os.path.join(temp_dir, 'score_{}.h5'.format(task_type))
    score(task, distance_file, score_file)

    # analyze
    analyze_file = os.path.join(temp_dir, 'analyze_{}.csv'.format(task_type))
    analyze(task, score_file, analyze_file)

    # average
    abx_score = _average(analyze_file, task_type)
    return abx_score
def fullrun():
    """Run the complete example pipeline inside ``example_items``.

    Items and features are generated, the task is built with the feature
    file attached, and distances, scores and the analysis csv are computed.
    """
    output_dir_missing = not os.path.exists('example_items')
    if output_dir_missing:
        os.makedirs('example_items')

    item_path = 'example_items/data.item'
    feature_path = 'example_items/data.features'
    distance_path = 'example_items/data.distance'
    score_path = 'example_items/data.score'
    task_path = 'example_items/data.abx'
    csv_path = 'example_items/data.csv'

    items.generate_db_and_feat(3, 3, 1, item_path, 2, 2, feature_path)

    abx_task = ABXpy.task.Task(
        item_path, 'c0', 'c1', 'c2', features=feature_path)
    # No explicit output path is given to the triplet generation.
    abx_task.generate_triplets()

    distances.compute_distances(
        feature_path, '/features/', task_path, distance_path,
        dtw_cosine_distance)
    score.score(task_path, distance_path, score_path)
    analyze.analyze(score_path, task_path, csv_path)
def run_ABX(feat_file, task_file, dis_file, score_file, result_file,
            distance, normalized):
    """Chain the ABXpy distance, score and analyze steps.

    Starting from the given feature and task files, distances are written
    to ``dis_file`` (single CPU, using ``distance`` and ``normalized``),
    scores to ``score_file`` and the analysis to ``result_file``.
    """
    # Step 1: distances.
    dis.compute_distances(
        feat_file,
        '/features/',
        task_file,
        dis_file,
        distance,
        normalized=normalized,
        n_cpu=1,
    )
    # Step 2: scores.
    sco.score(task_file, dis_file, score_file)
    # Step 3: results.
    ana.analyze(task_file, score_file, result_file)
def memorizable_abx(data_file, on, across, by, njobs, tmpdir=None,
                    distance=cosine_distance, item_features_hash='0'):
    """Wrap the ABXpy functions and return the content of the analyze file.

    Parameters
    ----------
    data_file :
        Common stem: ``<data_file>.item`` and ``<data_file>.features`` must
        exist; the '.abx', '.distance', '.score' and '.csv' files are
        created next to them.
    on, across, by :
        Forwarded to ``ABXpy.task.Task``.
    njobs :
        Number of CPUs for the distance computation.
    tmpdir :
        Forwarded to ``generate_triplets``.
    distance :
        Distance function for ``compute_distances``.
    item_features_hash :
        Not used in the body.
        NOTE(review): presumably only part of a memoization key -- confirm
        against the caller.

    Returns
    -------
    str
        The text of the analyze (csv) file, which is kept on disk.

    Raises
    ------
    ValueError
        If the item file or the feature file does not exist.
    """
    item_file = '{}.item'.format(data_file)
    feature_file = '{}.features'.format(data_file)
    if not os.path.isfile(item_file) or not os.path.isfile(feature_file):
        raise ValueError('item_file or feature_file doesnt exist')

    distance_file = '{}.distance'.format(data_file)
    score_file = '{}.score'.format(data_file)
    task_file = '{}.abx'.format(data_file)
    analyze_file = '{}.csv'.format(data_file)

    # Clean up before computing ABX. An explicit loop is required: on
    # Python 3 ``map`` is lazy, so ``map(os.remove, ...)`` without
    # consuming the result deletes nothing.
    for stale in (distance_file, score_file, task_file, analyze_file):
        if os.path.exists(stale):
            os.remove(stale)

    # running the evaluation
    task = ABXpy.task.Task(item_file, on, across=across, by=by, verbose=False)
    task.generate_triplets(task_file, tmpdir=tmpdir)
    distances.compute_distances(feature_file, '/features/', task_file,
                                distance_file, distance, normalized=True,
                                n_cpu=njobs)
    score.score(task_file, distance_file, score_file)
    analyze.analyze(task_file, score_file, analyze_file)

    # Keep only the ABX scores (the csv); drop the intermediate files.
    for intermediate in (distance_file, score_file, task_file):
        if os.path.exists(intermediate):
            os.remove(intermediate)

    # Read through a context manager so the handle is closed promptly.
    with open(analyze_file, 'r') as analyze_handle:
        analyze_data = analyze_handle.read()
    return analyze_data
def test_analyze(
    itemfile,
    featurefile,
    args,
    taskfile=None,
    distance=None,
    distancefile=None,
    scorefile=None,
    analyzefile=None,
    filename=None,
):
    """Run task generation, distances, score and analyze for the given files.

    Parameters
    ----------
    itemfile, featurefile :
        Item and feature files handed to ``ABXpy.task.Task`` and
        ``compute_distances``.
    args :
        Source from which ``get_arg`` extracts "on" (mandatory), "across",
        "by", "filters" and "reg".
    taskfile, distancefile, scorefile, analyzefile :
        Output paths. Each one that is not given is derived from
        ``filename`` with the extensions ".abx", ".distance", ".score" and
        ".csv" respectively.
    distance :
        Distance function; defaults to ``dtw_cosine_distance``.
    filename :
        Common stem for derived output paths. Defaults to the item name,
        feature name, on, across and by joined with "_".

    Raises
    ------
    AssertionError
        If the "on" argument cannot be found in ``args``.
    """
    on = get_arg("on", args)
    assert on, "The 'on' argument was not found, this argument is mandatory " "for the task"
    # Every lookup reads from ``args``, like the "on" lookup above.
    across = get_arg("across", args)
    by = get_arg("by", args)
    filters = get_arg("filters", args)
    reg = get_arg("reg", args)

    if not filename:
        filename = "_".join(
            filter(None, [get_name(itemfile), get_name(featurefile), str(on), str(across), str(by)])
        )
    # The task file gets a derived default like the other outputs, so that
    # ``None`` is never handed to the distance/score/analyze steps.
    if not taskfile:
        taskfile = filename + ".abx"
    if not distancefile:
        distancefile = filename + ".distance"
    if not scorefile:
        scorefile = filename + ".score"
    if not analyzefile:
        analyzefile = filename + ".csv"
    if not distance:
        distance = dtw_cosine_distance

    task = ABXpy.task.Task(itemfile, on, across, by, filters, reg, features=featurefile)
    # Write the triplets to the very file the following steps read.
    task.generate_triplets(taskfile)

    distances.compute_distances(featurefile, "/features/", taskfile, distancefile, distance)
    score.score(taskfile, distancefile, scorefile)
    # NOTE(review): argument order (score file first, task file second) is
    # kept from the original call -- confirm it matches the signature of
    # ``analyze.analyze`` in the ABXpy version in use.
    analyze.analyze(scorefile, taskfile, analyzefile)
def fullrun(task, data_folder, feature_folder, h5, file_sizes, corpus,
            distance, outputdir, normalized, doall=True, ncpus=None):
    """Run distances, scores and analysis for one task and return its average.

    Parameters
    ----------
    task :
        Task configuration; read through ``lookup`` and ``task['section']``.
    data_folder :
        Root under which the task file
        ``test/<corpus>/<file_sizes>s/<file_sizes>s_<type>.abx`` is expected.
    feature_folder :
        Folder with the features. Converted to h5 when ``h5`` is false,
        otherwise expected to contain ``<file_sizes>s.h5f``.
    h5 :
        True when the features are already available in h5 format.
    file_sizes, corpus :
        Used to build the task file and h5 feature file names.
    distance :
        Optional ``'<module path>.<function>'`` string. When empty the
        module and function are looked up in the task configuration.
    outputdir :
        Directory for the feature, distance, score and analyze files.
    normalized :
        Forwarded to ``compute_distances``.
    doall :
        Accepted but not used in this function.
    ncpus :
        Number of CPUs; when falsy, read from the task configuration
        (default 1).

    Returns
    -------
    The value returned by ``avg`` for the analyze file.

    Notes
    -----
    The distance, score and analyze files (and the converted feature file
    when ``h5`` is false) are removed before returning, also on failure.
    """
    print("Processing task {}".format(task['section']))
    feature_file = os.path.join(outputdir, lookup('featurefile', task))

    # Resolve the distance function, from the explicit argument if given,
    # otherwise from the task configuration.
    try:
        if distance:
            distancepair = distance.split('.')
            distancemodule = distancepair[0]
            distancefunction = distancepair[1]
        else:
            distancemodule = lookup('distancemodule', task,
                                    os.path.join(CURDIR, 'distance'))
            distancefunction = lookup('distancefunction', task, 'distance')
        path, mod = os.path.split(distancemodule)
        # Make the module's folder importable before importing it by name.
        sys.path.insert(0, path)
        distancefun = getattr(__import__(mod), distancefunction)
    except Exception:
        sys.stderr.write('distance not found\n')
        raise

    distance_file = os.path.join(outputdir, lookup('distancefile', task))
    scorefilename = os.path.join(outputdir, lookup('scorefile', task))
    taskfilename = os.path.join(
        data_folder, 'test', corpus,
        '{}s'.format(file_sizes),
        '{}s_{}.abx'.format(file_sizes, lookup('type', task)))
    print('Task file is {}'.format(taskfilename))
    assert os.path.isfile(taskfilename), 'Task file unknown'
    analyzefilename = os.path.join(outputdir, lookup('analyzefile', task))

    if not ncpus:
        ncpus = int(lookup('ncpus', task, 1))

    makedirs([feature_file, distance_file, scorefilename, analyzefilename])

    # Preprocessing
    if not h5:
        try:
            print("Preprocessing... Writing the features in h5 format")
            tryremove(feature_file)
            any2h5features.convert(feature_folder, h5_filename=feature_file,
                                   load=loadfeats)
            # Open explicitly in append mode: without a mode, h5py >= 3 opens
            # the file read-only and creating the attribute fails.
            with h5py.File(feature_file, 'a') as fh:
                fh.attrs.create('done', True)
        except Exception:
            sys.stderr.write('Error when writing the features from {} to {}\n'
                             'Check the paths availability\n'.format(
                                 os.path.realpath(feature_folder),
                                 os.path.realpath(feature_file)))
            raise
    else:
        feature_file = os.path.join(feature_folder,
                                    '{}s.h5f'.format(file_sizes))

    # computing
    try:
        print("Computing the distances")
        tryremove(distance_file)
        distances.compute_distances(feature_file, '/features/', taskfilename,
                                    distance_file, distancefun,
                                    normalized=normalized, n_cpu=ncpus)

        tryremove(scorefilename)
        print("Computing the scores")
        score.score(taskfilename, distance_file, scorefilename)

        tryremove(analyzefilename)
        print("Collapsing the results")
        analyze.analyze(taskfilename, scorefilename, analyzefilename)

        # The average is computed before the cleanup below deletes the file.
        return avg(analyzefilename, task)
    finally:
        tryremove(distance_file)
        tryremove(scorefilename)
        tryremove(analyzefilename)
        if not h5:
            # Only the feature file converted by this run is removed.
            tryremove(feature_file)
def fullrun():
    """Run the ABX evaluation configured by the module-level settings.

    Reads the globals ``ON``, ``ACROSS``, ``BY``, ``input_folder``,
    ``feature``, ``distance``, ``NB_CPU``, ``out_res`` and ``ponderate``.
    The task, distance, score and analyze files are written to a subfolder
    of ``input_folder`` named after the on/across/by settings; each step is
    skipped when its output file already exists. Finally ``eval_abx.avg``
    is called on the analyze file.

    Raises
    ------
    ValueError
        If the distance file must be computed and ``distance`` is neither
        'cosine' nor 'kl'.
    """
    # NOTE(review): the nesting of the status prints was reconstructed from
    # a flattened source -- confirm against the intended layout.
    if isinstance(BY, list):
        out = ('/' + 'on_' + ON[0:2] + '_ac_' + ACROSS[0:2] + '_by_' +
               BY[0][0:2] + '_' + BY[1][0:2])
    else:
        out = '/' + 'on_' + ON[0:2] + '_ac_' + ACROSS[0:2] + '_by_' + BY[0:2]
    output_folder = input_folder + out

    print("the input folder is " + input_folder + "\n")
    print("the ABX task id done :" + out + "\n")
    print(feature)

    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    item_file = input_folder + '/' + ON + '.item'
    feature_file = input_folder + '/' + feature
    distance_file = output_folder + '/' + out + '.distance'
    scorefilename = output_folder + '/' + out + '.score'
    taskfilename = output_folder + '/' + out + '.abx'
    analyzefilename = output_folder + '/' + out + '.csv'
    statsfilename = output_folder + '/' + out + '.stats'

    # running the evaluation:
    if not os.path.exists(taskfilename):
        # "na" disables the corresponding across/by condition.
        if ACROSS == "na" and BY != "na":
            task = ABXpy.task.Task(item_file, ON, by=BY)
        elif BY == "na" and ACROSS != "na":
            task = ABXpy.task.Task(item_file, ON, across=ACROSS)
        elif ACROSS == "na" and BY == "na":
            task = ABXpy.task.Task(item_file, ON)
        else:
            task = ABXpy.task.Task(item_file, ON, across=ACROSS, by=BY)

        task.generate_triplets(taskfilename)
        # The statistics are optional: report a failure instead of silently
        # swallowing it, but do not abort the evaluation.
        try:
            task.print_stats(statsfilename)
        except Exception as err:
            print("could not write the task statistics: " + str(err))
    print("the abx task file is created")

    print("number of cpu used is " + str(NB_CPU))

    if not os.path.exists(distance_file):
        # Pick the distance function first so the call is written once.
        if distance == 'cosine':
            distance_function = dtw_cosine_distance
        elif distance == 'kl':
            distance_function = dtw_kl_divergence
        else:
            raise ValueError('distance must be either cosine or kl')
        distances.compute_distances(feature_file, '/features/', taskfilename,
                                    distance_file, distance_function,
                                    normalized=True, n_cpu=NB_CPU)
        # The original message printed a literal "%(distance)s" placeholder.
        print("%s distance has been computed" % distance)
    else:
        print("distance has already been computed")

    if not os.path.exists(scorefilename):
        score.score(taskfilename, distance_file, scorefilename)
    print("Score is computed")

    if not os.path.exists(analyzefilename):
        analyze.analyze(taskfilename, scorefilename, analyzefilename)
    print("Raw results are available in the csv file !")

    eval_abx.avg(analyzefilename, out_res, ON, ACROSS, ponderate)
    print('evaluation done')