# Experiment set-up: parse the command line, generate the data set and create
# the module-level containers read by the helpers defined below.
import sys
from collections import defaultdict

import editdistance
import numpy

import data_generation
# NOTE(review): `shifts_gen` is used below but no import for it is visible in
# this chunk -- confirm it is brought into scope elsewhere in the file.

# Five positional arguments are read below (sys.argv[1] .. sys.argv[5]), so
# argv needs at least six entries once the script name is counted.  The former
# `< 5` check let a four-argument call through and crashed with IndexError on
# sys.argv[5] instead of showing the usage text.
if len(sys.argv) < 6:
    # Single-argument print(...) behaves the same under Python 2 and 3.
    print('''usage: python file.py size dimension delta typos file_name''')
    sys.exit(0)

data_size = int(sys.argv[1])
data_dim = int(sys.argv[2])
delta = float(sys.argv[3])
data_typos = int(sys.argv[4])
file_number = sys.argv[5]
alphabet_size = 2

Data = data_generation.random_data_generation(data_size, data_dim)
# Data = data_generation.data_typo(data_dim, k=data_typos)

# Re-derive both sizes from the data that was actually produced.
data_dim = len(Data[0])
data_size = len(Data)

# Created without a default factory, so a missing key still raises KeyError.
block_s_metric = defaultdict()
random_s_block = defaultdict()
final_metric = defaultdict()

partitions = shifts_gen.partition_string(Data[0])
num_partitions = len(partitions)


def s_vals():
    # NOTE(review): this definition is cut off at the end of the visible
    # chunk; only its first two statements are shown and they are reproduced
    # unchanged.
    x_block = shifts_gen.partition_string(Data[0])[0]
    s_val = []
# Distortion experiment script.
#
# Steps, in order:
#   1. generate a data set with data_generation.random_data_generation(60, 2048)
#   2. build embeddings for it with psi_generator.driver_embeddings
#   3. run utils.nearest_neighbours_linear_scan twice: on the raw data with
#      editdistance.eval, and on the embeddings with utils.l_1
#   4. hand both results to utils.compare_nearest_neighbours, which is asked to
#      print a summary and is given an output file name under results/
import data_generation
import psi_generator
import editdistance
import utils
import numpy

Data = data_generation.random_data_generation(60, 2048)

# driver_embeddings returns a pair; the second element is kept as embed_time.
embeddings, embed_time = psi_generator.driver_embeddings(Data)

# Scan of the raw data against itself, scored with editdistance.eval.
edit_distance_nn = utils.nearest_neighbours_linear_scan(
    Data,
    Data,
    editdistance.eval,
    iterator_type='list',
)

# Scan of the embeddings against themselves, scored with utils.l_1.
l1_distance_nn = utils.nearest_neighbours_linear_scan(
    embeddings,
    embeddings,
    utils.l_1,
    iterator_type='numpy',
)

comparison = utils.compare_nearest_neighbours(
    Data,
    Data,
    editdistance.eval,
    edit_distance_nn,
    l1_distance_nn,
    print_summary=True,
    file_name='results/distortion_60_2048.png',
)
parser.print_help() return None else: (options, args) = parser.parse_args() return (options, args) def print_config(options): print options if __name__ == '__main__': [options, _] = option_parsing() print_config(options) if options.data == 'random': Data = data_generation.random_data_generation(options.size, options.dim, options.alphabet_size) elif options.data == 'typo': Data = data_generation.data_typo(options.dim, options.typos, options.alphabet_size) elif options.data == 'protein': Data = data_generation.read_file_protein(file_name=options.file) else: option_parsing(print_usage=True) sys.exit(0) driver_function(Data, options.alphabet_size, options.delta, options.file_suffix)
help='data source only valid if data=protein, default = raw_data/multigene_zfill.txt') if print_usage: parser.print_help() return None else: (options, args) = parser.parse_args() return (options, args) def print_config(options): print options if __name__ == '__main__': [options, _] = option_parsing() print_config(options) if options.data == 'random': Data = data_generation.random_data_generation( options.size, options.dim, options.alphabet_size) elif options.data == 'typo': Data = data_generation.data_typo(options.dim, options.typos, options.alphabet_size) elif options.data == 'protein': Data = data_generation.read_file_protein(file_name=options.file) else: option_parsing(print_usage=True) sys.exit(0) driver_function(Data, options.alphabet_size, options.delta, options.file_suffix)