示例#1
0
import numpy
import editdistance
import sys

# Command-line interface: size dimension delta typos file_name.
# Five positional arguments are read below (sys.argv[1] .. sys.argv[5]), so
# argv must contain at least six entries once the script name is counted.
# The previous guard (< 5) let a four-argument call through and then failed
# with IndexError on sys.argv[5] instead of showing the usage text.
if len(sys.argv) < 6:
  # Parenthesised single-string form prints identically under the Python 2
  # print statement and the Python 3 print function.
  print('''usage: python file.py size dimension delta typos file_name''')
  sys.exit(0)

data_size = int(sys.argv[1])    # "size" argument
data_dim = int(sys.argv[2])     # "dimension" argument
delta = float(sys.argv[3])      # "delta" argument
data_typos = int(sys.argv[4])   # "typos" argument
file_number = sys.argv[5]       # "file_name" argument, kept as a string
alphabet_size = 2               # fixed here; not configurable from the CLI

# Generate the working data set from the size and dimension given on the
# command line.
# NOTE(review): data_generation, shifts_gen and defaultdict are not imported
# in the visible part of this file - confirm they are brought into scope.
Data = data_generation.random_data_generation(data_size, data_dim)
# Alternative data source, currently disabled:
# Data = data_generation.data_typo(data_dim, k=data_typos)
# Overwrite the command-line values with the lengths measured on the
# generated data (length of the first item, and number of items).
data_dim = len(Data[0])
data_size = len(Data)

# Module-level containers, created empty here.
# NOTE(review): if this is collections.defaultdict, calling it without a
# default factory means missing keys still raise KeyError - confirm intended.
block_s_metric = defaultdict()
random_s_block = defaultdict()
final_metric = defaultdict()

# Partition the first data item and record how many partitions it yields.
partitions = shifts_gen.partition_string(Data[0])
num_partitions = len(partitions)


def s_vals():
  x_block = shifts_gen.partition_string(Data[0])[0]
  s_val = []
示例#2
0
import data_generation
import psi_generator
import editdistance
import utils
import numpy

# Generate the data set; (60, 2048) are passed straight to the generator.
Data = data_generation.random_data_generation(60, 2048)

# driver_embeddings returns a pair; the second element is bound but not used
# again within this script.
embeddings, embed_time = psi_generator.driver_embeddings(Data)

# Linear-scan nearest neighbours over the raw data, with editdistance.eval
# passed as the third argument (presumably the distance function).
edit_distance_nn = utils.nearest_neighbours_linear_scan(
    Data,
    Data,
    editdistance.eval,
    iterator_type='list',
)

# The same scan over the embeddings, with utils.l_1 in the same position.
l1_distance_nn = utils.nearest_neighbours_linear_scan(
    embeddings,
    embeddings,
    utils.l_1,
    iterator_type='numpy',
)

# Compare the two neighbour results; a summary is requested and the output
# file name is handed to the helper.
comparison = utils.compare_nearest_neighbours(
    Data,
    Data,
    editdistance.eval,
    edit_distance_nn,
    l1_distance_nn,
    print_summary=True,
    file_name='results/distortion_60_2048.png',
)
示例#3
0
        parser.print_help()
        return None
    else:
        (options, args) = parser.parse_args()
        return (options, args)


def print_config(options):
    """Print the given options object to standard output.

    Args:
        options: The value to display; it is printed as-is.
    """
    # The parenthesised single-argument form produces the same output under
    # the Python 2 print statement and is valid as the Python 3 print
    # function, whereas the bare `print options` is a SyntaxError on Python 3.
    print(options)


if __name__ == '__main__':
    # option_parsing() yields an (options, args) pair; args is not used here.
    options, _ = option_parsing()
    print_config(options)

    source = options.data
    if source not in ('random', 'typo', 'protein'):
        # Unrecognised data source: show the usage text and stop.
        option_parsing(print_usage=True)
        sys.exit(0)

    # Build the data set from whichever source was requested.
    if source == 'random':
        Data = data_generation.random_data_generation(
            options.size, options.dim, options.alphabet_size)
    elif source == 'typo':
        Data = data_generation.data_typo(
            options.dim, options.typos, options.alphabet_size)
    else:
        Data = data_generation.read_file_protein(file_name=options.file)

    driver_function(
        Data, options.alphabet_size, options.delta, options.file_suffix)
                      help='data source only valid if data=protein, default = raw_data/multigene_zfill.txt')

    if print_usage:
        parser.print_help()
        return None
    else:
        (options, args) = parser.parse_args()
        return (options, args)


def print_config(options):
    """Print the given options object to standard output.

    Args:
        options: The value to display; it is printed as-is.
    """
    # The parenthesised single-argument form produces the same output under
    # the Python 2 print statement and is valid as the Python 3 print
    # function, whereas the bare `print options` is a SyntaxError on Python 3.
    print(options)


if __name__ == '__main__':
    # option_parsing() yields an (options, args) pair; args is not used here.
    options, _ = option_parsing()
    print_config(options)

    source = options.data
    if source not in ('random', 'typo', 'protein'):
        # Unrecognised data source: show the usage text and stop.
        option_parsing(print_usage=True)
        sys.exit(0)

    # Build the data set from whichever source was requested.
    if source == 'random':
        Data = data_generation.random_data_generation(
            options.size,
            options.dim,
            options.alphabet_size,
        )
    elif source == 'typo':
        Data = data_generation.data_typo(
            options.dim,
            options.typos,
            options.alphabet_size,
        )
    else:
        Data = data_generation.read_file_protein(file_name=options.file)

    driver_function(
        Data,
        options.alphabet_size,
        options.delta,
        options.file_suffix,
    )