Пример #1
0
# Use the first command-line argument as the FASTA path when one is given;
# otherwise use the normalised path that get_fasta_file returns for
# 'T00759-small.fa'.
fasta_file = (
    sys.argv[1]
    if len(sys.argv) > 1
    else os.path.normpath(get_fasta_file('T00759-small.fa'))
)

# Build the algorithm from the options and initialise it with the path,
# encoded using stdin's encoding (falling back to ASCII when stdin has none).
algorithm = stempy.Algorithm(options)
fasta_path_encoding = sys.stdin.encoding or 'ascii'
algorithm.initialise(fasta_file.encode(fasta_path_encoding))
algorithm._initialise_p_value_tables()
data = algorithm.data


#
# Build the model, then set its seed pseudo-counts and seed it
#
logging.info('Creating model')
bg = algorithm._get_bg_model(W)
# Binding-site model starts from a uniform PSSM of width W.
uniform_pssm = stempy.initialise_uniform_pssm(W, algorithm.options.alphabet_size)
bs = stempy.PssmBindingSiteModel(uniform_pssm)
model = stempy.Model(
    data,
    bs,
    bg,
    _lambda=algorithm.options.lambda_,
)
model.bs.seed_pseudo_counts = options.starts_seed_pseudo_counts
logging.info('Seeding model with %s', seed)
# The seed has to be exactly as long as the motif width.
if len(seed) != W:
    raise ValueError('Seed must be same length as motif.')
model.bs.seed(seed, True)
model.set_lambda_for_sites(data.num_sequences)


# Iterate over the best-W-mer finder classes listed below; the
# FindBestWMersSet entry is commented out and therefore not visited.
# NOTE(review): the loop body is cut off in this excerpt - only its opening
# comment marker is visible - so what is done with each finder cannot be
# stated from here.
for BestWMerFinder in [
    stempy.FindBestWMersMultiIndex, 
    # stempy.FindBestWMersSet, 
    stempy.FindBestWMersSortedVec
]:
    #
# Read the sequences named by `fasta`
num_bases, seqs, ids, index = stempy.read_sequences(fasta, options)

# Wrap the sequence index in a data object, inside a Timer context
with Timer(msg='build data'):
    data = stempy.Data(index, max_W=options.max_w)

# Background: Markov model and frequencies from the index, likelihoods
# over the data, then the background model for width W
mm, freqs = stempy.create_markov_model_order_from_index_4(
    data.index,
    options.back_dist_prior,
)
freqs_with_pseudo_counts = freqs.add_pseudo_counts(options.back_dist_prior)
lls = mm.calculate_likelihoods(data)
bg_model = stempy.create_bg_model_from_base_likelihoods(
    W,
    data,
    lls,
    freqs_with_pseudo_counts,
)

# Binding-site model: start from a uniform PSSM of width W, then seed it
uniform_pssm = stempy.initialise_uniform_pssm(W, options.alphabet_size)
bs_model = stempy.PssmBindingSiteModel(uniform_pssm)
bs_model.seed(seed)

# Full model combining data, binding-site and background models (lambda 0)
model = stempy.Model(data, bs_model, bg_model, _lambda=0.0)

# Find the instances whose Z exceeds the threshold, inside a Timer context,
# and report how many were found.
Z_threshold = 0.3
with Timer(msg='find instances with Z>%f' % (Z_threshold,)):
    instance_finder = stempy.FindInstances(data, model, Z_threshold)
    instance_finder()
    num_instances_found = len(instance_finder.instances)
    logging.info('Found %d instances', num_instances_found)


# Create a finder for the 10000 best W-mers from the data and model, inside a
# Timer context.
# NOTE(review): the excerpt ends here; any further statements in this `with`
# body, and whatever uses w_mer_finder, are not visible.
num_W_mers_to_find = 10000
with Timer(msg='find %d best W-mers' % num_W_mers_to_find):
    w_mer_finder = stempy.create_best_w_mer_finder(data, model, num_W_mers_to_find)