示例#1
0
    help=
    'A range for sampling kmer by kmer number, use "[]" to include margin, and "()" to exclude margin.'
)
# Read the command-line options used below: input path, output path and range text.
args = parser.parse_args()
# args = parser.parse_args(['-i', 'kmer.json', '-o', 'kmer.sample.json', '-r', '[2300, 10000]'])
inputFile, outputFile, rang = args.input, args.output, args.range
print('Sampling range is {0}'.format(rang))

# The text between the two bracket characters holds the comma-separated bounds.
bounds = rang[1:-1].split(',')
left = int(bounds[0])
right = int(bounds[1])
# The filter below uses strict comparisons, so an inclusive bracket is
# expressed by widening the corresponding bound by one.
if rang[0] == '[':
    left -= 1
if rang[-1] == ']':
    right += 1

count = 0
countPass = 0
beads = seqIO.beadJson(inputFile)
with open(outputFile, 'w') as f:
    for count, item in enumerate(beads, 1):
        # Size of the first value stored in the record.
        kmerNumber = len(list(item.values())[0])
        if left < kmerNumber < right:
            countPass += 1
            # One JSON document per output line.
            f.write(json.dumps(item) + '\n')
        # Running totals: records seen, records kept.
        print(count, countPass)
示例#2
0
from metaSeq import io as seqIO

# Command-line interface: an input k-mer JSON file and an output path.
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--input', help='Input Kmer json file.')
parser.add_argument('-o', '--output', help='Output pairwise jacarrd.')
args = parser.parse_args()
#args = parser.parse_args(['-i', 'kmer.sample.json', '-o', 'kmer.sample.nrdt.json'])
inputFile, outputFile = args.input, args.output
# NOTE(review): this path is hard-coded rather than taken from the parsed
# arguments -- confirm that is intended.
with open('kmer.sample.nrdt.json') as f:
    nrd = json.load(f)

#% Test the pairwise distance
from itertools import combinations
# Build one (barcode, kmers) tuple per record, using the record's first
# key/value pair.
beadPool = []
for item in seqIO.beadJson(inputFile):
    barcode, kmers = list(item.items())[0]
    beadPool.append((barcode, kmers))

# Walk every unordered pair of beads from beadPool.
pd = []
count = 0
for pair in combinations(beadPool, 2):
    count += 1
    # Each member of the pair is a (barcode, kmers) tuple, so len(pair[i]) is
    # always 2 and comparing those lengths can never order the beads.
    # Compare the k-mer collections themselves so that k1 is the bead with
    # fewer k-mers (ties keep the original else-branch ordering).
    if len(pair[0][1]) < len(pair[1][1]):
        k1 = pair[0]
        k2 = pair[1]
    else:
        k1 = pair[1]
        k2 = pair[0]
    share = 0
示例#3
0
import argparse
from metaSeq import kmer
from metaSeq import io as seqIO
from itertools import combinations
import random

# Command-line interface: a JSON k-mer input file and an output file.
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--input', help='JSON kmer file.')
parser.add_argument('-o', '--output', help='Output file.')
args = parser.parse_args()
# args = parser.parse_args(['-i', 'kmer.sample.json', '-o', 'kmer.distance.tsv'])
inputFile = args.input
outputFile = args.output

print('Read in kmer json file')
# Read the file given with -i/--input. Previously the path was hard-coded and
# the option was silently ignored; the old path is kept as the fallback so
# running without -i behaves as before.
kmerParser = seqIO.beadJson(inputFile if inputFile else 'kmer.sample.json')
kmers = []
for item in kmerParser:
    # Turn the record's first (key, value) pair into a mutable [key, value] list.
    currentBead = list(list(item.items())[0])
    # Keep a random tenth (integer division) of the value's elements.
    # NOTE(review): the random module is not seeded here, so the subsample
    # differs between runs -- confirm that is acceptable.
    currentBead[1] = random.sample(currentBead[1], len(currentBead[1]) // 10)
    kmers.append(currentBead)
print('Found {0} beads'.format(len(kmers)))
print('Start calculating kmer distance')
# mashD starts empty and is not written to within the visible lines.
mashD = []
count = 0
# Visit every unordered pair of entries in kmers.
for pairs in combinations(kmers, 2):
    count += 1
    # Progress output: nothing is printed until count reaches 100000; from
    # then on the counter is printed at every multiple of 10000.
    if count // 100000 > 0 and count % 10000 == 0:
        print(count)
    # Each entry is a [key, sampled values] list built when kmers was filled.
    k1 = pairs[0]
    k2 = pairs[1]
示例#4
0
        if item[2] > 0.02:
            f.write('{0}\t{1}\t{2}\n'.format(item[0], item[1], item[2]))
'''

#%%
''' Extract bead sequences by module number '''
from metaSeq import io as seqIO
from metaSeq import bead

# Map the first tab-separated column of each line to the second column.
# The first line of the file is read and discarded.
module = {}
with open('kmer.jcd.0.02.module.txt', 'r') as f:
    f.readline()
    for record in f:
        fields = record.strip('\n').split('\t')
        module[fields[0]] = fields[1]
print(len(module))

# One empty list per distinct value found in the mapping.
cluster = {label: [] for label in set(module.values())}
print(len(cluster))

beads = seqIO.beadJson('CL100077200_L01.json')
for item in beads:
    b = bead.beadSequence(item)
    classNumber = module.get(b.barcode, False)
    # Beads whose barcode has no (or an empty) entry in the mapping are skipped.
    if not classNumber:
        continue
    cluster[classNumber].extend(b.fastaSequences())
print(len(cluster))

# Write each group's collected sequences to its own file under cluster/.
for key, value in cluster.items():
    seqIO.write_seqs(value, 'cluster/{0}.fa'.format(key), fastx='a', mode='w')