help= 'A range for sampling kmer by kmer number, use "[]" to include margin, and "()" to exclude margin.' ) args = parser.parse_args() # args = parser.parse_args(['-i', 'kmer.json', '-o', 'kmer.sample.json', '-r', '[2300, 10000]']) inputFile = args.input outputFile = args.output rang = args.range print('Sampling range is {0}'.format(rang)) left = int(rang[1:-1].split(',')[0]) right = int(rang[1:-1].split(',')[1]) if rang[0] == '[': left -= 1 else: pass if rang[-1] == ']': right += 1 else: pass count = 0 countPass = 0 beads = seqIO.beadJson(inputFile) with open(outputFile, 'w') as f: for item in beads: count += 1 kmerNumber = len(list(item.values())[0]) if kmerNumber > left and kmerNumber < right: countPass += 1 f.write('{0}\n'.format(json.dumps(item))) print(count, countPass)
from itertools import combinations

from metaSeq import io as seqIO

parser = argparse.ArgumentParser()
parser.add_argument('-i', '--input', help='Input Kmer json file.')
parser.add_argument('-o', '--output', help='Output pairwise jacarrd.')
args = parser.parse_args()
# Example invocation for interactive debugging:
#   args = parser.parse_args(['-i', 'kmer.sample.json', '-o', 'kmer.sample.nrdt.json'])
inputFile = args.input
outputFile = args.output

# NOTE(review): this path is hard-coded rather than taken from the command
# line, and `nrd` is not used in the visible part of the script -- confirm
# whether this load is still needed.
with open('kmer.sample.nrdt.json', 'r') as f:
    nrd = json.load(f)

#% Test the pairwise distance
# Collect every bead as a (barcode, kmers) tuple; the barcode is the first key
# of each record produced by seqIO.beadJson.
beadPool = []
for item in seqIO.beadJson(inputFile):
    barcode = list(item.keys())[0]
    kmers = item[barcode]
    beadPool.append((barcode, kmers))

pd = []
count = 0
for pair in combinations(beadPool, 2):
    count += 1
    # Order the pair so that k1 is the bead with fewer kmers. Each element of
    # `pair` is a (barcode, kmers) tuple, so the kmer collection at index 1 is
    # what has to be measured: len() of the tuple itself is always 2, which
    # made the first branch unreachable. On a tie the second bead stays k1.
    if len(pair[0][1]) < len(pair[1][1]):
        k1, k2 = pair[0], pair[1]
    else:
        k1, k2 = pair[1], pair[0]
    share = 0
import argparse
from metaSeq import kmer
from metaSeq import io as seqIO
from itertools import combinations
import random

parser = argparse.ArgumentParser()
parser.add_argument('-i', '--input', help='JSON kmer file.')
parser.add_argument('-o', '--output', help='Output file.')
args = parser.parse_args()
# Example invocation for interactive debugging:
#   args = parser.parse_args(['-i', 'kmer.sample.json', '-o', 'kmer.distance.tsv'])
inputFile = args.input
outputFile = args.output

print('Read in kmer json file')
# Read the file given with -i/--input. The path used to be hard-coded to
# 'kmer.sample.json', which ignored the command-line argument entirely; that
# name is kept only as the fallback when -i is omitted.
kmerParser = seqIO.beadJson(inputFile or 'kmer.sample.json')
kmers = []
for item in kmerParser:
    # Each record contributes its first (barcode, kmers) entry.
    barcode, beadKmers = list(item.items())[0]
    # Keep a random 10% subsample (rounded down) of the bead's kmers.
    currentBead = [barcode, random.sample(beadKmers, len(beadKmers) // 10)]
    kmers.append(currentBead)
print('Found {0} beads'.format(len(kmers)))

print('Start calculating kmer distance')
mashD = []
count = 0
for pairs in combinations(kmers, 2):
    count += 1
    # Progress report: silent for the first 100000 pairs, then every 10000.
    if count >= 100000 and count % 10000 == 0:
        print(count)
    k1 = pairs[0]
    k2 = pairs[1]
if item[2] > 0.02: f.write('{0}\t{1}\t{2}\n'.format(item[0], item[1], item[2])) ''' #%% ''' Extract bead sequences by module number ''' from metaSeq import io as seqIO from metaSeq import bead module = {} with open('kmer.jcd.0.02.module.txt', 'r') as f: f.readline() for line in f: line = line.strip('\n').split('\t') module[line[0]] = line[1] print(len(module)) cluster = {} for item in list(set(module.values())): cluster[item] = [] print(len(cluster)) beads = seqIO.beadJson('CL100077200_L01.json') for item in beads: b = bead.beadSequence(item) classNumber = module.get(b.barcode, False) if classNumber: cluster[classNumber] += b.fastaSequences() print(len(cluster)) for key, value in cluster.items(): seqIO.write_seqs(value, 'cluster/{0}.fa'.format(key), fastx='a', mode='w')