def main():
    """Read k-mers from a dataset file and print the contigs they generate.

    The input path is ``args.file`` from ``util.create_parser``; when it is
    not supplied, ``real_dataset.txt`` under ``DATASET_DIR`` is used instead.
    Every non-blank line of the file is treated as one k-mer. The list
    returned by ``generateContigs`` is printed space-separated on one line.
    """
    util = w2lib.Week2Library()
    description = '''\
    Generate the contigs from a collection of reads (with imperfect coverage).
    Input File format :
    ---------------------------------------
    ATG
    ATG
    TGT
    TGG
    CAT
    GGA
    GAT
    AGA
    Expected output :
    ---------------------------------------
    AGA ATG ATG CAT GAT TGGA TGT
    '''
    args = util.create_parser(__file__, description)
    dataset_path = "{}/real_dataset.txt".format(DATASET_DIR)
    # Default to the dataset folder, if not provided
    if not args.file:
        args.file = dataset_path
    # Read from dataset file; blank lines (e.g. a trailing newline at the
    # end of the file) are skipped so no empty k-mer reaches generateContigs.
    with open(args.file, 'r') as f:
        kmers = [line.strip() for line in f if line.strip()]
    result = generateContigs(kmers)
    print(" ".join(result))
def main():
    """Read k and a list of k-mers from a dataset file and print the genome.

    The input path is ``args.file`` from ``util.create_parser``; when it is
    not supplied, ``real_dataset.txt`` under ``DATASET_DIR`` is used instead.
    The first line holds the integer k; every following non-blank line is one
    k-mer. The value returned by ``stringReconstruction`` is printed.

    Raises:
        ValueError: if the first line of the file is not an integer.
    """
    util = w2lib.Week2Library()
    # The help text describes what this script actually reads: k followed by
    # k-mers, not an adjacency list.
    description = '''\
    Given a k-mer length and a list of k-mers, it will reconstruct and output the genome string.
    Input File format :
    ---------------------------------------
    4
    CTTA
    ACCA
    TACC
    GGCT
    GCTT
    TTAC
    Expected output :
    ---------------------------------------
    GGCTTACCA
    '''
    args = util.create_parser(__file__, description)
    dataset_path = "{}/real_dataset.txt".format(DATASET_DIR)
    # Default to the dataset folder, if not provided
    if not args.file:
        args.file = dataset_path
    # Read from dataset file; blank lines are skipped so that a trailing
    # newline does not produce an empty k-mer.
    with open(args.file, 'r') as f:
        k = f.readline()
        kmers = [line.strip() for line in f if line.strip()]
    genome = stringReconstruction(int(k), kmers)
    print(genome)
def main():
    """Read k from a dataset file and print the universal circular string.

    The input path is ``args.file`` from ``util.create_parser``; when it is
    not supplied, ``real_dataset.txt`` under ``DATASET_DIR`` is used instead.
    Only the first line of the file is read; it is converted to ``int`` and
    passed to ``universalCircularString``, whose result is printed.
    """
    helper = w2lib.Week2Library()
    description = '''\
    Given a kmer length, it will create all binary combinations with that length.
    (ex. k=3, 000, 001, 010, 011, 100, 101, 110, 111)
    It then takes the kmer, and construct a cycle that covers all of kmers
    to construct the universal circular string.
    Input File format :
    ---------------------------------------
    4
    Expected output :
    ---------------------------------------
    0000110010111101
    '''
    options = helper.create_parser(__file__, description)
    fallback_path = "{}/real_dataset.txt".format(DATASET_DIR)
    # No file given on the command line: fall back to the dataset folder.
    if not options.file:
        options.file = fallback_path
    # Only the first line of the dataset is consumed.
    with open(options.file, 'r') as handle:
        first_line = handle.readline()
    print(universalCircularString(int(first_line.strip())))
def main():
    """Read an adjacency list from a dataset file and print its maximal
    non-branching paths.

    The input path is ``args.file`` from ``util.create_parser``; when it is
    not supplied, ``real_dataset.txt`` under ``DATASET_DIR`` is used instead.
    Each non-blank line has the form ``node -> n1,n2,...`` and is stored as
    ``adj_dict[node] = [n1, n2, ...]``. Every path returned by
    ``maximalNonBranchingPaths`` is printed on its own line, joined by
    ``" -> "``.
    """
    util = w2lib.Week2Library()
    # The help text names the computation this script performs (maximal
    # non-branching paths), matching the multi-path expected output.
    description = '''\
    Given adjacency list, it will find and output all the maximal non-branching paths.
    Input File format :
    ---------------------------------------
    1 -> 2
    2 -> 3
    3 -> 4,5
    6 -> 7
    7 -> 6
    Expected output :
    ---------------------------------------
    1 -> 2 -> 3
    3 -> 4
    3 -> 5
    7 -> 6 -> 7
    '''
    args = util.create_parser(__file__, description)
    dataset_path = "{}/real_dataset.txt".format(DATASET_DIR)
    # Default to the dataset folder, if not provided
    if not args.file:
        args.file = dataset_path
    # Read from dataset file and format into adjacency list
    adj_dict = {}
    with open(args.file, 'r') as f:
        for content in f:
            # Skip blank lines (e.g. a trailing newline); they have no
            # "->" and would otherwise raise IndexError below.
            if not content.strip():
                continue
            row = content.split("->")
            left_node = row[0].strip()
            # Strip each neighbour so "4, 5" and "4,5" parse identically.
            right_nodes = [node.strip() for node in row[1].split(",")]
            adj_dict[left_node] = right_nodes
    paths = maximalNonBranchingPaths(adj_dict)
    for path in paths:
        print(" -> ".join(path))
def main():
    """Read an adjacency list from a dataset file and print its Eulerian cycle.

    The input path is ``args.file`` from ``util.create_parser``; when it is
    not supplied, ``real_dataset.txt`` under ``DATASET_DIR`` is used instead.
    Each non-blank line has the form ``node -> n1,n2,...``; all spaces are
    removed before parsing. The sequence returned by ``eulerCycle`` is printed
    on one line, joined by ``"->"``.
    """
    util = w2lib.Week2Library()
    description = '''\
    Given adjacency list, it will reconstruct and output the Eulerian cycle.
    Input File format :
    ---------------------------------------
    0 -> 3
    1 -> 0
    2 -> 1,6
    3 -> 2
    4 -> 2
    5 -> 4
    6 -> 5,8
    7 -> 9
    8 -> 7
    9 -> 6
    Expected output :
    ---------------------------------------
    6->8->7->9->6->5->4->2->1->0->3->2->6
    '''
    args = util.create_parser(__file__, description)
    dataset_path = "{}/real_dataset.txt".format(DATASET_DIR)
    # Default to the dataset folder, if not provided
    if not args.file:
        args.file = dataset_path
    adjacency_dict = {}
    # Read from dataset file
    with open(args.file, 'r') as f:
        for line in f:
            compact = line.replace(" ", "").strip()
            # Skip blank lines (e.g. a trailing newline); they have no
            # "->" and would otherwise raise IndexError below.
            if not compact:
                continue
            row = compact.split("->")
            adjacency_dict[row[0]] = row[1].split(",")
    cycle = eulerCycle(adjacency_dict)
    print("->".join(cycle))
def main():
    """Read k, d and paired reads from a dataset file and print the string
    they spell.

    The input path is ``args.file`` from ``util.create_parser``; when it is
    not supplied, ``real_dataset.txt`` under ``DATASET_DIR`` is used instead.
    The first line holds the integers k and d separated by whitespace. Every
    following non-blank line is ``read1|read2`` and becomes a two-element
    list ``[read1, read2]``. The value returned by
    ``stringSpelledByGappedPatterns`` is printed.

    Raises:
        ValueError: if the first line does not contain exactly two integers.
    """
    util = w2lib.Week2Library()
    description = '''\
    Given k-length, distance of the paired reads, and the paired reads itself,
    it will try to align the paired reads to reconstruct the full string.
    Input File format :
    ---------------------------------------
    4 2
    GACC|GCGC
    ACCG|CGCC
    CCGA|GCCG
    CGAG|CCGG
    GAGC|CGGA
    Expected output :
    ---------------------------------------
    GACCGAGCGCCGGA
    '''
    args = util.create_parser(__file__, description)
    dataset_path = "{}/real_dataset.txt".format(DATASET_DIR)
    # Default to the dataset folder, if not provided
    if not args.file:
        args.file = dataset_path
    paired_reads = []
    # Read from dataset file
    with open(args.file, 'r') as f:
        # split() with no argument tolerates tabs and repeated spaces
        # between k and d.
        k, d = f.readline().split()
        for line in f:
            # Skip blank lines (e.g. a trailing newline); they have no
            # "|" and would otherwise raise IndexError below.
            if not line.strip():
                continue
            reads = line.split("|")
            paired_reads.append([reads[0].strip(), reads[1].strip()])
    text = stringSpelledByGappedPatterns(int(k), int(d), paired_reads)
    print(text)
def main():
    """Read k, d and paired reads from a dataset file and print the
    reconstructed string.

    The input path is ``args.file`` from ``util.create_parser``; when it is
    not supplied, ``real_dataset.txt`` under ``DATASET_DIR`` is used instead.
    The first line holds the integers k and d separated by whitespace. Every
    following non-blank line is passed through, stripped but otherwise
    unparsed (still in ``read1|read2`` form), to
    ``stringReconstructionFromReadPairs``, whose result is printed.

    Raises:
        ValueError: if the first line does not contain exactly two integers.
    """
    util = w2lib.Week2Library()
    # The help text describes what this script actually reads: k, d and
    # paired reads, not an adjacency list.
    description = '''\
    Given k-length, distance of the paired reads, and the paired reads itself,
    it will reconstruct and output the full string.
    Input File format :
    ---------------------------------------
    4 2
    GAGA|TTGA
    TCGT|GATG
    CGTG|ATGT
    TGGT|TGAG
    GTGA|TGTT
    GTGG|GTGA
    TGAG|GTTG
    GGTC|GAGA
    GTCG|AGAT
    Expected output :
    ---------------------------------------
    GTGGTCGTGAGATGTTGA
    '''
    args = util.create_parser(__file__, description)
    dataset_path = "{}/real_dataset.txt".format(DATASET_DIR)
    # Default to the dataset folder, if not provided
    if not args.file:
        args.file = dataset_path
    # Read from dataset file
    with open(args.file, 'r') as f:
        # split() with no argument tolerates tabs and repeated spaces
        # between k and d.
        k, d = f.readline().split()
        # Blank lines (e.g. a trailing newline) are skipped so no empty
        # read pair is handed to the reconstruction routine.
        paired_reads = [line.strip() for line in f if line.strip()]
    text = stringReconstructionFromReadPairs(int(k), int(d), paired_reads)
    print(text)