Пример #1
0
def omsim(settings):
    """Generate simulated molecules for one configuration and write BNX files.

    Steps, in order:

    1. Change the working directory to ``settings.directory``.
    2. Import the input maps, run them through ``KMP`` and write the
       processed input back out.
    3. Filter the input, build cumulative sequence lengths and the reverse
       nicking sites.
    4. For every chip, generate ``settings.scans_per_chip`` scans and write
       one ``<prefix>.<label>.<chip>.bnx`` file per label.

    Args:
        settings: Run configuration. Attributes read here: ``directory``,
            ``chips``, ``coverage``, ``scans_per_chip``, ``get_scan_size()``,
            ``labels``, ``min_nicks``, ``prefix`` and ``name``.

    Side effects:
        * Changes the process working directory.
        * Sets ``settings.chips`` when it was 0 and always sets
          ``settings.estimated_coverage``.
        * Writes the processed input and the BNX files; prints progress
          messages to stdout.
    """
    # cd to the directory containing the configuration file
    os.chdir(settings.directory)

    # process input
    cmaps = import_input(settings)
    print('Imported ' + str(sum(cmaps[iname].count() for iname in cmaps)) +
          ' nicks in ' +
          str(sum(cmaps[iname].seq_len() for iname in cmaps)) + 'bp.')
    cmaps = KMP(settings, cmaps)
    write_processed_input(settings, cmaps)

    # filter input for enzymes / files we need
    # NOTE: BED output is currently disabled; `seqs` and the per-molecule
    # `meta` records are only kept around for that purpose.
    seqs, seq_lens, fns = filter_input(settings, cmaps)

    # running total of the sequence lengths, one entry per sequence
    cum_seq_lens = []
    running_total = 0
    for seq_len in seq_lens:
        running_total += seq_len
        cum_seq_lens.append(running_total)
    print('Using ' + str(sum(len(f) for f in fns)) + ' nicks in ' +
          str(sum(seq_lens)) + 'bp.')

    # compute reverse nicking sites
    rns = get_rns(settings, fns, seq_lens)
    total_len = sum(seq_lens)

    # estimate number of chips based on expected coverage (at least one)
    if settings.chips == 0:
        wanted = int(total_len * settings.coverage /
                     (settings.scans_per_chip * settings.get_scan_size()))
        settings.chips = max(wanted, 1)

    # estimate coverage
    settings.estimated_coverage = int(
        settings.get_scan_size() * settings.scans_per_chip * settings.chips /
        float(total_len))
    print('Generating reads on ' + str(settings.chips) + ' chip' +
          ('' if settings.chips == 1 else 's') + ', estimated coverage: ' +
          str(settings.estimated_coverage) + 'x.')

    noise = Noise(settings)
    bnx = BNX(settings, noise)

    for chip in range(1, settings.chips + 1):
        chip_settings = {
            'size': 0,
            'scans': 0,
            'chip_id': '20249,11843,07/17/2014,840014289',
            'run_id': str(chip),
            'flowcell': 1,
            'molecule_count': 0,
            'bpp': 425,
            'stretch_factor': noise.chip_stretch_factor(),
        }
        chip_settings['bpp'] /= chip_settings['stretch_factor']
        molecules = {label: [] for label in settings.labels}

        # generate reads
        for scan in range(1, settings.scans_per_chip + 1):
            chip_settings['scans'] = scan
            scan_stretch = noise.scan_stretch_factor(
                chip_settings['stretch_factor'])
            for l, m, meta in noise.generate_scan(seq_lens, cum_seq_lens,
                                                  fns, rns):
                # split the molecule's nicks by label
                nicks_by_label = {label: [] for label in settings.labels}
                for nick in m:
                    nicks_by_label[nick[1]['label']].append(nick[0])
                for label in settings.labels:
                    nicks = nicks_by_label[label]
                    if len(nicks) >= settings.min_nicks:
                        # Keep the stretch with its molecule: a single flat
                        # list indexed by a per-label counter pairs molecules
                        # with the wrong value once there are several labels.
                        relative_stretch = (
                            noise.mol_stretch_factor(scan_stretch) /
                            chip_settings['stretch_factor'])
                        molecules[label].append(
                            (l, nicks, scan, meta, relative_stretch))
                chip_settings['molecule_count'] += 1
                chip_settings['size'] += l

        # write output: one BNX file per label, molecule IDs restart at 1
        for label in settings.labels:
            bnx_path = (settings.prefix + '.' + label + '.' + str(chip) +
                        '.bnx')
            # `with` closes the file even if a write raises
            with open(bnx_path, 'w') as ofile:
                bnx.write_bnx_header(ofile, label, chip_settings)
                for molecule_id, entry in enumerate(molecules[label], 1):
                    l, m, s, _meta, relative_stretch = entry
                    bnx.write_bnx_entry((molecule_id, l, s), m, ofile,
                                        chip_settings, relative_stretch)
        print('Finished chip ' + str(chip) + '/' + str(settings.chips))
    print('Finished processing ' + settings.name + '.\n')
Пример #2
0
def omsim(settings):
    """Generate simulated molecules for one configuration and write BNX files.

    The function changes into ``settings.directory``, imports and
    preprocesses the input maps (``import_input``, ``KMP``,
    ``write_processed_input``, ``filter_input``, ``get_rns``) and then, for
    each chip, generates ``settings.scans_per_chip`` scans of molecules and
    writes one ``<prefix>.<label>.<chip>.bnx`` file per label.

    Args:
        settings: Run configuration. Attributes read here: ``directory``,
            ``chips``, ``coverage``, ``scans_per_chip``, ``get_scan_size()``,
            ``labels``, ``min_nicks``, ``prefix`` and ``name``.

    Side effects:
        * Changes the process working directory.
        * Sets ``settings.chips`` when it was 0 and always sets
          ``settings.estimated_coverage``.
        * Writes the processed input and the BNX files; prints progress
          messages to stdout.
    """
    # cd to the directory containing the configuration file
    os.chdir(settings.directory)

    # process input
    cmaps = import_input(settings)
    nick_count = sum(cmaps[iname].count() for iname in cmaps)
    bp_count = sum(cmaps[iname].seq_len() for iname in cmaps)
    print('Imported ' + str(nick_count) + ' nicks in ' + str(bp_count) +
          'bp.')
    cmaps = KMP(settings, cmaps)

    # write processed input
    write_processed_input(settings, cmaps)

    # filter input for enzymes / files we need
    # NOTE: BED output is currently disabled; `seqs` and the per-molecule
    # `meta` records are only kept around for that purpose.
    seqs, seq_lens, fns = filter_input(settings, cmaps)

    # cumulative sequence lengths, one entry per sequence
    cum_seq_lens = []
    cumulative = 0
    for seq_len in seq_lens:
        cumulative += seq_len
        cum_seq_lens.append(cumulative)
    print('Using ' + str(sum(len(f) for f in fns)) + ' nicks in ' +
          str(sum(seq_lens)) + 'bp.')

    # compute reverse nicking sites
    rns = get_rns(settings, fns, seq_lens)
    genome_len = sum(seq_lens)

    # estimate number of chips based on expected coverage (never below 1)
    if settings.chips == 0:
        estimate = int(
            genome_len * settings.coverage /
            (settings.scans_per_chip * settings.get_scan_size()))
        settings.chips = max(estimate, 1)

    # estimate coverage
    settings.estimated_coverage = int(
        settings.get_scan_size() * settings.scans_per_chip * settings.chips /
        float(genome_len))
    print('Generating reads on ' + str(settings.chips) + ' chip' +
          ('' if settings.chips == 1 else 's') + ', estimated coverage: ' +
          str(settings.estimated_coverage) + 'x.')

    noise = Noise(settings)
    bnx = BNX(settings, noise)

    for chip in range(1, settings.chips + 1):
        chip_settings = {
            'size': 0,
            'scans': 0,
            'chip_id': '20249,11843,07/17/2014,840014289',
            'run_id': str(chip),
            'flowcell': 1,
            'molecule_count': 0,
            'bpp': 425,
            'stretch_factor': noise.chip_stretch_factor(),
        }
        chip_settings['bpp'] /= chip_settings['stretch_factor']
        molecules = {label: [] for label in settings.labels}

        # generate reads
        for scan in range(1, settings.scans_per_chip + 1):
            chip_settings['scans'] = scan
            scan_stretch = noise.scan_stretch_factor(
                chip_settings['stretch_factor'])
            for l, m, meta in noise.generate_scan(seq_lens, cum_seq_lens,
                                                  fns, rns):
                # group this molecule's nick positions by label
                molecule = {label: [] for label in settings.labels}
                for nick in m:
                    molecule[nick[1]['label']].append(nick[0])
                for label in settings.labels:
                    if len(molecule[label]) < settings.min_nicks:
                        continue
                    # The stretch travels with the molecule. A shared list
                    # indexed by the per-label molecule ID would hand out
                    # values drawn for other molecules when several labels
                    # are in use.
                    mol_stretch = (noise.mol_stretch_factor(scan_stretch) /
                                   chip_settings['stretch_factor'])
                    molecules[label].append(
                        (l, molecule[label], scan, meta, mol_stretch))
                chip_settings['molecule_count'] += 1
                chip_settings['size'] += l

        # write output: one BNX file per label, molecule IDs restart at 1
        for label in settings.labels:
            out_name = (settings.prefix + '.' + label + '.' + str(chip) +
                        '.bnx')
            # context manager guarantees the file is closed on errors too
            with open(out_name, 'w') as ofile:
                bnx.write_bnx_header(ofile, label, chip_settings)
                for molecule_id, record in enumerate(molecules[label], 1):
                    l, m, s, _meta, mol_stretch = record
                    bnx.write_bnx_entry((molecule_id, l, s), m, ofile,
                                        chip_settings, mol_stretch)
        print('Finished chip ' + str(chip) + '/' + str(settings.chips))
    print('Finished processing ' + settings.name + '.\n')