示例#1
0
def get_base_masks(rundir):
    """Return the base masks to use when demultiplexing, grouped by index length.

    Samples are read from ``<rundir>/<flowcell_id>.csv`` and grouped by the
    length of their ``Index`` value (dashes and the ``NoIndex`` placeholder
    are not counted). One base mask is then built per group from the read
    configuration of the run.

    :param str rundir: Path to the run directory.
    :returns dict: Maps each index length to
        ``{'base_mask': [...], 'samples': {'fieldnames': ..., 'samples': [...]}}``
        where ``base_mask`` holds one mask string per read and ``samples``
        holds the samplesheet rows of that group. Empty when the samplesheet
        does not exist or has no rows.
    :raises ZeroDivisionError: If there are samples and the highest index
        read number reported by ``last_index_read`` is 1.
    """
    runsetup = parsers.get_read_configuration(rundir, sort=True)
    flowcell_id = parsers.get_flowcell_id(rundir)
    base_masks = {}

    # Create groups of samples by index length
    ss_name = os.path.join(rundir, str(flowcell_id) + '.csv')
    if os.path.exists(ss_name):
        # The context manager guarantees the samplesheet handle is closed.
        # Binary mode is kept from the original: it is what the Python 2 csv
        # module expects.
        with open(ss_name, 'rb') as ss_handle:
            ss = csv.DictReader(ss_handle, delimiter=',')
            samplesheet = list(ss)
            fieldnames = ss.fieldnames
        for row in samplesheet:
            index_length = len(row['Index'].replace('-', '').replace('NoIndex', ''))
            group = base_masks.setdefault(
                index_length,
                {'base_mask': [],
                 'samples': {'fieldnames': fieldnames, 'samples': []}})
            group['samples']['samples'].append(row)

    if not base_masks:
        # Nothing to build a mask for; avoids re-parsing the run configuration.
        return base_masks

    # Same for every group, so it is computed once instead of per iteration.
    # NOTE(review): presumably the highest index read number minus one equals
    # the number of index reads (read 1 not being an index read) -- confirm.
    index_read_divisor = int(last_index_read(rundir)) - 1

    # Create the basemask for each group
    for index_length, group in base_masks.items():
        # Index bases not yet assigned to an index read.
        remaining = index_length
        bm = []
        # Floor division keeps the value integral regardless of division
        # semantics, so the mask never contains something like 'I6.0'.
        per_index_size = index_length // index_read_divisor

        for read in runsetup:
            cycles = read['NumCycles']
            if read['IsIndexedRead'] == 'N':
                bm.append('Y' + cycles)
                continue

            num_cycles = int(cycles)
            if remaining > num_cycles:
                # I_iN_y(,I_iN_y) or I(,I): the index spans several reads
                i_remainder = num_cycles - per_index_size
                if i_remainder > 0:
                    bm.append('I' + str(per_index_size) + 'N' + str(i_remainder))
                else:
                    bm.append('I' + cycles)
            elif remaining > 0:
                # I_iN_y(,N) or I(,N): the whole index fits in this read
                to_mask = 'I' + str(remaining)
                if remaining < num_cycles:
                    to_mask = to_mask + 'N' + str(num_cycles - remaining)
                bm.append(to_mask)
                # Any later index read of this group is fully masked.
                remaining = 0
            else:
                bm.append('N' + cycles)
        group['base_mask'] = bm
    return base_masks
示例#2
0
def last_index_read(directory):
    """Return the highest read number among the indexed reads of a run.

    The read configuration is obtained from
    ``parsers.get_read_configuration(directory)``; only entries whose
    ``IsIndexedRead`` value is ``"Y"`` are considered, and a missing
    ``Number`` counts as 0.

    :param directory: Run directory passed on to the read configuration parser.
    :returns int: The largest ``Number`` of an indexed read, or 0 when the
        run has no indexed reads.
    """
    highest = None
    for entry in parsers.get_read_configuration(directory):
        # Skip everything that is not flagged as an index read.
        if entry.get("IsIndexedRead", "") != "Y":
            continue
        number = int(entry.get("Number", 0))
        if highest is None or number > highest:
            highest = number
    return 0 if highest is None else highest