Example #1
0
File: legion.py Project: mscook/nesoni
def _run_and_save_state(action, timestamp):
    """ Run an action (unless LOCAL.done_selection says it is already
        done) and pickle it, stamped with `timestamp`, to its state file
        so later runs can detect that it is up to date.

        Returns whatever action.run() returned (None when skipped).
    """
    filename = action.state_filename()
    temp_filename = filename + '.temp'

    # Remove any stale state first: if the run fails we must not be
    # left with a state file claiming the action completed.
    if os.path.exists(filename):
        os.unlink(filename)

    if selection.matches(LOCAL.done_selection, [action.shell_name()]):
        result = None  # declared already done, don't actually run it
    else:
        result = action.run()

    LOCAL.time = max(LOCAL.time, timestamp)
    action.timestamp = timestamp
    #timestamp_for is used to ensure the action is being
    # run from the same (relative) current directory as previously
    action.timestamp_for = filename
    action.timestamp_cwd = os.getcwd()

    dirname = os.path.dirname(filename)
    if dirname and not os.path.exists(dirname):
        # was os.mkdir: create any missing intermediate directories too
        os.makedirs(dirname)

    # Write to a temp file then rename so the state file appears atomically.
    with open(temp_filename, 'wb') as f:
        pickle.dump(action, f)
    os.rename(temp_filename, filename)

    return result
Example #2
0
    def run(self):
        """ Write the selected annotations from all input files as GFF3,
            assigning an ID from locus_tag when missing and a default
            color by feature type when none is set.
        """
        out_file = sys.stdout if self.output is None else open(self.output, 'wb')

        annotation.write_gff3_header(out_file)

        # Default display colors, used only when the feature doesn't
        # already carry a color attribute.
        default_colors = {
            'CDS': '#008800',
            'rRNA': '#bb0000',
            'tRNA': '#bb00bb',
            'misc_feature': '#8888ff',
        }

        for path in self.filenames:
            for feature in annotation.read_annotations(path):
                if not selection.matches(self.select, [feature.type]):
                    continue

                if 'ID' not in feature.attr and 'locus_tag' in feature.attr:
                    feature.attr['ID'] = feature.attr['locus_tag']

                if 'color' not in feature.attr and feature.type in default_colors:
                    feature.attr['color'] = default_colors[feature.type]

                print >> out_file, feature.as_gff()

        if self.output is not None:
            out_file.close()
def _get_timestamp(action):
    """ Look for ident() in .state subdirectory of current directory.
        If pickled value matches return the timestamp.
    """
    # Actions explicitly selected to be (re)done get no timestamp,
    # so they are always re-run.
    if selection.matches(LOCAL.do_selection, [action.shell_name()]):
        return None
    
    try:
        # Try the current state-file location first, then the legacy one.
        for filename in [
            action.state_filename(),
            os.path.join('.state', grace.filesystem_friendly_name(action.ident())), #Old location of state files
        ]:
            if os.path.exists(filename):
                with open(filename,'rb') as f:
                    old = pickle.load(f)
                
                # Any difference between the saved and current action
                # invalidates the stored state.
                if action != old:
                    return None
                
                # State saved by a version that didn't record timestamps.
                if not hasattr(old, 'timestamp'):
                    return None                        
                
                # timestamp_for records which state file the stamp was
                # written for; a mismatch means the action now runs from
                # a different (relative) working directory, so re-run.
                if hasattr(old, 'timestamp_for') and old.timestamp_for != filename:
                    return None
                
                return old.timestamp
                
                #for parameter in self.parameters:
                #    if parameter.get(self) != parameter.get(old):
                #        print >> sys.stderr, parameter.name, parameter.get(old), '->', parameter.get(self)            
    except Exception, error:
        # Unreadable/corrupt state is treated as missing: report and re-run.
        import traceback
        traceback.print_exc()
        print >> sys.stderr, 'Error making %s, re-running: %s' % (action.ident(), error)
    # Falls through to an implicit None when no state file exists.
def _run_and_save_state(action, timestamp):
    """ Run an action (unless LOCAL.done_selection says it is already
        done) and pickle it, stamped with `timestamp`, to its state file
        so later runs can detect that it is up to date.

        Returns whatever action.run() returned (None when skipped).
    """
    filename = action.state_filename()
    temp_filename = filename + '.temp'

    # Remove any stale state first: if the run fails we must not be
    # left with a state file claiming the action completed.
    if os.path.exists(filename):
        os.unlink(filename)

    if selection.matches(LOCAL.done_selection, [action.shell_name()]):
        result = None  # declared already done, don't actually run it
    else:
        result = action.run()

    LOCAL.time = max(LOCAL.time, timestamp)
    action.timestamp = timestamp
    #timestamp_for is used to ensure the action is being
    # run from the same (relative) current directory as previously
    action.timestamp_for = filename
    action.timestamp_cwd = os.getcwd()

    dirname = os.path.dirname(filename)
    if dirname and not os.path.exists(dirname):
        # was os.mkdir: create any missing intermediate directories too
        os.makedirs(dirname)

    # Write to a temp file then rename so the state file appears atomically.
    with open(temp_filename, 'wb') as f:
        pickle.dump(action, f)
    os.rename(temp_filename, filename)

    return result
Example #5
0
File: trivia.py Project: drpowell/nesoni
    def run(self):
        """ Emit the selected annotations from every input file as GFF3,
            filling in a missing ID from locus_tag and a default color
            keyed by feature type.
        """
        if self.output is None:
            sink = sys.stdout
        else:
            sink = open(self.output, 'wb')

        annotation.write_gff3_header(sink)

        # Default colors, applied only when none is already present.
        colors = {
            'CDS': '#008800',
            'rRNA': '#bb0000',
            'tRNA': '#bb00bb',
            'misc_feature': '#8888ff',
        }

        for path in self.filenames:
            for record in annotation.read_annotations(path):
                if not selection.matches(self.select, [record.type]):
                    continue

                if 'ID' not in record.attr and 'locus_tag' in record.attr:
                    record.attr['ID'] = record.attr['locus_tag']

                if 'color' not in record.attr and record.type in colors:
                    record.attr['color'] = colors[record.type]

                print >> sink, record.as_gff()

        if self.output is not None:
            sink.close()
Example #6
0
    def run(self):
        """ Relate 'parent' to 'child' annotation features by genomic
            position, record the relations on both feature sets, and
            write each set (with updated attributes) to its own GFF file.
        """
        # Wrap selected annotations so each can carry a relations list.
        features_parent = [ 
            _Related_feature(item,item.start,item.end,[]) 
            for item in annotation.read_annotations(self.parent) 
            if selection.matches(self.select_parent, [item.type]) 
            ]
        features_child = [ 
            _Related_feature(item,item.start,item.end,[]) 
            for item in annotation.read_annotations(self.child) 
            if selection.matches(self.select_child, [item.type])
            ]
        
        # Index child features by seqid for fast span queries.
        index = { }
        for item in features_child:
            if item.feature.seqid not in index:
                index[item.feature.seqid] = span_index.Span_index()
            index[item.feature.seqid].insert(item)
        for value in index.values():
            value.prepare()
        
        for item_1 in features_parent:
            # Expand the parent's span by the upstrand/downstrand margins,
            # oriented by strand; strandless features get the larger
            # margin on both sides.
            if item_1.feature.strand == 1:
                start = item_1.start - self.upstrand
                end = item_1.end + self.downstrand
            elif item_1.feature.strand == -1:
                start = item_1.start - self.downstrand
                end = item_1.end + self.upstrand
            else:
                start = item_1.start - max(self.upstrand,self.downstrand)
                end = item_1.end + max(self.upstrand,self.downstrand)
            if item_1.feature.seqid in index:
                for item_2 in index[item_1.feature.seqid].get(start,end):
                    # Record the relation in both directions.
                    item_1.relations.append(item_2)
                    item_2.relations.append(item_1)

        # Update attributes based on the collected relations.
        for item in features_parent:
            item.modify_with_relations(self.use, self.to_child, self.to_parent)
        
        with open(self.prefix + '-parent.gff','wb') as f:
            annotation.write_gff3_header(f)
            for item in features_parent:
                print >> f, item.feature.as_gff()
        
        with open(self.prefix + '-child.gff','wb') as f:
            annotation.write_gff3_header(f)
            for item in features_child:
                print >> f, item.feature.as_gff()
Example #7
0
    def run(self):
        """ Pair 'parent' and 'child' annotations by strand-aware
            proximity, record the relations on both sides, and write the
            annotated feature sets to <prefix>-parent.gff and
            <prefix>-child.gff.
        """
        parents = [
            _Related_feature(feature, feature.start, feature.end, [])
            for feature in annotation.read_annotations(self.parent)
            if selection.matches(self.select_parent, [feature.type])
        ]
        children = [
            _Related_feature(feature, feature.start, feature.end, [])
            for feature in annotation.read_annotations(self.child)
            if selection.matches(self.select_child, [feature.type])
        ]

        # Index children by sequence id for fast span queries.
        spans_by_seqid = {}
        for child in children:
            seqid = child.feature.seqid
            if seqid not in spans_by_seqid:
                spans_by_seqid[seqid] = span_index.Span_index()
            spans_by_seqid[seqid].insert(child)
        for index in spans_by_seqid.values():
            index.prepare()

        for parent in parents:
            # Widen the parent's span by the upstrand/downstrand margins,
            # oriented by strand; strandless features get the larger
            # margin on both sides.
            strand = parent.feature.strand
            if strand == 1:
                start = parent.start - self.upstrand
                end = parent.end + self.downstrand
            elif strand == -1:
                start = parent.start - self.downstrand
                end = parent.end + self.upstrand
            else:
                margin = max(self.upstrand, self.downstrand)
                start = parent.start - margin
                end = parent.end + margin
            if parent.feature.seqid in spans_by_seqid:
                for child in spans_by_seqid[parent.feature.seqid].get(start, end):
                    # Record the relation in both directions.
                    parent.relations.append(child)
                    child.relations.append(parent)

        # Update attributes based on the collected relations.
        for parent in parents:
            parent.modify_with_relations(self.use, self.to_child, self.to_parent)

        with open(self.prefix + '-parent.gff', 'wb') as f:
            annotation.write_gff3_header(f)
            for parent in parents:
                print >> f, parent.feature.as_gff()

        with open(self.prefix + '-child.gff', 'wb') as f:
            annotation.write_gff3_header(f)
            for child in children:
                print >> f, child.feature.as_gff()
Example #8
0
 def convert(filename):
     """ Return a filename usable as fastq input: the file itself when
         it is already fastq (plain, gzip or bzip2 compressed),
         otherwise the name of a temporary fastq file converted from it.
     """
     if selection.matches(
             'type-fastq:[compression-none/compression-gzip/compression-bzip2]',
             io.get_file_info(filename)):
         return filename
     converted = tempname()
     with open(converted, 'wb') as out:
         # Qualities are required since fastq cannot be written without them.
         for name, seq, qual in io.read_sequences(filename, qualities='required'):
             io.write_fastq(out, name, seq, qual)
     return converted
Example #9
0
    def run(self):
        """ Copy selected annotations, optionally retyping them,
            shifting start/end (absolutely and/or proportionally to
            feature length), changing strand, and renaming attributes,
            then write the result to <prefix>.gff as GFF3.
        """
        assert self.change_strand in STRAND_CHANGE, 'Unknown way to change strand.'
        strand_changer = STRAND_CHANGE[self.change_strand]

        shift_start_absolute, shift_start_proportion = decode_shift(
            self.shift_start)
        shift_end_absolute, shift_end_proportion = decode_shift(self.shift_end)

        # Parse "new=old" attribute renames; identity renames are dropped.
        renames = []
        if self.rename:
            for item in self.rename.split(','):
                new, old = item.split('=')
                if new != old:
                    renames.append((new, old))

        # 'with' guarantees the output file is closed even if reading or
        # writing raises part-way (the original leaked the handle then).
        with open(self.prefix + '.gff', 'wb') as out_file:
            annotation.write_gff3_header(out_file)

            for filename in self.filenames:
                for item in annotation.read_annotations(filename):
                    if not selection.matches(self.select, [item.type]): continue

                    if self.type:
                        item.type = self.type

                    # Shift = absolute amount plus proportion of length,
                    # rounded to the nearest integer.
                    length = item.end - item.start
                    shift_start = int(
                        math.floor(0.5 + shift_start_absolute +
                                   shift_start_proportion * length))
                    shift_end = int(
                        math.floor(0.5 + shift_end_absolute +
                                   shift_end_proportion * length))

                    # Shifts are applied 5'->3' relative to the strand.
                    if item.strand == 1:
                        item.start += shift_start
                        item.end += shift_end
                    elif item.strand == -1:
                        item.end -= shift_start
                        item.start -= shift_end
                    item.start = max(0, item.start)  #IGV complains

                    item.strand = strand_changer[item.strand]

                    # Delete every old name first, then assign from the
                    # snapshot, so renames that swap a pair of attributes
                    # work correctly.
                    old_attr = item.attr.copy()
                    for new, old in renames:
                        if old in item.attr:
                            del item.attr[old]
                    for new, old in renames:
                        if old in old_attr:
                            item.attr[new] = old_attr[old]

                    print >> out_file, item.as_gff()
Example #10
0
 def modify_with_relations(self, use, to_child, to_parent):
     """ Classify each related feature by its position/strand relative
         to this feature, then for relation types matching *use*, link
         the features by id and copy the attributes selected by
         *to_child* / *to_parent* in each direction.
     """
     buckets = collections.defaultdict(list)
     
     # Treat a missing strand as 0 (strandless).
     my_strand = self.feature.strand or 0
     for item in self.relations:
         their_strand = item.feature.strand or 0
         overlaps = self.feature.overlaps(item.feature,check_strand=False)
         # The product is -1 exactly when both strands are definite and
         # opposite.
         if my_strand * their_strand == -1:
             if overlaps:
                 relation = 'opposite'
             # Multiplying starts by my_strand orients the comparison so
             # "upstrand" means 5' of this feature on its own strand.
             elif item.feature.start*my_strand < self.feature.start*my_strand:
                 relation = 'upstrand_opposite'
             else:
                 relation = 'downstrand_opposite'
         elif overlaps:
             relation = 'in'
         else:
             # Same strand, or at least one strandless: orient by
             # whichever strand is known; 'near' when neither is.
             strand = my_strand or their_strand
             if not strand:
                 relation = 'near'                
             else:
                 if item.feature.start*strand < self.feature.start*strand:
                     relation = 'upstrand'
                 else:
                     relation = 'downstrand'
         
         buckets[relation].append(item)
     
     for name,relatives in buckets.items():        
         # Only act on relation categories selected by *use*.
         if selection.matches(use, [name]):
             for relative in relatives:
                 # Cross-link the two features by id.
                 self.add_to_attr('has_'+name, relative.feature.get_id())
                 relative.add_to_attr('is_'+name, self.feature.get_id())
                 relative.add_to_attr('Parent', self.feature.get_id())
                 
                 # Copy selected attributes down to the child ...
                 for key in self.feature.attr:
                     if selection.matches(to_child,[key]):
                         relative.add_to_attr(key, self.feature.attr[key])
                 # ... and up to the parent.
                 for key in relative.feature.attr:
                     if selection.matches(to_parent,[key]):
                         self.add_to_attr(key, relative.feature.attr[key])
Example #11
0
    def modify_with_relations(self, use, to_child, to_parent):
        """ Sort the related features into relation categories
            (in / opposite / upstrand / downstrand / ...), and for every
            category selected by *use*, cross-link the features by id
            and exchange the attributes selected by *to_child* and
            *to_parent*.
        """
        def classify(other):
            # Relation of *other* to this feature; missing strands are 0.
            mine = self.feature.strand or 0
            theirs = other.feature.strand or 0
            touching = self.feature.overlaps(other.feature, check_strand=False)
            if mine * theirs == -1:
                # Definite, opposite strands. Multiplying starts by the
                # strand orients "upstrand" as 5' of this feature.
                if touching:
                    return 'opposite'
                if other.feature.start * mine < self.feature.start * mine:
                    return 'upstrand_opposite'
                return 'downstrand_opposite'
            if touching:
                return 'in'
            orient = mine or theirs
            if not orient:
                return 'near'
            if other.feature.start * orient < self.feature.start * orient:
                return 'upstrand'
            return 'downstrand'

        buckets = collections.defaultdict(list)
        for other in self.relations:
            buckets[classify(other)].append(other)

        for name, relatives in buckets.items():
            if not selection.matches(use, [name]):
                continue
            for relative in relatives:
                # Link both directions by feature id.
                self.add_to_attr('has_' + name, relative.feature.get_id())
                relative.add_to_attr('is_' + name, self.feature.get_id())
                relative.add_to_attr('Parent', self.feature.get_id())

                # Copy selected attributes down to the child ...
                for key in self.feature.attr:
                    if selection.matches(to_child, [key]):
                        relative.add_to_attr(key, self.feature.attr[key])
                # ... and up to the parent.
                for key in relative.feature.attr:
                    if selection.matches(to_parent, [key]):
                        self.add_to_attr(key, relative.feature.attr[key])
Example #12
0
    def run(self):
        """ Merge runs of annotations that overlap (within self.overlap,
            which may be negative to require a gap) on the same seqid
            and strand into single annotations, and write them to
            <prefix>.gff as GFF3.
        """
        # Collect the selected annotations, optionally retyping them.
        annotations = [ ]
        for filename in self.filenames:
            for item in annotation.read_annotations(filename):
                if not selection.matches(self.select, [item.type]): continue
                if self.type:
                    item.type = self.type
                annotations.append(item)

        # Sorting by (seqid, strand, start) makes mergeable runs adjacent.
        annotations.sort(key=lambda item: (item.seqid, item.strand, item.start))

        group = [ ]
        groups = [ ]
        def emit():
            # Flush the current group, if any, into groups.
            if not group: return
            groups.append(group[:])
            del group[:]
        seqid = None
        strand = None
        end = 0
        for item in annotations:
            # Start a new group on a new seqid/strand, or when this item
            # does not reach back into the current group's span.
            if item.seqid != seqid or item.strand != strand or item.start >= end:
                emit()
                seqid = item.seqid
                strand = item.strand
                end = item.end-self.overlap
            group.append(item)
            end = max(item.end-self.overlap, end)
        emit()

        # 'with' closes the output even if writing fails part-way
        # (the original left the file open on error).
        with open(self.prefix+'.gff','wb') as out_file:
            annotation.write_gff3_header(out_file)

            for group in groups:
                # Merge each group into one annotation spanning it.
                item = annotation.Annotation()
                item.source = group[0].source
                item.type = join_descriptions( item2.type for item2 in group )
                item.seqid = group[0].seqid
                item.strand = group[0].strand
                item.start = min( item2.start for item2 in group )
                item.end = max( item2.end for item2 in group )
                item.score = None
                item.phase = None
                item.attr = { }

                # Union of attribute keys; values joined across the group.
                for item2 in group:
                    for key in item2.attr:
                        if key in item.attr: continue
                        item.attr[key] = join_descriptions( item3.attr[key] for item3 in group if key in item3.attr )

                print >> out_file, item.as_gff()
Example #13
0
File: bowtie.py Project: promodel/nesoni
 def convert(filename):
     """ Ensure *filename* can be consumed as fastq: pass it through
         when it already is fastq (plain/gzip/bzip2), otherwise convert
         it to a temporary fastq file and return that file's name.
     """
     already_fastq = selection.matches(
         'type-fastq:[compression-none/compression-gzip/compression-bzip2]',
         io.get_file_info(filename))
     if already_fastq:
         return filename
     temp_filename = tempname()
     with open(temp_filename, 'wb') as out:
         # Qualities are required since fastq cannot be written without them.
         for name, seq, qual in io.read_sequences(filename, qualities='required'):
             io.write_fastq(out, name, seq, qual)
     return temp_filename
Example #14
0
def classify_files(filenames, selectors):
    """ Put each of a set of files into one or more categories.

        Returns a list of lists: results[i] holds the filenames matched
        by selectors[i]. A file may match several selectors; a file
        matching none raises grace.Error.
    """
    # One result bucket per selector. (The original sized this from an
    # unrelated module-level 'categories' name, which breaks whenever
    # its length differs from len(selectors).)
    results = [[] for selector in selectors]
    for filename in filenames:
        info = get_file_info(filename)
        any_matched = False  # renamed: 'any' shadowed the builtin
        for i, selector in enumerate(selectors):
            if selection.matches(selector, info):
                results[i].append(filename)
                any_matched = True
        if not any_matched:
            raise grace.Error('Don\'t know what to do with ' + filename)
    return results
Example #15
0
File: io.py Project: drpowell/nesoni
def classify_files(filenames, selectors):
    """ Put each of a set of files into one or more categories.

        Returns a list of lists: results[i] holds the filenames matched
        by selectors[i]. A file may match several selectors; a file
        matching none raises grace.Error.
    """
    # One result bucket per selector. (The original sized this from an
    # unrelated module-level 'categories' name, which breaks whenever
    # its length differs from len(selectors).)
    results = [ [] for selector in selectors ]
    for filename in filenames:
        info = get_file_info(filename)
        any_matched = False  # renamed: 'any' shadowed the builtin
        for i, selector in enumerate(selectors):
            if selection.matches(selector, info):
                results[i].append(filename)
                any_matched = True
        if not any_matched:
            raise grace.Error('Don\'t know what to do with '+filename)
    return results
Example #16
0
    def run(self):
        """ Copy selected annotations, optionally retyping them,
            shifting start/end (absolutely and/or proportionally to
            feature length), changing strand, and renaming attributes,
            then write the result to <prefix>.gff as GFF3.
        """
        assert self.change_strand in STRAND_CHANGE, 'Unknown way to change strand.'
        strand_changer = STRAND_CHANGE[self.change_strand]

        shift_start_absolute, shift_start_proportion = decode_shift(self.shift_start)
        shift_end_absolute, shift_end_proportion = decode_shift(self.shift_end)

        # Parse "new=old" attribute renames; identity renames are dropped.
        renames = [ ]
        if self.rename:
            for item in self.rename.split(','):
                new, old = item.split('=')
                if new != old:
                    renames.append((new,old))

        # 'with' guarantees the output file is closed even if reading or
        # writing raises part-way (the original leaked the handle then).
        with open(self.prefix+'.gff','wb') as out_file:
            annotation.write_gff3_header(out_file)

            for filename in self.filenames:
                for item in annotation.read_annotations(filename):
                    if not selection.matches(self.select, [item.type]): continue

                    if self.type:
                        item.type = self.type

                    # Shift = absolute amount plus proportion of length,
                    # rounded to the nearest integer.
                    length = item.end-item.start
                    shift_start = int(math.floor(0.5+shift_start_absolute+shift_start_proportion*length))
                    shift_end = int(math.floor(0.5+shift_end_absolute+shift_end_proportion*length))

                    # Shifts are applied 5'->3' relative to the strand.
                    if item.strand == 1:
                        item.start += shift_start
                        item.end += shift_end
                    elif item.strand == -1:
                        item.end -= shift_start
                        item.start -= shift_end
                    item.start = max(0, item.start) #IGV complains

                    item.strand = strand_changer[item.strand]

                    # Delete every old name first, then assign from the
                    # snapshot, so renames that swap a pair of attributes
                    # work correctly.
                    old_attr = item.attr.copy()
                    for new,old in renames:
                        if old in item.attr:
                            del item.attr[old]
                    for new,old in renames:
                        if old in old_attr:
                            item.attr[new] = old_attr[old]

                    print >> out_file, item.as_gff()
Example #17
0
    def _write_table(self, samples, items):
        """ Write variant calls to <prefix>.csv as grouped columns:
            location, per-sample genotype, optional quality and count
            columns, and a snpeff annotation column.
        """
        # Row names: "CHROM:POS" for each variant.
        names = [
            '%s:%d' % (item.record.CHROM, item.record.POS) for item in items
        ]
        sample_list = io.named_list_type(samples)

        # groups is a list of (column-group name, named-list table) pairs.
        groups = []

        locations_list = io.named_list_type(['CHROM', 'POS'])
        locations = io.named_list_type(names, locations_list)([
            locations_list([item.record.CHROM, item.record.POS])
            for item in items
        ])
        groups.append(('Location', locations))

        # One genotype description per sample per variant.
        genotypes = io.named_list_type(names, sample_list)([
            sample_list([
                describe_genotype(item2, item.variants)
                for item2 in item.genotypes
            ]) for item in items
        ])
        groups.append(('Genotype', genotypes))

        if self.qualities:
            qualities = io.named_list_type(names, sample_list)(
                [sample_list(item.qualities) for item in items])
            groups.append(('Quality', qualities))

        if self.counts:
            counts = io.named_list_type(names, sample_list)([
                sample_list([
                    describe_counts(item2, item.variants)
                    for item2 in item.counts
                ]) for item in items
            ])
            groups.append(('Count', counts))

        # Join snpeff entries (item2[0] is the text, item2[1] the terms
        # matched against self.snpeff_show) into one column.
        annotation_list = io.named_list_type(['snpeff'])
        annotations = io.named_list_type(names, annotation_list)([
            annotation_list([
                ' /// '.join(item2[0] for item2 in item.snpeff
                             if selection.matches(self.snpeff_show, item2[1]))
            ]) for item in items
        ])
        groups.append(('Annotation', annotations))

        io.write_grouped_csv(self.prefix + '.csv', groups)
Example #18
0
    def _write_table(self, samples, items):
        """ Write variant calls to <prefix>.csv as grouped columns:
            location, per-sample genotype, optional quality and count
            columns, and a snpeff annotation column.
        """
        row_names = ['%s:%d' % (record.record.CHROM, record.record.POS)
                     for record in items]
        per_sample = io.named_list_type(samples)

        # (column-group name, named-list table) pairs, in output order.
        column_groups = []

        location_type = io.named_list_type(['CHROM', 'POS'])
        column_groups.append(('Location', io.named_list_type(row_names, location_type)([
            location_type([record.record.CHROM, record.record.POS])
            for record in items
        ])))

        column_groups.append(('Genotype', io.named_list_type(row_names, per_sample)([
            per_sample([describe_genotype(genotype, record.variants)
                        for genotype in record.genotypes])
            for record in items
        ])))

        if self.qualities:
            column_groups.append(('Quality', io.named_list_type(row_names, per_sample)([
                per_sample(record.qualities) for record in items
            ])))

        if self.counts:
            column_groups.append(('Count', io.named_list_type(row_names, per_sample)([
                per_sample([describe_counts(count, record.variants)
                            for count in record.counts])
                for record in items
            ])))

        # Join snpeff entries (entry[0] is the text, entry[1] the terms
        # matched against self.snpeff_show) into one column.
        snpeff_type = io.named_list_type(['snpeff'])
        column_groups.append(('Annotation', io.named_list_type(row_names, snpeff_type)([
            snpeff_type([' /// '.join(
                entry[0] for entry in record.snpeff
                if selection.matches(self.snpeff_show, entry[1]))])
            for record in items
        ])))

        io.write_grouped_csv(self.prefix + '.csv', column_groups)
def _make_inner(action):
    """ Run one action: print its description, optionally stop instead
        of running (abort_make mode), and save its state, trading cores
        with the coordinator while it runs.
    """
    timestamp = coordinator().time()
    assert timestamp > LOCAL.time, 'Time running in reverse.'

    cores = action.cores_required()
    multi_core = cores > 1
    if multi_core:
        # Swap our single core for as many as the action needs.
        coordinator().trade_cores(1, cores)
    try:
        config.write_colored_text(sys.stderr, '\n'+action.describe()+'\n')

        # In abort_make mode, anything not explicitly selected to be
        # done stops the make instead of running.
        if LOCAL.abort_make and not selection.matches(LOCAL.do_selection, [action.shell_name()]):
            raise grace.Error('%s would be run. Stopping here.' % action.ident())

        old_status = grace.status(action.shell_name())
        try:
            _run_and_save_state(action, timestamp)
        finally:
            # Always restore the previous status text.
            grace.status(old_status)
    finally:
        if multi_core:
            # Give back the extra cores.
            coordinator().trade_cores(cores, 1)
Example #20
0
File: legion.py Project: mscook/nesoni
def _get_timestamp(action):
    """ Look for ident() in .state subdirectory of current directory.
        If pickled value matches return the timestamp.
    """
    # Actions explicitly selected to be (re)done get no timestamp,
    # so they are always re-run.
    if selection.matches(LOCAL.do_selection, [action.shell_name()]):
        return None

    try:
        # Try the current state-file location first, then the legacy one.
        for filename in [
                action.state_filename(),
                os.path.join(
                    '.state', grace.filesystem_friendly_name(
                        action.ident())),  #Old location of state files
        ]:
            if os.path.exists(filename):
                with open(filename, 'rb') as f:
                    old = pickle.load(f)

                # Any difference between the saved and current action
                # invalidates the stored state.
                if action != old:
                    return None

                # State saved by a version that didn't record timestamps.
                if not hasattr(old, 'timestamp'):
                    return None

                # timestamp_for records which state file the stamp was
                # written for; a mismatch means the action now runs from
                # a different (relative) working directory, so re-run.
                if hasattr(old,
                           'timestamp_for') and old.timestamp_for != filename:
                    return None

                return old.timestamp

                #for parameter in self.parameters:
                #    if parameter.get(self) != parameter.get(old):
                #        print >> sys.stderr, parameter.name, parameter.get(old), '->', parameter.get(self)
    except Exception, error:
        # Unreadable/corrupt state is treated as missing: report and re-run.
        import traceback
        traceback.print_exc()
        print >> sys.stderr, 'Error making %s, re-running: %s' % (
            action.ident(), error)
    # Falls through to an implicit None when no state file exists.
Example #21
0
    def run(self):
        """ Copy selected annotations, shifting start/end by the fixed
            configured amounts in the feature's own orientation and
            changing strand as configured, then write them to
            <prefix>.gff as GFF3.
        """
        assert self.change_strand in STRAND_CHANGE, 'Unknown way to change strand.'
        strand_changer = STRAND_CHANGE[self.change_strand]

        # 'with' closes the output even if an input fails part-way
        # (the original leaked the handle on error).
        with open(self.prefix+'.gff','wb') as out_file:
            annotation.write_gff3_header(out_file)

            for filename in self.filenames:
                for item in annotation.read_annotations(filename):
                    if not selection.matches(self.select, [item.type]): continue

                    # Shifts are applied 5'->3' relative to the strand.
                    if item.strand == 1:
                        item.start += self.shift_start
                        item.end += self.shift_end
                    elif item.strand == -1:
                        item.end -= self.shift_start
                        item.start -= self.shift_end

                    item.strand = strand_changer[item.strand]

                    print >> out_file, item.as_gff()
Example #22
0
File: legion.py Project: mscook/nesoni
def _make_inner(action):
    """Run a single make action, managing cores, status and abort checks.

    Obtains a fresh timestamp from the coordinator, temporarily trades up
    to the number of cores the action requires, and delegates the actual
    execution and state persistence to _run_and_save_state.
    """
    timestamp = coordinator().time()
    assert timestamp > LOCAL.time, 'Time running in reverse.'

    # Multi-core actions trade our one core for the number they need,
    # and trade back in the outer finally regardless of outcome.
    cores = action.cores_required()
    if cores > 1:
        coordinator().trade_cores(1, cores)
    try:
        config.write_colored_text(sys.stderr, '\n' + action.describe() + '\n')

        # In abort mode, stop before running anything not explicitly
        # selected by the do-selection.
        if LOCAL.abort_make and not selection.matches(LOCAL.do_selection,
                                                      [action.shell_name()]):
            raise grace.Error('%s would be run. Stopping here.' %
                              action.ident())

        old_status = grace.status(action.shell_name())
        try:
            _run_and_save_state(action, timestamp)
        finally:
            # Restore the previous status line even if the action failed.
            grace.status(old_status)
    finally:
        if cores > 1:
            coordinator().trade_cores(cores, 1)
 def _create_json(self):
     """Write plotter-config.json describing samples, groups and peak files.

     Each sample is assigned to the comma-joined list of group terms it
     matches (or 'ungrouped'), and absolute paths to its BAM and the
     shared reference/peak files are recorded.
     """
     workspace = io.Workspace(self.output_dir, must_exist=False)

     samples = [ ]
     groups = [ ]
     for sample in self.samples:
         # A sample matches a group term against its tags plus its own
         # output directory name.
         this_groups = [ ]
         for item in self.groups:
             if selection.matches(
                     selection.term_specification(item),
                     sample.tags + [ sample.output_dir ]
                     ):
                 this_groups.append(selection.term_name(item))
         group = ','.join(this_groups) if this_groups else 'ungrouped'
         if group not in groups: groups.append(group)

         item = {
             'name' : sample.output_dir,
             'bam' : os.path.abspath( 
                 workspace/('samples',sample.output_dir,'alignments_filtered_sorted.bam')
                 ),
             'group' : group,
             'tags' : sample.tags,
             }
         samples.append(item)

     # OrderedDict keeps a stable key order in the emitted JSON.
     obj = collections.OrderedDict()
     obj['reference'] = os.path.abspath( self.reference )
     obj['extension'] = self.extension
     obj['genes'] = os.path.abspath( workspace/('peaks','relation-parent.gff') )
     obj['peaks'] = os.path.abspath( workspace/('peaks','relation-child.gff') )
     obj['groups'] = groups
     obj['samples'] = samples

     with open(workspace/"plotter-config.json","wb") as f:
         json.dump(obj, f, indent=4)
示例#24
0
 def _create_json(self):
     """Emit plotter-config.json: sample/group assignments plus absolute
     paths to the reference, peak GFFs and each sample's filtered BAM."""
     workspace = io.Workspace(self.output_dir, must_exist=False)

     samples = [ ]
     groups = [ ]
     for sample in self.samples:
         # Collect the names of every group term this sample satisfies,
         # matching against its tags plus its own directory name.
         matched = [
             selection.term_name(term)
             for term in self.groups
             if selection.matches(
                 selection.term_specification(term),
                 sample.tags + [ sample.output_dir ]
                 )
             ]
         if matched:
             group = ','.join(matched)
         else:
             group = 'ungrouped'
         if group not in groups:
             groups.append(group)

         samples.append({
             'name' : sample.output_dir,
             'bam' : os.path.abspath(
                 workspace/('samples',sample.output_dir,'alignments_filtered_sorted.bam')
                 ),
             'group' : group,
             'tags' : sample.tags,
             })

     # Preserve insertion order of keys in the output file.
     obj = collections.OrderedDict()
     obj['reference'] = os.path.abspath( self.reference )
     obj['extension'] = self.extension
     obj['genes'] = os.path.abspath( workspace/('peaks','relation-parent.gff') )
     obj['peaks'] = os.path.abspath( workspace/('peaks','relation-child.gff') )
     obj['groups'] = groups
     obj['samples'] = samples

     with open(workspace/"plotter-config.json","wb") as f:
         json.dump(obj, f, indent=4)
示例#25
0
 def matches(self, expression):
     """Return True if this object's tags satisfy the selection expression."""
     tags = self.get_tags()
     return selection.matches(expression, tags)
 def run(self):
     """Pool per-sample tail counts into per-group counts.

     Reads the grouped counts CSV, assigns samples to groups according
     to self.groups, then writes <prefix>.csv where Counts and
     Tail_counts are summed and Tail/Proportion are averaged over the
     group (ignoring 'NA').
     """
     data = io.read_grouped_table(
         self.counts,
         [('Count',str), ('Annotation',str), ('Tail_count',str), ('Tail',str), ('Proportion',str)],
         'Count',
         )

     features = data['Count'].keys()
     samples = data['Count'].value_type().keys()

     # Each sample's tag list defaults to just its own name; any
     # #sampleTags= comment line in the input overrides it.
     tags = { }
     for sample in samples:
         tags[sample] = [sample]        
     for line in data.comments:
         if line.startswith('#sampleTags='):
             parts = line[len('#sampleTags='):].split(',')
             tags[parts[0]] = parts

     group_names = [ ]
     groups = [ ]
     group_tags = [ ]

     for item in self.groups:
         select = selection.term_specification(item)
         name = selection.term_name(item)
         group = [ item for item in samples if selection.matches(select, tags[item]) ]
         assert group, 'Empty group: '+name

         # Propagate tags shared by the group's members onto the group.
         # NOTE(review): the for/else appends once per extra member whose
         # tags all contain `tag` as a substring — presumably intended to
         # keep only tags common to all members; confirm against callers.
         this_group_tags = [ name ]
         for tag in tags[group[0]]:
             if tag == name: continue
             for item in group[1:]:
                 for item2 in tags[item]:
                     if tag not in item2: break
                 else:
                     this_group_tags.append(tag)

         group_names.append(name)
         groups.append(group)
         group_tags.append(this_group_tags)

     result = io.Grouped_table()
     result.comments = [ '#Counts' ]
     for item in group_tags:
         result.comments.append('#sampleTags='+','.join(item))

     # Counts are summed over the group; Tail and Proportion are averaged,
     # with 'NA' entries excluded from the mean.
     count = [ ]
     tail_count = [ ]
     tail = [ ]
     proportion = [ ]
     for feature in features:
         this_count = [ ]
         this_tail_count = [ ]
         this_tail = [ ]
         this_proportion = [ ]
         for group in groups:
             this_this_count = [ ]
             this_this_tail_count = [ ]
             this_this_tail = [ ]
             this_this_proportion = [ ]
             for sample in group:
                 this_this_count.append(int(data['Count'][feature][sample]))
                 this_this_tail_count.append(int(data['Tail_count'][feature][sample]))
                 item = data['Tail'][feature][sample]
                 if item != 'NA': this_this_tail.append(float(item))
                 item = data['Proportion'][feature][sample]
                 if item != 'NA': this_this_proportion.append(float(item))

             this_count.append(str(sum(this_this_count)))
             this_tail_count.append(str(sum(this_this_tail_count)))
             this_tail.append(str(sum(this_this_tail)/len(this_this_tail)) if this_this_tail else 'NA')
             this_proportion.append(str(sum(this_this_proportion)/len(this_this_proportion)) if this_this_proportion else 'NA')

         count.append(this_count)
         tail_count.append(this_tail_count)
         tail.append(this_tail)
         proportion.append(this_proportion)

     matrix = io.named_matrix_type(features,group_names)
     result['Count'] = matrix(count)
     result['Annotation'] = data['Annotation']
     result['Tail_count'] = matrix(tail_count)
     result['Tail'] = matrix(tail)
     result['Proportion'] = matrix(proportion)
     result.write_csv(self.prefix + '.csv')
    def run(self):
        """Pool per-sample tail counts into per-group counts.

        Same pipeline as the unformatted variant elsewhere in this file:
        read the grouped counts table, form groups from self.groups,
        sum Count/Tail_count and average Tail/Proportion (skipping "NA"),
        then write <prefix>.csv.
        """
        data = io.read_grouped_table(
            self.counts,
            [("Count", str), ("Annotation", str), ("Tail_count", str), ("Tail", str), ("Proportion", str)],
            "Count",
        )

        features = data["Count"].keys()
        samples = data["Count"].value_type().keys()

        # Default each sample's tags to its own name, then apply any
        # #sampleTags= overrides from the table comments.
        tags = {}
        for sample in samples:
            tags[sample] = [sample]
        for line in data.comments:
            if line.startswith("#sampleTags="):
                parts = line[len("#sampleTags=") :].split(",")
                tags[parts[0]] = parts

        group_names = []
        groups = []
        group_tags = []

        for item in self.groups:
            select = selection.term_specification(item)
            name = selection.term_name(item)
            group = [item for item in samples if selection.matches(select, tags[item])]
            assert group, "Empty group: " + name

            # NOTE(review): substring-based tag propagation via for/else;
            # looks intended to keep tags common to all members — confirm.
            this_group_tags = [name]
            for tag in tags[group[0]]:
                if tag == name:
                    continue
                for item in group[1:]:
                    for item2 in tags[item]:
                        if tag not in item2:
                            break
                    else:
                        this_group_tags.append(tag)

            group_names.append(name)
            groups.append(group)
            group_tags.append(this_group_tags)

        result = io.Grouped_table()
        result.comments = ["#Counts"]
        for item in group_tags:
            result.comments.append("#sampleTags=" + ",".join(item))

        # Sum counts per group; average Tail/Proportion, ignoring "NA".
        count = []
        tail_count = []
        tail = []
        proportion = []
        for feature in features:
            this_count = []
            this_tail_count = []
            this_tail = []
            this_proportion = []
            for group in groups:
                this_this_count = []
                this_this_tail_count = []
                this_this_tail = []
                this_this_proportion = []
                for sample in group:
                    this_this_count.append(int(data["Count"][feature][sample]))
                    this_this_tail_count.append(int(data["Tail_count"][feature][sample]))
                    item = data["Tail"][feature][sample]
                    if item != "NA":
                        this_this_tail.append(float(item))
                    item = data["Proportion"][feature][sample]
                    if item != "NA":
                        this_this_proportion.append(float(item))

                this_count.append(str(sum(this_this_count)))
                this_tail_count.append(str(sum(this_this_tail_count)))
                this_tail.append(str(sum(this_this_tail) / len(this_this_tail)) if this_this_tail else "NA")
                this_proportion.append(
                    str(sum(this_this_proportion) / len(this_this_proportion)) if this_this_proportion else "NA"
                )

            count.append(this_count)
            tail_count.append(this_tail_count)
            tail.append(this_tail)
            proportion.append(this_proportion)

        matrix = io.named_matrix_type(features, group_names)
        result["Count"] = matrix(count)
        result["Annotation"] = data["Annotation"]
        result["Tail_count"] = matrix(tail_count)
        result["Tail"] = matrix(tail)
        result["Proportion"] = matrix(proportion)
        result.write_csv(self.prefix + ".csv")
示例#28
0
    def run(self):
        """Build an n-way comparison of variants across VCF samples.

        Collects records where at least two samples have differing
        genotypes, applies require/only-snps/snpEff filters, then emits
        the result in the format chosen by self.as_ (table, nexus,
        splitstree or vcf).
        """
        # NOTE(review): reader_f is never explicitly closed — presumably
        # released at process exit; confirm whether vcf.Reader owns it.
        reader_f = io.open_possibly_compressed_file(self.vcf)
        reader = vcf.Reader(reader_f)

        tags = {}
        for item in reader.metadata.get('sampleTags', []):
            parts = item.split(',')
            tags[parts[0]] = parts

        assert 'reference' not in reader.samples, 'Can\'t have a sample called reference, sorry.'

        # A synthetic 'reference' sample is prepended with genotype [0].
        samples = ['reference'] + reader.samples

        for sample in samples:
            if sample not in tags:
                tags[sample] = [sample, 'all']

        samples = selection.select_and_sort(self.select, self.sort, samples,
                                            lambda sample: tags[sample])

        # Indices of samples that must have a callable genotype.
        required = [
            i for i, sample in enumerate(samples)
            if selection.matches(self.require, tags[sample])
        ]

        sample_number = dict((b, a) for a, b in enumerate(reader.samples))

        items = []
        for record in reader:
            variants = get_variants(record)
            genotypes = []
            counts = []
            qualities = []
            for sample in samples:
                if sample == 'reference':
                    genotypes.append([0])
                    counts.append([1])
                    qualities.append(float('inf'))
                else:
                    genotypes.append(
                        get_genotype(record.samples[sample_number[sample]]))
                    counts.append(
                        get_variant_counts(
                            record.samples[sample_number[sample]]))
                    qualities.append(
                        record.samples[sample_number[sample]].data.GQ)

            # Only output when there are at least two genotypes
            any_interesting = False
            for i in xrange(len(genotypes)):
                for j in xrange(i):
                    if (genotypes[i] is not None and genotypes[j] is not None
                            and
                            not genotypes_equal(genotypes[i], genotypes[j])):
                        any_interesting = True
                        break
                if any_interesting: break
            if not any_interesting:
                continue

            # Drop records where any required sample is uncalled.
            if any(genotypes[i] is None for i in required):
                continue

            if self.only_snps and any(genotype is not None and any(
                    len(variants[i]) != 1 for i in genotype)
                                      for genotype in genotypes):
                continue

            # Keep the record only if some snpEff annotation (or the
            # empty fallback) matches the snpeff filter expression.
            snpeff = snpeff_describe(record.INFO.get('EFF', ''))
            if not any(
                    selection.matches(self.snpeff_filter, item[1])
                    for item in (snpeff or [('', [])])):
                continue

            items.append(
                _Nway_record(variants=variants,
                             genotypes=genotypes,
                             counts=counts,
                             qualities=qualities,
                             snpeff=snpeff,
                             record=record))

        self.log.log('%d variants\n\n' % len(items))

        if self.as_ == 'table':
            self._write_table(samples, items)
        elif self.as_ == 'nexus':
            self._write_nexus(samples, items)
        elif self.as_ == 'splitstree':
            self._write_nexus(samples, items)

            io.execute(
                'SplitsTree +g -i INPUT -x COMMAND',
                no_display=True,
                INPUT=self.prefix + '.nex',
                COMMAND='UPDATE; '
                'SAVE FILE=\'%s.nex\' REPLACE=yes; '
                'EXPORTGRAPHICS format=svg file=\'%s.svg\' REPLACE=yes TITLE=\'NeighborNet from %d variants\'; '
                'QUIT' % (self.prefix, self.prefix, len(items)),
            )
        elif self.as_ == 'vcf':
            self._write_vcf(samples, items, reader)

        else:
            raise grace.Error('Unknown output format: ' + self.as_)
示例#29
0
    def run(self):
        """Merge overlapping annotations of the same type/seqid/strand.

        Selected annotations are sorted, clustered by proximity (allowing
        self.overlap bases of separation), and each cluster is collapsed
        into a single feature whose attributes are joined. Parent IDs are
        remapped to the merged features. Output goes to <prefix>.gff.
        """
        annotations = []
        for filename in self.filenames:
            for item in annotation.read_annotations(filename):
                if not selection.matches(self.select, [item.type]): continue
                if self.type:
                    item.type = self.type
                annotations.append(item)

        annotations.sort(
            key=lambda item: (item.type, item.seqid, item.strand, item.start))

        group = []
        groups = []

        def emit():
            # Flush the current cluster (if any) into groups.
            if not group: return
            groups.append(group[:])
            del group[:]

        # Sweep the sorted annotations, starting a new cluster whenever
        # type/seqid/strand changes or the gap exceeds the overlap slack.
        type = None
        seqid = None
        strand = None
        end = 0
        for item in annotations:
            if item.type != type or item.seqid != seqid or item.strand != strand or item.start >= end:
                emit()
                type = item.type
                seqid = item.seqid
                strand = item.strand
                end = item.end - self.overlap
            group.append(item)
            end = max(item.end - self.overlap, end)
        emit()

        items = []

        # Maps each original member ID to the ID of its merged feature.
        id_map = {}

        for group in groups:
            item = annotation.Annotation()
            item.source = group[0].source
            item.type = group[0].type
            item.seqid = group[0].seqid
            item.strand = group[0].strand
            item.start = min(item2.start for item2 in group)
            item.end = max(item2.end for item2 in group)
            item.score = None
            item.phase = None
            item.attr = {}

            # Each attribute key gets the joined values from all members
            # that carry it, in member order.
            for item2 in group:
                for key in item2.attr:
                    if key in item.attr: continue
                    item.attr[key] = join_descriptions([
                        item3.attr[key] for item3 in group if key in item3.attr
                    ], self.joiner)

            item.parents = []
            for item2 in group:
                if 'ID' in item2.attr:
                    assert item2.attr[
                        'ID'] not in id_map, 'Duplicate ID: ' + item2.attr['ID']
                    id_map[item2.attr['ID']] = item.attr['ID']
                if 'Parent' in item2.attr:
                    item.parents.append(item2.attr['Parent'])

            items.append(item)

        # Rewrite Parent references to point at merged feature IDs.
        for item in items:
            if item.parents:
                item.attr['Parent'] = join_descriptions(
                    [id_map.get(parent, parent) for parent in item.parents],
                    ',')

        with open(self.prefix + '.gff', 'wb') as out_file:
            annotation.write_gff3_header(out_file)
            for item in items:
                print >> out_file, item.as_gff()
示例#30
0
 def matches(self, expression):
     """Test this object's tags against a selection expression."""
     own_tags = self.get_tags()
     return selection.matches(expression, own_tags)
示例#31
0
    def run(self):
        """Merge overlapping annotations (unformatted twin of the block above
        in this example collection).

        Cluster selected annotations by type/seqid/strand and proximity,
        collapse each cluster to one feature with joined attributes, remap
        Parent IDs, and write <prefix>.gff.
        """
        annotations = [ ]
        for filename in self.filenames:
            for item in annotation.read_annotations(filename):
                if not selection.matches(self.select, [item.type]): continue
                if self.type:
                    item.type = self.type
                annotations.append(item)
        
        annotations.sort(key=lambda item: (item.type, item.seqid, item.strand, item.start))
        
        group = [ ]
        groups = [ ]
        def emit():
            # Flush the current cluster (if any) into groups.
            if not group: return
            groups.append(group[:])
            del group[:]        
        # Sweep sorted annotations; a new cluster starts when the key
        # changes or the next feature begins past the overlap-slack end.
        type = None
        seqid = None
        strand = None
        end = 0
        for item in annotations:
            if item.type != type or item.seqid != seqid or item.strand != strand or item.start >= end:
                emit()
                type = item.type
                seqid = item.seqid
                strand = item.strand
                end = item.end-self.overlap
            group.append(item)
            end = max(item.end-self.overlap, end)
        emit()


        items = [ ]
        
        # Original member ID -> merged feature ID.
        id_map = { }

        for group in groups:
            item = annotation.Annotation()
            item.source = group[0].source
            item.type = group[0].type
            item.seqid = group[0].seqid
            item.strand = group[0].strand
            item.start = min( item2.start for item2 in group )
            item.end = max( item2.end for item2 in group )
            item.score = None
            item.phase = None
            item.attr = { }
            
            for item2 in group:
                for key in item2.attr:
                    if key in item.attr: continue
                    item.attr[key] = join_descriptions([ item3.attr[key] for item3 in group if key in item3.attr ], self.joiner )

            item.parents = [ ]
            for item2 in group:
                if 'ID' in item2.attr:
                    assert item2.attr['ID'] not in id_map, 'Duplicate ID: '+item2.attr['ID']
                    id_map[item2.attr['ID']] = item.attr['ID']
                if 'Parent' in item2.attr:
                    item.parents.append(item2.attr['Parent'])
            
            items.append(item)
        
        # Rewrite Parent references to merged feature IDs.
        for item in items:
            if item.parents:
                item.attr['Parent'] = join_descriptions([ id_map.get(parent,parent) for parent in item.parents ], ',')
        
        with open(self.prefix+'.gff','wb') as out_file:
            annotation.write_gff3_header(out_file)
            for item in items:
                print >> out_file, item.as_gff()
示例#32
0
    def run(self):
        """N-way VCF comparison (unformatted twin of the block above in this
        example collection).

        Keep records where at least two samples differ in genotype, apply
        require/only-snps/snpEff filters, then write table, nexus,
        splitstree or vcf output per self.as_.
        """
        # NOTE(review): reader_f is never explicitly closed — confirm
        # whether vcf.Reader takes ownership of the handle.
        reader_f = io.open_possibly_compressed_file(self.vcf)
        reader = vcf.Reader(reader_f)

        tags = { }
        for item in reader.metadata.get('sampleTags',[]):
            parts = item.split(',')
            tags[parts[0]] = parts
        
        assert 'reference' not in reader.samples, 'Can\'t have a sample called reference, sorry.'

        # Synthetic 'reference' sample with genotype [0] is prepended.
        samples = [ 'reference'] + reader.samples
        
        for sample in samples:
            if sample not in tags:
                tags[sample] = [ sample, 'all' ]

        samples = selection.select_and_sort(
            self.select, self.sort, samples, lambda sample: tags[sample])
        
        # Indices of samples that must have a called genotype.
        required = [ i for i, sample in enumerate(samples)
                     if selection.matches(self.require, tags[sample]) ]
        
        sample_number = dict((b,a) for a,b in enumerate(reader.samples))
        
        items = [ ]
        for record in reader:
            variants = get_variants(record)
            genotypes = [ ]
            counts = [ ]
            qualities = [ ]
            for sample in samples:
                if sample == 'reference':
                    genotypes.append([0])
                    counts.append([1])
                    qualities.append(float('inf'))
                else:
                    genotypes.append(get_genotype(record.samples[sample_number[sample]]))
                    counts.append(get_variant_counts(record.samples[sample_number[sample]]))
                    qualities.append(record.samples[sample_number[sample]].data.GQ)

            # Only output when there are at least two genotypes            
            any_interesting = False
            for i in xrange(len(genotypes)):
                for j in xrange(i):
                    if (genotypes[i] is not None and genotypes[j] is not None and
                        not genotypes_equal(genotypes[i], genotypes[j])):
                        any_interesting = True
                        break
                if any_interesting: break
            if not any_interesting:
                continue

            # Drop records where any required sample is uncalled.
            if any(genotypes[i] is None for i in required):
                continue
                
            if self.only_snps and any(
                genotype is not None and any(len(variants[i]) != 1 for i in genotype)
                for genotype in genotypes):
                continue
                
            # Keep only if some snpEff annotation (or empty fallback)
            # matches the snpeff filter expression.
            snpeff = snpeff_describe(record.INFO.get('EFF',''))
            if not any( selection.matches(self.snpeff_filter, item[1]) for item in (snpeff or [('',[])]) ):
                continue

            items.append(_Nway_record(variants=variants, genotypes=genotypes, counts=counts, qualities=qualities, snpeff=snpeff, record=record))
        
        self.log.log('%d variants\n\n' % len(items))
        
        if self.as_ == 'table':
            self._write_table(samples, items)
        elif self.as_ == 'nexus':
            self._write_nexus(samples, items)
        elif self.as_ == 'splitstree':
            self._write_nexus(samples, items)
            
            io.execute(
                'SplitsTree +g -i INPUT -x COMMAND',
                no_display=True,
                INPUT=self.prefix + '.nex',
                COMMAND='UPDATE; '
                        'SAVE FILE=\'%s.nex\' REPLACE=yes; '
                        'EXPORTGRAPHICS format=svg file=\'%s.svg\' REPLACE=yes TITLE=\'NeighborNet from %d variants\'; ' 
                        'QUIT' 
                        % (self.prefix, self.prefix, len(items)),
                )
        elif self.as_ == 'vcf':
            self._write_vcf(samples, items, reader)
        
        else:
            raise grace.Error('Unknown output format: '+self.as_)
示例#33
0
    def run(self):
        """Pool per-sample tail counts into per-group counts (third variant
        of this method in the example collection).

        Sum Count/Tail_count per group; average Tail/Proportion skipping
        'NA'; write <prefix>.csv.
        """
        data = io.read_grouped_table(
            self.counts,
            [('Count', str), ('Annotation', str), ('Tail_count', str),
             ('Tail', str), ('Proportion', str)],
            'Count',
        )

        features = data['Count'].keys()
        samples = data['Count'].value_type().keys()

        # Default tags to the sample's own name; #sampleTags= comments
        # in the input override.
        tags = {}
        for sample in samples:
            tags[sample] = [sample]
        for line in data.comments:
            if line.startswith('#sampleTags='):
                parts = line[len('#sampleTags='):].split(',')
                tags[parts[0]] = parts

        group_names = []
        groups = []
        group_tags = []

        for item in self.groups:
            select = selection.term_specification(item)
            name = selection.term_name(item)
            group = [
                item for item in samples
                if selection.matches(select, tags[item])
            ]
            assert group, 'Empty group: ' + name

            # NOTE(review): substring-based for/else tag propagation;
            # appears intended to keep tags shared by all members.
            this_group_tags = [name]
            for tag in tags[group[0]]:
                if tag == name: continue
                for item in group[1:]:
                    for item2 in tags[item]:
                        if tag not in item2: break
                    else:
                        this_group_tags.append(tag)

            group_names.append(name)
            groups.append(group)
            group_tags.append(this_group_tags)

        result = io.Grouped_table()
        result.comments = ['#Counts']
        for item in group_tags:
            result.comments.append('#sampleTags=' + ','.join(item))

        # Aggregate per feature, per group.
        count = []
        tail_count = []
        tail = []
        proportion = []
        for feature in features:
            this_count = []
            this_tail_count = []
            this_tail = []
            this_proportion = []
            for group in groups:
                this_this_count = []
                this_this_tail_count = []
                this_this_tail = []
                this_this_proportion = []
                for sample in group:
                    this_this_count.append(int(data['Count'][feature][sample]))
                    this_this_tail_count.append(
                        int(data['Tail_count'][feature][sample]))
                    item = data['Tail'][feature][sample]
                    if item != 'NA': this_this_tail.append(float(item))
                    item = data['Proportion'][feature][sample]
                    if item != 'NA': this_this_proportion.append(float(item))

                this_count.append(str(sum(this_this_count)))
                this_tail_count.append(str(sum(this_this_tail_count)))
                this_tail.append(
                    str(sum(this_this_tail) /
                        len(this_this_tail)) if this_this_tail else 'NA')
                this_proportion.append(
                    str(sum(this_this_proportion) / len(this_this_proportion)
                        ) if this_this_proportion else 'NA')

            count.append(this_count)
            tail_count.append(this_tail_count)
            tail.append(this_tail)
            proportion.append(this_proportion)

        matrix = io.named_matrix_type(features, group_names)
        result['Count'] = matrix(count)
        result['Annotation'] = data['Annotation']
        result['Tail_count'] = matrix(tail_count)
        result['Tail'] = matrix(tail)
        result['Proportion'] = matrix(proportion)
        result.write_csv(self.prefix + '.csv')
示例#34
0
文件: test.py 项目: stu2/tail-tools
   def run(self):
       """Run differential tests on genewise/peakwise/pairwise counts.

       Builds design matrices from self.alt (tested terms) and self.null
       (nuisance terms), hands them to an R script (TEST_R), then renders
       Degust pages and an HTML report of the results.
       """
       title = self.get_title()
   
       n_alt = len(self.alt)
       n_null = len(self.null)
       
       suffix = '-dedup' if self.dedup else ''
   
       genewise_filename = join(self.analysis,'expression','genewise'+suffix,'counts.csv')
       genewise_norm_filename = join(self.analysis,'expression','genewise'+suffix,'norm.csv')

       peakwise_filename = join(self.analysis,'expression','peakwise'+suffix,'counts.csv')
       peakwise_norm_filename = join(self.analysis,'expression','peakwise'+suffix,'norm.csv')

       pairwise_filename = join(self.analysis,'peak-shift'+suffix,'individual-pairs.csv')
       pairwise_norm_filename = join(self.analysis,'peak-shift'+suffix,'individual-pairs-norm.csv')

   
       # Sample names come from the 'Count' column group of the genewise
       # table; tags default to the sample name and may be overridden by
       # #sampleTags= comment lines.
       reader = io.Table_reader(genewise_filename, 'Count')
       reader.close()
       samples = [ item for i, item in enumerate(reader.headings) if reader.groups[i] == 'Count' ]
       tags = { }
       for item in samples:
           tags[item] = [ item ]
       for line in reader.comments:
           if line.startswith('#sampleTags='):
               parts = line[len('#sampleTags='):].split(',')
               tags[parts[0]] = parts

       # One 0/1 indicator row per model term; transpose to samples x terms.
       model = [ ]
       for term in self.alt + self.null:        
           spec = term_specification(term)
           model.append([ 1 if selection.matches(spec, tags[item]) else 0 for item in samples ])
       model = zip(*model) #Transpose
       
       # Keep only samples that appear in at least one term.
       select = [ any(row) for row in model ]
       model = [ row for row,selected in zip(model,select) if selected ]
       model_columns = [ term_name(item) for item in self.alt + self.null ]
       
       # The pairwise design duplicates the model for the two peaks of a
       # pair, adding interaction columns and a pair-membership column.
       pairs_n_alt = n_alt       
       pairs_select = select + select
       pairs_model = (
           [ (0,) * n_alt + row + (0,) for row in model ] +
           [ row[:n_alt]  + row + (1,) for row in model ] 
           )
       pairs_model_columns = (
           [ item+'-interaction' for item in model_columns[:n_alt] ] +
           model_columns +
           [ 'pair2' ]
           )
       
       workspace = self.get_workspace()
       
       runr.run_script(TEST_R, self.tell,
           SOURCE = os.path.join(os.path.dirname(__file__),'tail_tools.R'),
           DIR = workspace.working_dir,
           MIN_READS = self.min_reads,
           GENEWISE_FILENAME = genewise_filename,
           GENEWISE_NORM_FILENAME = genewise_norm_filename,
           PEAKWISE_FILENAME = peakwise_filename,
           PEAKWISE_NORM_FILENAME = peakwise_norm_filename,
           PAIRWISE_FILENAME = pairwise_filename,
           PAIRWISE_NORM_FILENAME = pairwise_norm_filename,
           
           N_ALT = n_alt,
           SELECT = select,
           MODEL = model,
           MODEL_COLUMNS = model_columns,
           PAIRS_N_ALT = pairs_n_alt,
           PAIRS_SELECT = pairs_select,
           PAIRS_MODEL = pairs_model,
           PAIRS_MODEL_COLUMNS = pairs_model_columns,
           )
       # In "tell" mode we only show what would be run.
       if self.tell: return
       
       reporter = reporting.Reporter(workspace.working_dir, title)
       
       if self.dedup:
           reporter.p('Read deduplication was used.')
       
       # For each result set, build a Degust page and link it from the report.
       for entities, result, aveexpr, subtitle, terms in [
           ('genes', 'genewise-voom', 'avg.expression', 'Genewise expression level', model_columns[:n_alt]),
           ('genes', 'genewise-tail', 'avg.tail', 'Genewise tail length', model_columns[:n_alt]),
           ('peaks', 'peakwise-voom', 'avg.expression', 'Peakwise expression level', model_columns[:n_alt]),
           ('peaks', 'peakwise-tail', 'avg.tail', 'Peakwise tail length', model_columns[:n_alt]),
           ('peak pairs', 'pairwise-voom', 'avg.expression', 'Peak-pair expression shift', pairs_model_columns[:n_alt]),
           ('peak pairs', 'pairwise-tail', 'avg.tail', 'Peak-pair tail length shift', pairs_model_columns[:n_alt]),
           ]:
           #data = io.read_grouped_table(workspace/(result+'-toptable.csv'))['All']
           #n = 0
           #n_01 = 0
           #n_05 = 0
           #for row in data.values():
           #    fdr = float(row['adj.P.Val'])
           #    if fdr <= 0.01: n_01 += 1
           #    if fdr <= 0.05: n_05 += 1
           #    n += 1
           
           io.execute([
               'degust.py',
               '--name', title + ' : ' + subtitle,
               '--avg', aveexpr,
               '--primary', 'baseline',
               '--logFC', ','.join(terms),
               '--fdr', 'adj.P.Val',
               '--info', 'gene,locus_tag,product,reads,polya.reads,tail.lengths,'+aveexpr,
               '--notour', '1',
               '--out', workspace/(result+'.html'),
               workspace/(result+'-toptable.csv'),
               ])
            
           reporter.subheading( reporter.href(workspace/(result+'.html'), subtitle) )
           #reporter.p( '%d %s, %d with fdr&lt;=0.01, %d with fdr&lt;=0.05' % (n,entities,n_01,n_05) )
           with open(workspace/(result+'.txt'),'rU') as f:
               for line in f:
                   reporter.write(line.strip() + '<br/>\n')
        
       reporter.close()