Exemplo n.º 1
0
    def __init__(self, nib_fns=None, nib_dirs=None):
        '''Populate the NibDB from explicit .nib files and/or directories.

        *nib_fns* is a path, or a sequence of paths, to specific .nib files
        desired for the NibDB.  *nib_dirs* is a path, or a sequence of paths, to
        directories containing .nib files such that every ``*.nib`` file in the
        directories is added to the NibDB.  Omitting either argument is the
        same as passing an empty list.

        Sequences are keyed by the third element returned by
        ``get_file_parts`` for each path.  Explicitly passed files take
        precedence over those found in directories when these names collide;
        only the winning file for each name is opened.
        '''
        SeqDB.__init__(self)

        # None stands for "nothing supplied" so that no mutable object is
        # shared between calls through the default argument values
        if nib_dirs is None:
            nib_dirs = []
        elif isinstance(nib_dirs, str):  # user just provided single directory
            nib_dirs = [nib_dirs]

        if nib_fns is None:
            nib_fns = []
        elif isinstance(nib_fns, str):  # user just provided single file
            nib_fns = [nib_fns]

        # find all *.nib files in the directories passed
        dir_nibs = []
        for d in nib_dirs:
            dir_nibs.extend(glob.glob(os.path.join(d, '*.nib')))

        # settle name collisions before opening anything: directory files come
        # first so explicitly named files overwrite them (without warning),
        # and a shadowed file is never opened just to have its handle dropped
        chosen = {}
        for fn in dir_nibs + list(nib_fns):
            nib_path, nib_fn, nib_base, nib_ext = get_file_parts(fn)
            chosen[nib_base] = fn

        # open each selected .nib file and record it in the db
        for nib_base, fn in chosen.items():
            fn, nib_f = _nib_fd(fn)
            self._db_map[nib_base] = nib_f

            # store some info
            self.db_info[nib_base]['path'] = fn
            nbases = validate_nib_file(self._db_map[nib_base])
            self.db_info[nib_base]['nbases'] = nbases
Exemplo n.º 2
0
    def __init__(self, nib_fns=None, nib_dirs=None):
        '''Populate the NibDB from explicit .nib files and/or directories.

        *nib_fns* is a path, or a sequence of paths, to specific .nib files
        desired for the NibDB.  *nib_dirs* is a path, or a sequence of paths, to
        directories containing .nib files such that every ``*.nib`` file in the
        directories is added to the NibDB.  Omitting either argument is the
        same as passing an empty list.

        Sequences are keyed by the third element returned by
        ``get_file_parts`` for each path.  Explicitly passed files take
        precedence over those found in directories when these names collide;
        only the winning file for each name is opened.
        '''
        SeqDB.__init__(self)

        # None stands for "nothing supplied" so that no mutable object is
        # shared between calls through the default argument values
        if nib_dirs is None:
            nib_dirs = []
        elif isinstance(nib_dirs, str):  # user just provided single directory
            nib_dirs = [nib_dirs]

        if nib_fns is None:
            nib_fns = []
        elif isinstance(nib_fns, str):  # user just provided single file
            nib_fns = [nib_fns]

        # find all *.nib files in the directories passed
        dir_nibs = []
        for d in nib_dirs:
            dir_nibs.extend(glob.glob(os.path.join(d, '*.nib')))

        # settle name collisions before opening anything: directory files come
        # first so explicitly named files overwrite them (without warning),
        # and a shadowed file is never opened just to have its handle dropped
        chosen = {}
        for fn in dir_nibs + list(nib_fns):
            nib_path, nib_fn, nib_base, nib_ext = get_file_parts(fn)
            chosen[nib_base] = fn

        # open each selected .nib file and record it in the db
        for nib_base, fn in chosen.items():
            fn, nib_f = _nib_fd(fn)
            self._db_map[nib_base] = nib_f

            # store some info
            self.db_info[nib_base]['path'] = fn
            nbases = validate_nib_file(self._db_map[nib_base])
            self.db_info[nib_base]['nbases'] = nbases
Exemplo n.º 3
0
def get_nib_header_batch(nib, queries):
    '''Build nibFrag-style header lines for a batch of queries.

    *nib* is handed to ``_nib_fd`` and ``validate_nib_file``; the path
    returned by ``_nib_fd`` is used in default record names.  *queries* is a
    list of at most 6-tuples (start,end,strand,name,dbHeader,tbaHeader)
    representing queries as specified by the original nibFrag utility.  Only
    start, end, and strand fields are required; missing trailing fields are
    treated as None.  An *end* of -1 means the number of bases reported by
    ``validate_nib_file``.

    Returns a list with one header string (terminated by a newline) per
    query, in the order the queries were given.'''

    nib_path, nib_f = _nib_fd(nib)
    path_dir, path_fn, base, path_ext = get_file_parts(nib_path)
    nbases = validate_nib_file(nib)

    template = '>%(name)s%(db)s\n'
    result = []

    for query in queries:

        # pad short records with None so all six fields can be unpacked
        padded = list(query)
        padded += [None] * (6 - len(padded))
        start, end, strand, name, dbHeader, tbaHeader = padded

        # -1 is shorthand for "through the last base"
        if end == -1:
            end = nbases

        # an explicit name wins over the default path:start-end label
        if name:
            label = name
        else:
            label = nib_path + ':%d-%d' % (start, end)
        db_part = ''

        if tbaHeader:
            # ignored for some reason in nibFrag when tbaHeader supplied and
            # dbHeader is not
            if not dbHeader:
                label = ''
            db_part = '%s.%s:%d-%d of %d' % (tbaHeader, base, start, end,
                                             nbases)

        # a dbHeader replaces whatever the tbaHeader branch produced
        if dbHeader:
            db_part = ':%s.%s:%d-%d:%s:%d' % (dbHeader, base, start, end,
                                              strand, nbases)

        result.append(template % {'name': label, 'db': db_part})

    return result
Exemplo n.º 4
0
def get_nib_header_batch(nib, queries):
    '''Batch construction of nibFrag headers.

    *nib* is passed to ``_nib_fd`` and ``validate_nib_file``; the path that
    ``_nib_fd`` returns appears in default record names.  *queries* is a list
    of at most 6-tuples (start,end,strand,name,dbHeader,tbaHeader)
    representing queries as specified by the original nibFrag utility.  Only
    start, end, and strand fields are required; absent trailing fields
    default to None, and an *end* of -1 is replaced by the base count that
    ``validate_nib_file`` reports.

    Returns the header strings, one per query and each ending in a newline,
    in query order.'''

    nib_path, nib_f = _nib_fd(nib)
    seq_dir, seq_fn, seq_base, seq_ext = get_file_parts(nib_path)
    nbases = validate_nib_file(nib)

    header_fmt = '>%(name)s%(db)s\n'
    collected = []

    for raw in queries:

        # fill in None for any optional fields the caller left off
        values = list(raw)
        values.extend([None] * (6 - len(values)))
        start, end, strand, name, dbHeader, tbaHeader = values

        # -1 stands for the end of the sequence
        end = nbases if end == -1 else end

        parts = {'db': ''}
        if not name:
            # no name supplied: fall back to path:start-end
            parts['name'] = nib_path + ':%d-%d' % (start, end)
        else:
            parts['name'] = name

        if tbaHeader:
            # ignored for some reason in nibFrag when tbaHeader supplied and
            # dbHeader is not
            if not dbHeader:
                parts['name'] = ''
            parts['db'] = '%s.%s:%d-%d of %d' % (
                tbaHeader, seq_base, start, end, nbases)

        # dbHeader, when present, overrides the tbaHeader database string
        if dbHeader:
            parts['db'] = ':%s.%s:%d-%d:%s:%d' % (
                dbHeader, seq_base, start, end, strand, nbases)

        collected.append(header_fmt % parts)

    return collected
    # parse command line arguments
    opts, args = parser.parse_args(sys.argv[1:])

    if len(args) < 3 :
        parser.error('Must provide two non-option arguments')

    # filenames and paths
    organism, experiment_fn, control_fn = args[0:3]
    control_fn = None
    if len(args) > 3 :
        control_fn = args[2]

    org_settings = get_org_settings(organism)
    refseq_fn = org_settings['annotation_path']

    exp_fpath,exp_fname,exp_fbase,exp_fext = get_file_parts(experiment_fn)
    exp_wrk_dir = os.path.abspath('.exp_%s_%s'%(exp_fbase,opts.exp_name))

    if control_fn :
        cnt_fpath,cnt_fname,cnt_fbase,cnt_fext = get_file_parts(control_fn)
        cnt_wrk_dir = os.path.abspath('.cnt_%s_%s'%(cnt_fbase,opts.exp_name))

    # the pipeline
    pipeline = Pypeline()

    steps = []

    # split up files
    calls = ["mkdir %s"%exp_wrk_dir,
             "split_file.py %s --outdir=%s %s"%(opts.split_args,exp_wrk_dir,experiment_fn),]
    if control_fn :
Exemplo n.º 6
0


if __name__ == '__main__' :

    opts,args = parser.parse_args(sys.argv[1:])

    if len(args) == 0 :
        parser.print_usage()
        sys.exit(1)

    gerald_fns = args

    # step through the files
    for gerald_fn in gerald_fns :
        path,fn,fnbase,fnext = get_file_parts(gerald_fn)
        bed_lines = []


        # where to write output to
        if opts.stdout :
            f_out = sys.stdout
        else :
            f_out = open(os.path.join(path,fnbase+'.bed'),'w')

        # process input
        gerald_d = DictReader(open(gerald_fn),fieldnames=GERALDOutput.FIELD_NAMES,delimiter='\t')
        for line_d in gerald_d :
            if (opts.pass_only and line_d['filtering'] == 'Y' and line_d['match_pos'] != '') or (not opts.pass_only and line_d['match_pos'] != '') :

                if opts.chromo_strip is not None :
Exemplo n.º 7
0
if __name__ == '__main__' :

    opts, args = parser.parse_args(sys.argv[1:])

    utility, filenames = args[0], args[1:]

    # try to find the utility
    abs_utility = os.path.abspath(utility)
    if not os.path.exists(abs_utility) :
        # look on the path
        abs_utility = Popen('which %s'%utility,shell=True,stdout=PIPE,stderr=PIPE).communicate()[0].strip()
        if not os.path.exists(abs_utility) :
            raise Exception("Utility %s could not be found in the local directory or on the user's path, exiting"%utility)
            sys.exit(1)

    upath,uname,ubase,uext = get_file_parts(abs_utility)

    runscript_tmpl = """
#!/bin/bash

#$ -N %(jobname)s
#$ -S /bin/sh
#$ -o %(stdout)s
#$ -e %(stderr)s
#$ -cwd
export PYTHONPATH=%(pythonpath)s:${PYTHONPATH}

%(utility)s %(utilargs)s %(filename)s"""

    suffix = ubase if opts.suffix is None else opts.suffix
    for fn in filenames :
Exemplo n.º 8
0
nibFrag_grp.add_option('--dbHeader',dest='dbHeader',default=None,help='Add full database info to the header, with or without -name option')
nibFrag_grp.add_option('--tbaHeader',dest='tbaHeader',default=None,help='Format header for compatibility with tba, takes database name as argument')
parser.add_option_group(nibFrag_grp)


if __name__ == '__main__' :

    opts, args = parser.parse_args(sys.argv[1:])

    if len(args) < 1 :
        parser.print_usage()
        parser.exit(1)

    # setup
    nib_path = args[0]
    nib_dir,nib_fn,nib_base,nib_ext = get_file_parts(nib_path)

    queries = []
    if opts.batch :

        if len(args) < 2 :
            parser.error('Two arguments must be supplied in batch mode')

        batch_fns = args[1:]

        for fn in batch_fns :
            if opts.batch_format == 'BED' :
                for bed in BEDFile(fn) :
                    if bed['chrom'] != nib_base :
                        warnings.warn('Chromosome in BED line %s does not match file %s, skipping'%(bed['chrom'],nib_base))
                    else :
Exemplo n.º 9
0
            filter_str = filter_str.replace('>=','_GTE_')
            filter_str = filter_str.replace('<=','_LTE_')
            filter_str = filter_str.replace('>','_GT_')
            filter_str = filter_str.replace('<','_LT_')
            fn_str += '_%s'%filter_str

        if opts.top is not None :
            fn_str += '_top%d'%opts.top

        if len(opts.sort_by) != 0 :
            fn_str += '_sortby_%s'%opts.sort_by

        if opts.shuffle :
            fn_str += '_shuffled'

        macs_path,macs_fn,macs_basefn,macs_ext = get_file_parts(args[0])
        encoded_fn = os.path.join(macs_path,macs_basefn+fn_str+macs_ext)
        if opts.print_encoded_fn :
            sys.stdout.write(encoded_fn)
            sys.exit(0)
        else :
            out_f = open(encoded_fn,'w')
    elif opts.output :
        out_f = open(opts.output,'w')
    else :
        out_f = sys.stdout

    # parse the filters
    field_filters = defaultdict(list)
    for filter in opts.filters :
        field, filter_cond = parse_filter(filter)
Exemplo n.º 10
0
from optparse import OptionParser

from chipsequtil import KnownGeneFile, get_file_parts

#args = ['/nfs/genomes/mouse_gp_jul_07/anno/knownGene-2010-07-08.txt','/nfs/genomes/mouse_gp_jul_07/anno/kgXref-2010-07-08.txt']
args = ['/nfs/genomes/mouse_gp_jul_07/anno/knownGene-2010-08-03.gtf','/nfs/genomes/mouse_gp_jul_07/anno/kgXref-2010-07-08.txt']
usage = '%prog <knownGene annotation>'
description = 'convert a UCSC knownGene annotation to GFF'
parser = OptionParser(usage=usage,description=description)


if __name__ == '__main__' :

    opts, args = parser.parse_args(args)

    kg_path,kg_fn,kg_base,kg_ext = get_file_parts(args[0])
    #kg_f = KnownGeneFile(args[0])

    # xref for finding gene symbols
    kgXref_fn = args[1]
    kgXref_fieldnames = ['kgID','mRNA','spID','spDisplayID','geneSymbol','refseq','proAcc','description']
    xref_map = dict([(x['kgID'],x) for x in DictReader(open(kgXref_fn),delimiter='\t',fieldnames=kgXref_fieldnames)])

    gff_headers = ['seqname','source','feature','start','end','score','strand','frame','attributes']
    gff_reader = DictReader(open(args[0]),delimiter='\t',fieldnames=gff_headers)
    gff_writer = DictWriter(sys.stdout,delimiter='\t',fieldnames=gff_headers,quotechar='',quoting=QUOTE_NONE,lineterminator='\n')
    #gff_writer.writerow(dict([(x,x) for x in gff_headers]))

    for i,rec in enumerate(gff_reader) :
        #d = {}
        #d['seqname'] = rec['chrom']
Exemplo n.º 11
0
        else :
            other_args.append(arg)

    opts, args = parser.parse_args(wqsub_args)

    if len(other_args) == 0 :
        parser.error('Must provide a command')

    command = ' '.join(other_args)
    runscript_tmpl = templates[opts.drm]
    # set up job parameters
    cmd_exe = os.path.basename(other_args[0])
    jobname = opts.wqsub_name+'_'+cmd_exe
    stdout_fn = jobname+opts.wqsub_ext
    stdout = os.path.abspath(stdout_fn)
    fpath,fname,fbase,fext = get_file_parts(stdout)
    stderr = os.path.abspath(os.path.join(jobname+'.err'))

    # get the user's current environment and put it into the execute script
    if opts.wqsub_no_env :
        env_str = '# local environment variables omitted'
    else :
        env_str = '#%s -V'%drm_symb[opts.drm]

    # construct the script
    addnl_params = []
    for addnl in opts.drm_args :
        addnl_params.append('#%s %s'%(drm_symb[opts.drm],addnl))
    addnl_params = '\n'.join(addnl_params)

    job_dict = {'jobname':fname,