Example #1
0
File: zippy.py  Project: shesketh/zippy
def main():
    from zippylib import ascii_encode_dict
    from zippylib import banner

    print >> sys.stderr, banner(__version__)

    parser = ArgumentParser(prog="zippy.py",
                            description='Zippy - Primer design and database')
    parser.add_argument('--version', action='version', version='%(prog)s '+__version__+'('+__status__+')',\
        help="Displays version")

    #   configuration files
    global_group = parser.add_argument_group('Global options')
    global_group.add_argument("-c", dest="config", default='zippy.json',metavar="JSON_FILE", \
        help="configuration file [zippy.json]")
    global_group.add_argument("--tiers", dest="tiers", default='0,1,2', \
        help="Allowed design tiers (0,1,...,n)")

    # run modes
    subparsers = parser.add_subparsers(help='Help for subcommand')

    ## add primers
    parser_add = subparsers.add_parser(
        'add', help='Add previously designed primers to database')
    parser_add.add_argument("primers", default=None, metavar="FASTA/TAB", \
        help="Primers or locations to add to database")
    parser_add.set_defaults(which='add')

    ## retrieve
    parser_retrieve = subparsers.add_parser('get', help='Get/design primers')
    parser_retrieve.add_argument("targets", default=None, metavar="VCF/BED/Interval/GenePred", \
        help="File with intervals of interest or CHR:START-END (mandatory for gap-PCR)")
    parser_retrieve.add_argument("--design", dest="design", default=False, action="store_true", \
        help="Design primers if not in database")
    parser_retrieve.add_argument("--gap", dest="gap", default=None, metavar="CHR:START-END", \
        help="Second break point for gap-PCR")
    parser_retrieve.add_argument("--nostore", dest="store", default=True, action='store_false', \
        help="Do not store result in database")
    parser_retrieve.add_argument("--outfile", dest="outfile", default='', type=str, \
        help="Output file name")
    parser_retrieve.set_defaults(which='get')

    ## query database for primers by name
    parser_query = subparsers.add_parser(
        'query',
        help='Query database for primers with specified sub-string in name')
    parser_query.add_argument("subString", default=None, metavar="Sub-string within name", \
        help="String found within primer name")
    parser_query.set_defaults(which='query')

    ## batch
    parser_batch = subparsers.add_parser(
        'batch', help='Batch design primers for sample list')
    parser_batch.add_argument("targets", default=None, metavar="FILE1,FILE2,...", \
        help="SNPpy result table(s) ")
    parser_batch.add_argument("--predesign", dest="predesign", default=False, action="store_true", \
        help="Design primers for all genes in batch")
    parser_batch.add_argument("--nodesign", dest="design", default=True, action="store_false", \
        help="Skip primer design if not in database")
    parser_batch.add_argument("--tiers", dest="tiers", default='0,1,2', \
        help="Allowed design tiers (0,1,...,n)")
    parser_batch.add_argument("--outfile", dest="outfile", default='', type=str, \
        help="Create worksheet PDF, order and robot CSV")
    parser_batch.set_defaults(which='batch')

    ## update
    parser_update = subparsers.add_parser(
        'update', help='Update status and location of primers')
    parser_update.add_argument('-l', dest="location", nargs=3, \
        help="Update storage location of primer pair (primerid vessel well)")
    parser_update.add_argument('-t', dest="locationtable", \
        help="Batch update storage locations from TSV (primerid vessel well)")
    parser_update.add_argument("--force", dest="force", default=False, action='store_true', \
        help="Force Location update (resets existing)")
    parser_update.add_argument('-b', dest="blacklist", type=str, \
        help="Blacklist primer")
    parser_update.set_defaults(which='update')

    ## dump specific datasets from database
    parser_dump = subparsers.add_parser('dump', help='Data dump')
    parser_dump.add_argument("--amplicons", dest="amplicons", default='', type=str, \
        help="Retrieve amplicons of given size (eg. 10-1000)")
    parser_dump.add_argument("--ordersheet", dest="ordersheet", default=False, action="store_true", \
        help="IDT order sheet (primer pairs with no status marker)")
    parser_dump.add_argument("--locations", dest="locations", default=False, action="store_true", \
        help="Primer locations")
    parser_dump.add_argument("--redundancies", dest="redundancies", default=False, action="store_true", \
        help="Primers with same sequence and tag")
    parser_dump.add_argument("--table", dest="table", default=False, action="store_true", \
        help="Primer pair table with locations")
    parser_dump.add_argument("--outfile", dest="outfile", default='', type=str, \
        help="Output file name")
    parser_dump.set_defaults(which='dump')

    options = parser.parse_args()

    # read config and open database
    with open(options.config) as conf:
        config = json.load(conf, object_hook=ascii_encode_dict)
    here = config['primerbed'] if 'primerbed' in config.keys(
    ) and config['primerbed'] else None
    db = PrimerDB(config['database'], dump=here)

    if options.which == 'add':  # read primers and add to database
        # import primer pairs
        if options.primers.split('.')[-1].startswith('fa'):
            pairs = importPrimerPairs(
                options.primers, config,
                primer3=False)  # import and locate primer pairs
            print >> sys.stderr, "Storing Primers..."
            db.addPair(
                *pairs
            )  # store pairs in database (assume they are correctly designed as mispriming is ignored and capped at 1000)
            sys.stderr.write('Added {} primer pairs to database\n'.format(
                len(pairs)))
        # store locations if table
        if not options.primers.split('.')[-1].startswith(
                'fa'):  # assume table format
            locations = importPrimerLocations(options.primers)
            print >> sys.stderr, "Setting Primer locations..."
            db.addLocations(*locations.items())
            sys.stderr.write(
                'Added {} locations for imported primers\n'.format(
                    len(locations)))
    elif options.which == 'dump':  # data dump fucntions (`for bulk downloads`)
        if options.amplicons:
            try:
                l = options.amplicons.split('-')
                assert len(l) == 2
                amplen = map(int, l)
            except (AssertionError, ValueError):
                raise ConfigError('must give amplicon size to retrieve')
            except:
                raise
            else:
                # get amplicons amplen
                data, colnames = db.dump('amplicons', size=amplen)
        elif options.ordersheet:
            data, colnames = db.dump('ordersheet', **config['ordersheet'])
        elif options.locations:
            data, colnames = db.dump('locations')
        elif options.table:
            data, colnames = db.dump('table')
        elif options.redundancies:
            data, colnames = db.getRedundantPrimers()
        else:
            print >> sys.stderr, "What to dump stranger?"
            sys.exit(1)
        # format data output
        if options.outfile:
            dump = Data(data, colnames)
            dump.writefile(options.outfile)  # sets format by file extension
        else:
            print '\t'.join(colnames)
            for row in data:
                print '\t'.join(map(str, row))
    elif options.which == 'update':  #update location primer pairs are stored
        if options.location:
            primer, vessel, well = options.location
            updateLocation(primer, Location(vessel, well), db, options.force)
        if options.locationtable:
            updateList = readprimerlocations(options.locationtable)
            for item in updateList:  # [ Primer, Location ]
                updateLocation(item[0], item[1], db, options.force)
        if options.blacklist:
            print >> sys.stderr, 'BLACKLISTED PAIRS: {}'.format(','.join(
                db.blacklist(options.blacklist)))
            print >> sys.stderr, 'REMOVED ORPHANS:   {}'.format(','.join(
                db.removeOrphans()))
    elif options.which == 'get':  # get primers for targets (BED/VCF or interval)
        zippyPrimerQuery(config, options.targets, options.design, options.outfile, \
            db, options.store, map(int,options.tiers.split(',')), options.gap)
    elif options.which == 'batch':
        zippyBatchQuery(config, options.targets.split(','), options.design, options.outfile, \
            db, options.predesign, map(int,options.tiers.split(',')))
    elif options.which == 'query':
        searchByName(options.subString, db)
Example #2
0
    parser_dump.add_argument("--outfile", dest="outfile", default='', type=str, \
        help="Output file name (bed,interval,fasta)")
    parser_dump.set_defaults(which='dump')

    options = parser.parse_args()

    ## read configuration (convert unicode to ascii string)
    def ascii_encode_dict(data):
        ascii_encode = lambda x: x.encode('ascii') if type(x) is unicode else x
        return dict(map(ascii_encode, pair) for pair in data.items())

    with open(options.config) as conf:
        config = json.load(conf, object_hook=ascii_encode_dict)

    # open database handler
    db = PrimerDB(config['database'])

    if options.which == 'add':  # read primers and add to database
        pairs = importPrimerPairs(
            options.primers)  # import (and locate) primer pairs
        db.addPair(
            *pairs
        )  # store pairs in database (assume they are correctly designed as mispriming is ignored and capped at 1000)
        sys.stderr.write('Added {} primer pairs to database\n'.format(
            len(pairs)))
        if options.debug:
            print repr(db)  # show database content (debugging only)
    elif options.which == 'dump':  # data dump fucntions (`for bulk downloads`)
        if options.amplicons:
            # dump amplicons fo given size to stdout
            try: