Exemplo n.º 1
0
def do_dbmgr( args, dbh = None, warning = True ):
    """Run the dbmgr sub-command selected by the parsed arguments.

    The options are tested in a fixed order and the first one whose value
    ``is not False`` selects the handler, which is then called as
    ``handler(args, dbh)``.  When no option is set, a warning is written
    through ``cerr`` and nothing else happens.

    args    -- parsed command-line namespace
    dbh     -- database handler; when falsy, one is obtained with
               ``get_dbhandler(args, initial=args.initdb)``
    warning -- accepted but not used by this function
    """

    if not dbh:
        dbh = get_dbhandler(args, initial = args.initdb)

    # choose the handler; precedence is the order of the tests below
    handler = None
    if args.initbatch is not False:
        handler = do_initbatch
    elif args.uploadsamples is not False:
        handler = do_uploadsamples
    elif args.uploadgenotypes is not False:
        handler = do_uploadgenotypes
    elif args.uploadvcf is not False:
        handler = do_uploadvcf
    elif args.importpanels is not False:
        handler = do_importpanels
    elif args.createpca is not False:
        handler = do_createpca

    if handler is None:
        cerr('WARN: command not specified!')
    else:
        handler(args, dbh)
Exemplo n.º 2
0
 def get_filtered_sample_sets(self, sample_ids = None):
     """Return the filtered sample sets, computing and caching them on demand.

     The cached value is returned as-is when it exists and ``sample_ids`` is
     falsy.  Otherwise the sample sets are filtered again, using
     ``sample_ids`` when given or the filtered sample ids of the analytical
     sets when not, and the result replaces the cache.
     """
     # fast path: something is cached and no explicit ids were requested
     if self._filtered_sample_sets is not None and not sample_ids:
         return self._filtered_sample_sets

     if sample_ids:
         wanted_ids = sample_ids
     else:
         wanted_ids = self.get_analytical_sets().get_filtered_sample_ids()

     self._filtered_sample_sets = self.get_sample_sets().filtered( wanted_ids )
     total = self._filtered_sample_sets.total_samples
     cerr('[query]: filtered total samples: %d' % total)
     return self._filtered_sample_sets
Exemplo n.º 3
0
 def get_analytical_sets(self, sample_ids = None):
     """Return the initial analytical sets, building and caching them on demand.

     The cached value is returned when it exists and ``sample_ids`` is falsy.
     Otherwise the sample sets for ``sample_ids`` are fetched and handed,
     together with the database handler and the ``'filter'`` parameters, to
     the module-level ``get_analytical_sets`` function; the result replaces
     the cache.
     """
     # fast path: something is cached and no explicit ids were requested
     if self._analytical_sets is not None and not sample_ids:
         return self._analytical_sets

     cerr('[query]: getting initial analytical sets')
     base_sets = self.get_sample_sets( sample_ids )
     # inside the method body this name resolves to the module-level function
     self._analytical_sets = get_analytical_sets(
         self._dbh, base_sets, self._params['filter'] )
     total = self._analytical_sets.total_samples
     cerr('[query]: initial total samples: %d' % total)
     return self._analytical_sets
Exemplo n.º 4
0
def do_importpanels(args, dbh):
    """Create panels from a YAML specification file.

    The file named by ``args.infile`` must contain a mapping with a
    ``panels`` key that maps each panel code to a spec; the spec's ``loci``
    entry is a list of ``(refseq, position)`` pairs.  For every panel a
    ``dbh.Panel`` is created, added to the session and populated with the
    loci returned by ``dbh.get_locus``.

    args -- parsed command-line namespace (reads ``args.infile``)
    dbh  -- database handler
    """

    if not args.infile:
        cexit('ERR: please provide yaml input file')

    # Read with a context manager so the handle is closed, and use safe_load:
    # yaml.load() without an explicit Loader is rejected by PyYAML >= 6, and
    # the panel spec only needs plain mappings, lists and scalars.
    with open(args.infile) as stream:
        d = yaml.safe_load(stream)
    panel_specs = d['panels']

    for panel_code, panel_spec in panel_specs.items():
        loci_pos = panel_spec['loci']
        # resolve every locus before the panel is created
        loci = [ dbh.get_locus(refseq, pos)  for refseq, pos in loci_pos ]
        panel = dbh.Panel()
        panel.code = panel_code
        dbh.session().add(panel)
        for locus in loci:
            panel.loci.append( locus )
        cerr('[INFO: panel %s has been added with %d loci ]' % (panel.code, len(panel.loci)))
Exemplo n.º 5
0
def main():
    """Entry point: run the spatools script named by the first CLI argument.

    ``sys.argv[1]`` is the command; the module ``spatools.scripts.<command>``
    is imported, its ``init_argparser()`` parses the remaining arguments and
    its ``main(args)`` is called.  Exits with status 101 when the module
    cannot be imported.
    """

    greet()

    # without a command there is nothing to dispatch: show the usage text
    # instead of failing with IndexError on sys.argv[1]
    if len(sys.argv) < 2:
        usage()
        return

    command = sys.argv[1]
    opt_args = sys.argv[2:]

    cerr('Running command: %s' % command)

    try:
        M = importlib.import_module('spatools.scripts.' + command)
    except ImportError:
        cerr('Cannot import script name: %s' % command)
        sys.exit(101)

    parser = M.init_argparser()
    args = parser.parse_args(opt_args)
    M.main(args)
Exemplo n.º 6
0
    def get_filtered_analytical_sets(self, sample_ids = None):
        """Return the filtered analytical sets, building and caching them on demand.

        The cached value is returned when it exists and ``sample_ids`` is
        falsy.  Otherwise the analytical sets are built twice from the
        filtered sample sets: a first pass yields the filtered marker ids,
        and a second pass restricted to those marker ids produces the value
        that is cached and returned.
        """
        # fast path: something is cached and no explicit ids were requested
        if self._filtered_analytical_sets is not None and not sample_ids:
            return self._filtered_analytical_sets

        # sample sets already narrowed down to the retained sample ids
        sample_sets = self.get_filtered_sample_sets(sample_ids)

        # first pass: analytical sets over the filtered samples, all markers
        cerr('[query]: getting analytical sets with filtered sample sets')
        first_pass = get_analytical_sets(
            self._dbh, sample_sets, self._params['filter'] )
        cerr('[query]: filtered total samples: %d' % first_pass.total_samples)

        # markers that survive the filter in the first pass
        marker_ids = first_pass.get_filtered_marker_ids()
        cerr('[query]: filtered marker ids: %s' % str(marker_ids))

        # second pass: same samples, restricted to the surviving markers
        self._filtered_analytical_sets = get_analytical_sets(
            self._dbh, sample_sets, self._params['filter'],
            marker_ids = marker_ids )

        return self._filtered_analytical_sets
Exemplo n.º 7
0
 def __init__(self, dbfile, initial=False):
     """Open the sqlite database stored in ``dbfile``.

     dbfile  -- path of the sqlite file
     initial -- when truthy the file must not exist yet; when falsy the
                file must already exist

     If the check fails, an error is written through ``cerr`` and the
     process exits with status 1.  On success the path is stored and the
     engine and session come from ``schema.engine_from_file``.
     """
     cerr("Opening db: %s" % dbfile)

     if initial:
         # a brand-new database must not overwrite an existing file
         if os.path.isfile(dbfile):
             cerr('ERR - sqlite db file already exists: %s' % dbfile)
             sys.exit(1)
     elif not os.path.isfile(dbfile):
         cerr('ERR - sqlite db file not found: %s' % dbfile)
         sys.exit(1)

     self.dbfile = dbfile
     self.engine, self.session = schema.engine_from_file(dbfile)
Exemplo n.º 8
0
def main( args, dbh=None ):
    """Run the dbmgr command, committing only when explicitly requested.

    With ``args.commit`` set and ``args.test`` unset, ``do_dbmgr`` runs
    inside ``transaction.manager``.  In every other case a warning is
    printed and ``do_dbmgr`` runs outside a transaction; unless
    ``args.test`` is set the user is first asked for confirmation and the
    process exits with status 1 on anything not starting with ``y``.
    """

    cerr('spatools dbmgr')

    if args.test or not args.commit:
        # dry-run path: nothing is committed
        cerr('WARNING -- running without commiting to database!')
        if not args.test:
            answer = input('Do you want to continue [y/n]? ')
            confirmed = answer.lower().strip().startswith('y')
            if not confirmed:
                sys.exit(1)

        do_dbmgr(args, dbh)
        return

    with transaction.manager:
        do_dbmgr(args, dbh)
        cerr('** COMMIT to database **')
Exemplo n.º 9
0
def _alt_alleles(alt):
    """Return the list of ALT alleles of one variant.

    Accepts either a comma-separated string or a sequence of allele strings
    in which unused slots are empty, which is the form the genotype loop of
    ``do_uploadvcf`` expects from ``callset['variants/ALT']``.
    """
    if isinstance(alt, str):
        return alt.split(',')
    return [ a for a in alt if a ]


def do_uploadvcf(args, dbh):
    """Upload allele depths from a VCF file as genotypes of a batch.

    Reads ``args.infile`` with scikit-allel, matches the VCF samples against
    the samples of the batch named by ``args.batch`` (unknown samples are
    reported and skipped), looks up or creates one locus per variant, and
    adds one ``dbh.Genotype`` per (known sample, locus) pair with the A/C/G/T
    depths taken from ``calldata/AD``.  Bases that are not observed keep the
    depth -1.

    args -- parsed command-line namespace (reads ``args.batch``, ``args.infile``)
    dbh  -- database handler
    """

    # search for batch
    if not args.batch:
        cexit('ERR: please provide batch code')

    batch = dbh.Batch.search(args.batch, dbh.session())
    cerr('INFO: using batch [%s]' % batch.code)

    import allel

    # read vcf (will consume memory, unfortunately)
    callset = allel.read_vcf(args.infile,
        fields = ['samples', 'variants/CHROM', 'variants/POS', 'variants/REF', 'variants/ALT', 'calldata/AD'])

    exists = not_exists = 0

    # prepare sample list; a None entry keeps the column index aligned with
    # the VCF for samples that are not in the batch
    samples = []
    for sample_code in callset['samples']:
        # search samples
        sample = batch.search_sample(sample_code)
        if not sample:
            cerr('WARN: sample not found: %s' % sample_code)
            not_exists += 1
            samples.append(None)
        else:
            samples.append(sample)
            exists += 1

    cerr('INFO: found %d samples, missing %d samples' % (exists, not_exists))

    refs = callset['variants/REF']
    alt_rows = callset['variants/ALT']

    # prepare locus list
    locuses = []
    for chrom, pos, ref, alt in zip(
        callset['variants/CHROM'], callset['variants/POS'], refs, alt_rows):

        pos = int(pos) # force from int32 to int
        locus = dbh.get_locus_by_pos(chrom, pos)
        if not locus:

            # create and flush locus
            locus_code = '%s:%d' % (chrom, pos)
            locus = dbh.Locus(code=locus_code, refseq=chrom, position=pos, ref=ref)
            # the ALT entry is normalised first: calling .split(',') directly
            # fails when it is a row of allele slots rather than a string
            alts = _alt_alleles(alt)
            locus.alt = alts[0] if alts else ''
            if len(alts) >= 2:
                # multi-base alleles are recorded as 'X'
                locus.alt2 = alts[1] if len(alts[1]) == 1 else 'X'
                if len(alts) >= 3:
                    locus.alt3 = alts[2] if len(alts[2]) == 1 else 'X'
            dbh.session().add( locus )
            # flush so that the new locus gets its id before genotypes use it
            dbh.session().flush([locus])

        locuses.append(locus)

    # based on sample list and locus list, generate genotype

    ad = allel.GenotypeArray(callset['calldata/AD'])

    for sample_idx, sample in enumerate(samples):
        if sample is None: continue

        for locus_idx, locus in enumerate(locuses):

            ref = refs[locus_idx]
            alts = alt_rows[locus_idx]
            counts = ad[locus_idx, sample_idx]

            depth = { 'A': -1, 'C': -1, 'G': -1, 'T': -1}
            # slot 0 of AD belongs to the reference allele
            depth[ref] = int(counts[0])

            # slot i (from 1) of AD belongs to the i-th ALT allele; empty
            # slots and multi-base alleles are ignored
            for i, a in enumerate(alts, 1):
                if not a: continue
                if len(a) > 1: continue
                depth[a] = int(counts[i])

            genotype = dbh.Genotype(sample_id = sample.id, locus_id = locus.id,
                        A = depth['A'], C = depth['C'], T = depth['T'], G = depth['G'])
            genotype.call, genotype.raw_qual = basecall(depth)
            dbh.session().add( genotype )

    dbh.session().flush()
Exemplo n.º 10
0
def do_uploadgenotypes(args, dbh):
    """Upload genotypes from a delimited-text or YAML/JSON file into a batch.

    ``args.infile`` is parsed according to its extension: delimited text goes
    through ``dbh.Genotype.csv2dict``; ``.json`` / ``.yaml`` / ``.yml`` files
    must hold a mapping with ``codes`` and ``genotypes`` keys.  For every
    sample code found in the batch named by ``args.batch``, one
    ``dbh.Genotype`` is added per assay, creating the locus when it does not
    exist yet.  Unknown samples are reported and skipped.

    ``cexit`` is assumed to terminate the program, as the batch-code guard
    already relies on it.

    args -- parsed command-line namespace (reads ``args.batch``, ``args.infile``)
    dbh  -- database handler
    """

    # search for batch
    if not args.batch:
        cexit('ERR: please provide batch code')

    batch = dbh.Batch.search(args.batch, dbh.session())
    cerr('INFO: using batch [%s]' % batch.code)

    ext = os.path.splitext( args.infile )[1].lower()
    if ext in [ '.csv', '.tab', '.tsv', '.txt' ]:

        # convert to dictionary; the file is closed as soon as it is read
        with open(args.infile) as infile:
            buf, delim = detect_buffer( infile.read() )

        try:
            ## the csv2dict function has to be sample-specific method
            ## use batch.Sample.csv2dict() ??
            genotypes, errlog, codes = dbh.Genotype.csv2dict(
                            StringIO(buf),
                            with_report=True,
                            delimiter = delim )
        except ValueError as err:
            # command-line context: there is no request object to build an
            # error page from, so report the problem and stop
            cexit('ERR: ValueError: {0}'.format(err))

    elif ext in [ '.json', '.yaml', '.yml' ]:
        # safe_load: yaml.load() without a Loader is rejected by PyYAML >= 6
        with open(args.infile) as infile:
            payload = yaml.safe_load(infile)
        codes = payload['codes']
        genotypes = payload['genotypes']

    else:
        # same handling as do_uploadsamples; without it `genotypes` would be
        # undefined below
        cexit('E: unknown extension file!')

    exists = not_exists = 0
    # cache of loci already resolved in this run, keyed by assay code
    assay_set = {}

    for sample_code, genotype_set in genotypes.items():

        # search samples
        sample = batch.search_sample(sample_code)
        if not sample:
            cerr('WARN: sample not found: %s' % sample_code)
            not_exists += 1
            continue

        exists += 1

        # set the genotype
        for (assay_code, assay_data) in genotype_set['assays'].items():
            # NOTE(review): looks like leftover debug output on stdout
            print(assay_code)
            if assay_code not in assay_set:

                assay = dbh.get_locus_by_code(assay_code)
                if not assay:

                    # create and flush assay so that it gets its id
                    assay = dbh.Locus(code=assay_code, refseq=assay_data['refseq'], position=assay_data['position'])
                    dbh.session().add( assay )
                    dbh.session().flush([assay])
                    # only report a creation when one actually happened
                    cerr('INFO: creating new assay [%s]' % assay.code)

                assay_set[assay_code] = assay

            else:
                assay = assay_set[assay_code]

            genotype = dbh.Genotype(sample_id = sample.id, locus_id = assay.id,
                        A = assay_data['A'], C = assay_data['C'], T = assay_data['T'], G = assay_data['G'])
            genotype.call, genotype.raw_qual = basecall(assay_data)
            dbh.session().add( genotype )

        # flush once per sample
        dbh.session().flush()

    cerr('WARN: assays %d' % len(assay_set))

    cerr('INFO: found %d samples, not found %s samples' % (exists, not_exists))

    cerr('INFO: Parsing %s samples' % len(genotypes))
Exemplo n.º 11
0
def do_uploadsamples(args, dbh):
    """Insert or update the samples of a batch from an input file.

    ``args.infile`` is parsed according to its extension: ``.csv`` / ``.tab``
    / ``.tsv`` files go through the ``csv2dict`` method of the batch's sample
    class; ``.json`` / ``.yaml`` / ``.yml`` files must hold a mapping with
    ``codes`` (ordered sample codes) and ``samples`` (code -> sample dict).
    Each sample is looked up in the batch named by ``args.batch``, created
    when missing, updated from its dict and flushed.

    ``cexit`` is assumed to terminate the program, as the batch-code guard
    already relies on it.

    args -- parsed command-line namespace (reads ``args.batch``, ``args.infile``)
    dbh  -- database handler
    """

    # search for batch
    if not args.batch:
        cexit('ERR: please provide batch code')

    batch = dbh.Batch.search(args.batch, dbh.session())
    cerr('INFO: using batch [%s]' % batch.code)

    ext = os.path.splitext( args.infile )[1].lower()
    if ext in [ '.csv', '.tab', '.tsv' ]:

        # convert to dictionary; the file is closed as soon as it is read
        with open(args.infile) as infile:
            buf, delim = detect_buffer( infile.read() )

        ## the csv2dict function has to be sample-specific method
        ## use batch.Sample.csv2dict() ??
        samples, errlog, codes = batch.get_sample_class().csv2dict(
                        StringIO(buf),
                        with_report=True,
                        delimiter = delim )

    elif ext in [ '.json', '.yaml', '.yml' ]:
        # safe_load: yaml.load() without a Loader is rejected by PyYAML >= 6
        with open(args.infile) as infile:
            payload = yaml.safe_load(infile)
        # bind the names the loop below reads, so this branch no longer
        # ends in a NameError
        codes = payload['codes']
        samples = payload['samples']

    else:
        cexit('E: unknown extension file!')

    # insert sample dicts to target batch

    inserts = 0
    updates = 0
    # 'A': add new and update existing, 'U': update existing only,
    # 'N': add new only; currently fixed to 'A'
    option = 'A'

    # updating location first
    null_location = dbh.search_location(auto=True)
    session = dbh.session()

    with session.no_autoflush:

        for sample_code in codes:
            dict_sample = samples[sample_code]

            db_sample = batch.search_sample( sample_code )

            if option == 'A':
                if not db_sample:
                    db_sample = batch.add_sample( sample_code )
                    db_sample.location = null_location
                    inserts += 1
                else:
                    updates += 1

            elif option == 'U':
                if not db_sample:
                    continue
                updates += 1

            elif option == 'N':
                if db_sample:
                    continue
                db_sample = batch.add_sample( sample_code )
                db_sample.location = null_location
                inserts += 1

            else:
                # command-line context: report and stop instead of building
                # an error page
                cexit('ERR: invalid option: %s' % option)

            db_sample.update( dict_sample )
            cerr('Flushing sample: %s' % db_sample.code)
            session.flush([db_sample])

    cerr("Updating %d samples, inserting %d samples" % (updates, inserts))
Exemplo n.º 12
0
def greet():
    """Write the spatools banner line through ``cerr``."""
    banner = 'spatools - Python-based SNP processing tools'
    cerr(banner)
Exemplo n.º 13
0
def usage():
    """Write the command-line usage text through ``cerr`` and exit with status 0."""
    cerr('Usage:')
    # the program name comes from the way the script was invoked
    synopsis = '\t%s command [options]' % sys.argv[0]
    cerr(synopsis)
    sys.exit(0)