示例#1
0
def do_summarize(args):
    """Print per-bin statistics for peaks of the selected marker.

    Reads a tab-delimited peak table from args.infile, keeps only rows
    whose MARKER column equals args.marker, and prints each bin's stats
    in sorted key order.
    """
    table = pandas.read_table(args.infile)
    marker_peaks = table[table['MARKER'] == args.marker]
    bin_statistics = bin_stats(marker_peaks)
    for bin_key in sorted(bin_statistics):
        cout(bin_statistics[bin_key].repr())
示例#2
0
    def align(self, parameters=None, anchor_pairs=None):
        """Align this (ladder) channel's peaks against the panel's ladder.

        parameters   -- optional scan parameters; when given, the channel is
                        (re)scanned before alignment.
        anchor_pairs -- optional pre-matched anchors passed through to
                        algo.align_peaks().

        Stores alignment results (z, rss, nladder, score, duration, status,
        ztranspose) on the owning FSA and prints a one-line score report.
        Raises RuntimeError when called on a non-ladder channel.
        """

        # sanity checks: alignment only makes sense on the ladder channel
        if self.marker.code != 'ladder':
            raise RuntimeError(
                'E: align() must be performed on ladder channel!')

        if parameters:
            self.scan(parameters)  # in case this channel hasn't been scanned

        ladder = self.fsa.panel.get_ladder()

        # prepare ladder qcfunc, memoized on the ladder dict itself
        if 'qcfunc' not in ladder:
            ladder['qcfunc'] = algo.generate_scoring_function(
                ladder['strict'], ladder['relax'])

        # time the alignment so the duration can be recorded on the FSA
        start_time = time.process_time()
        result = algo.align_peaks(self, parameters, ladder, anchor_pairs)
        dpresult = result.dpresult
        fsa = self.fsa
        fsa.z = dpresult.z
        fsa.rss = dpresult.rss
        fsa.nladder = len(dpresult.sized_peaks)
        fsa.score = result.score
        fsa.duration = time.process_time() - start_time
        fsa.status = const.assaystatus.aligned
        fsa.ztranspose = dpresult.ztranspose

        #import pprint; pprint.pprint(dpresult.sized_peaks)
        #print(fsa.z)
        cout('O: Score %3.2f | %5.2f | %d/%d | %s | %5.1f | %s' %
             (fsa.score, fsa.rss, fsa.nladder, len(
                 ladder['sizes']), result.method, fsa.duration, fsa.filename))
示例#3
0
文件: dbmgr.py 项目: edawine/fatools
def do_renamefsa(args, dbh):
    """Rename FSA assay files in batch args.batch from a delimited input file.

    args.infile must contain FILENAME, SAMPLE and NEWNAME columns; the
    delimiter is ',' for .csv files, tab otherwise.  Rows with an empty
    FILENAME/SAMPLE or starting with '#' are skipped.
    """

    cout('Renaming FSA files from input file: %s' % args.infile)

    b = dbh.get_batch(args.batch)

    inrows = csv.DictReader(
        open(args.infile),
        delimiter=',' if args.infile.endswith('.csv') else '\t')

    line_counter = 1
    for r in inrows:

        line_counter += 1

        # skip blank rows and comment ('#') rows
        if not (r['FILENAME'] and r['SAMPLE']) or '#' in [
                r['FILENAME'][0], r['SAMPLE'][0]]:
            continue

        try:
            sample_code, fsa_filename, fsa_new_filename = (
                r['SAMPLE'], r['FILENAME'], r['NEWNAME'])
            s = b.search_sample(sample_code)
            a = s.assays.filter(dbh.Assay.filename == fsa_filename).one()
            a.filename = fsa_new_filename

        # narrowed from a bare 'except:' which would also intercept
        # KeyboardInterrupt/SystemExit; still re-raised after reporting
        except Exception:
            cerr('Error in executing line %d' % line_counter)
            raise
示例#4
0
文件: setup.py 项目: edawine/fatools
def setup(session):
    """Seed a new database with the built-in markers, default panel and batch."""

    # the three predefined markers all use the placeholder species 'X'
    for code in ('undefined', 'ladder', 'combined'):
        marker = schema.Marker(code=code, species='X')
        cout("INFO - marker '%s' created." % code)
        session.add(marker)

    # default panel
    panel = schema.Panel(code='undefined')
    cout("INFO - panel 'undefined' created.")
    session.add(panel)

    # default batch, used as the bin holder
    batch = schema.Batch(code='default')
    batch.assay_provider = ''
    batch.species = 'X'
    cout("INFO - batch 'default' created.")
    session.add(batch)
示例#5
0
文件: mixin2.py 项目: edawine/fatools
    def align(self, parameters=None, anchor_pairs=None):
        """Align ladder peaks and store the alignment results on the parent FSA.

        parameters   -- optional scan parameters; triggers a (re)scan first.
        anchor_pairs -- optional pre-matched anchors for algo.align_peaks().

        Raises RuntimeError when the channel's marker is not 'ladder'.
        """

        # sanity checks: alignment requires the ladder channel
        if self.marker.code != 'ladder':
            raise RuntimeError('E: align() must be performed on ladder channel!')

        if parameters:
            self.scan( parameters )         # in case this channel hasn't been scanned

        ladder = self.fsa.panel.get_ladder()

        # prepare ladder qcfunc once and cache it on the ladder dict
        if 'qcfunc' not in ladder:
            ladder['qcfunc'] =  algo.generate_scoring_function(
                                            ladder['strict'], ladder['relax'] )

        # run the alignment and record its wall-clock (CPU) duration
        start_time = time.process_time()
        result = algo.align_peaks(self, parameters, ladder, anchor_pairs)
        dpresult = result.dpresult
        fsa = self.fsa
        fsa.z = dpresult.z
        fsa.rss = dpresult.rss
        fsa.nladder = len(dpresult.sized_peaks)
        fsa.score = result.score
        fsa.duration = time.process_time() - start_time
        fsa.status = const.assaystatus.aligned
        fsa.ztranspose = dpresult.ztranspose

        #import pprint; pprint.pprint(dpresult.sized_peaks)
        #print(fsa.z)
        cout('O: Score %3.2f | %5.2f | %d/%d | %s | %5.1f | %s' %
            (fsa.score, fsa.rss, fsa.nladder, len(ladder['sizes']), result.method,
            fsa.duration, fsa.filename) )
示例#6
0
文件: facmd.py 项目: mkdryden/fatools
def do_listpeaks(args, dbh):
    """Write a TSV report of every allele peak in the selected assays.

    Optionally restricts output to the comma-separated marker codes in
    args.marker.  Output goes to args.outfile, or stdout when it is '-'.
    """

    assay_list = get_assay_list(args, dbh)
    if args.marker:
        markers = [dbh.get_marker(code) for code in args.marker.split(',')]
    else:
        markers = None

    if markers:
        cerr('Markers: %s' % ','.join(m.code for m in markers))

    if args.outfile != '-':
        out_stream = open(args.outfile, 'w')
    else:
        out_stream = sys.stdout

    try:
        out_stream.write('SAMPLE\tFILENAME\tDYE\tRTIME\tHEIGHT\tSIZE\tSCORE\tID\n')

        for (assay, sample_code) in assay_list:
            cout('Sample: %s assay: %s' % (sample_code, assay.filename))
            for channel in assay.channels:
                if markers and channel.marker not in markers:
                    continue
                cout('Marker => %s | %s [%d]' %
                     (channel.marker.code, channel.dye, len(channel.alleles)))
                for p in channel.alleles:
                    out_stream.write('%s\t%s\t%s\t%d\t%d\t%5.3f\t%3.2f\t%d\n' %
                                     (sample_code, assay.filename, channel.dye,
                                      p.rtime, p.height, p.size, p.qscore, p.id))
    finally:
        # close the output file even on error (the original leaked the
        # handle); never close stdout
        if out_stream is not sys.stdout:
            out_stream.close()
示例#7
0
文件: facmd.py 项目: edawine/fatools
def do_listpeaks(args, dbh):
    """Report allele peaks of the selected assays as tab-separated rows.

    args.marker (comma-separated codes) limits which channels are listed.
    Output goes to args.outfile, or stdout when it is '-'.
    """

    assay_list = get_assay_list(args, dbh)
    markers = ([dbh.get_marker(code) for code in args.marker.split(',')]
               if args.marker else None)

    if markers:
        cerr('Markers: %s' % ','.join(m.code for m in markers))

    out_stream = open(args.outfile, 'w') if args.outfile != '-' else sys.stdout

    try:
        out_stream.write('SAMPLE\tFILENAME\tDYE\tRTIME\tHEIGHT\tSIZE\tSCORE\tID\n')

        for (assay, sample_code) in assay_list:
            cout('Sample: %s assay: %s' % (sample_code, assay.filename))
            for channel in assay.channels:
                if markers and channel.marker not in markers:
                    continue
                cout('Marker => %s | %s [%d]' % (channel.marker.code, channel.dye,
                        len(channel.alleles)))
                for p in channel.alleles:
                    out_stream.write(
                        '%s\t%s\t%s\t%d\t%d\t%5.3f\t%3.2f\t%d\n' %
                        (sample_code, assay.filename, channel.dye, p.rtime,
                         p.height, p.size, p.qscore, p.id))
    finally:
        # fix: the original never closed the output file; stdout is kept open
        if out_stream is not sys.stdout:
            out_stream.close()
示例#8
0
def do_renamefsa(args, dbh):
    """Rename FSA files listed in args.infile within batch args.batch.

    Expects FILENAME, SAMPLE and NEWNAME columns; delimiter inferred from
    the file extension.  Blank and '#'-commented rows are skipped.
    """

    cout('Renaming FSA files from input file: %s' % args.infile)

    b = dbh.get_batch(args.batch)

    inrows = csv.DictReader(
        open(args.infile),
        delimiter=',' if args.infile.endswith('.csv') else '\t')

    line_counter = 1
    for r in inrows:

        line_counter += 1

        # skip blank rows and comment ('#') rows
        if not (r['FILENAME'] and r['SAMPLE']) or '#' in [
                r['FILENAME'][0], r['SAMPLE'][0]
        ]:
            continue

        try:
            sample_code, fsa_filename, fsa_new_filename = (
                r['SAMPLE'], r['FILENAME'], r['NEWNAME'])
            s = b.search_sample(sample_code)
            a = s.assays.filter(dbh.Assay.filename == fsa_filename).one()
            a.filename = fsa_new_filename

        # narrowed from bare 'except:'; still re-raised after reporting
        except Exception:
            cerr('Error in executing line %d' % line_counter)
            raise
示例#9
0
def do_summarize(args):
    """Print bin statistics for peaks whose MARKER column equals args.marker."""
    data = pandas.read_table(args.infile)
    selected = data[data['MARKER'] == args.marker]
    # sorted(.items()) iterates in the same key order as sorted(keys())
    for _key, stat in sorted(bin_stats(selected).items()):
        cout(stat.repr())
示例#10
0
def do_uploadfsa(args, dbh):
    """Upload FSA trace files listed in args.infile into batch args.batch.

    The input file must provide SAMPLE, FILENAME, PANEL and OPTIONS columns;
    the delimiter is ',' for .csv, tab otherwise.  Each trace is read from
    args.indir and attached to its sample.  With args.test set, per-line
    errors are reported instead of aborting.
    """

    cout('Uploading FSA files from input file: %s' % args.infile)

    b = dbh.get_batch(args.batch)

    # close the input file deterministically (the original leaked the handle)
    with open(args.infile) as infile:
        inrows = csv.DictReader(
            infile,
            delimiter=',' if args.infile.endswith('.csv') else '\t')

        line_counter = 1
        for r in inrows:

            line_counter += 1

            # skip blank rows and comment ('#') rows
            if not (r['FILENAME'] and r['SAMPLE']) or '#' in [
                    r['FILENAME'][0], r['SAMPLE'][0]
            ]:
                continue

            sample_code, fsa_filename, fsa_panel = (
                r['SAMPLE'], r['FILENAME'], r['PANEL'])
            options = tokenize(r['OPTIONS']) if r['OPTIONS'] else None

            try:
                s = b.search_sample(sample_code)
                if not s:
                    cerr('ERR - sample %s does not exist' % sample_code)
                    sys.exit(1)

                with open(args.indir + '/' + fsa_filename, 'rb') as f:
                    trace = f.read()

                a = s.add_fsa_assay(trace,
                                    filename=fsa_filename,
                                    panel_code=fsa_panel,
                                    options=options,
                                    species=args.species,
                                    dbhandler=dbh)
                cerr('INFO - sample: %s assay: %s panel: %s has been uploaded' %
                     (s.code, a.filename, fsa_panel))

            except Exception as exc:
                # in test mode report the failing line and continue;
                # otherwise abort with the original traceback
                if not args.test:
                    raise
                cerr('ERR - line %d' % line_counter)
                cerr(' => %s' % str(exc))
示例#11
0
def do_info(args, dbh):
    """Print the path and run start time of every selected trace."""
    for abspath, trace in get_traces(args, dbh):
        cout('I - trace: %s' % abspath)
        cout('I - runtime: %s' % trace.get_run_start_time())
示例#12
0
    def align(self, parameters, ladder=None, anchor_pairs=None):
        """Align ladder peaks, assign ladder sizes to alleles, fit a size curve.

        parameters   -- scan/alignment parameters; parameters.allelemethod
                        selects the size-fitting function.
        ladder       -- NOTE(review): accepted but immediately overwritten by
                        self.fsa.panel.get_ladder() below; effectively unused.
        anchor_pairs -- optional pre-matched anchors for algo.align_peaks().

        Raises RuntimeError on a non-ladder channel or when
        parameters.allelemethod is not one of the known methods.
        """

        # sanity checks
        if self.marker.code != 'ladder':
            raise RuntimeError(
                'E: align() must be performed on ladder channel!')

        ladder = self.fsa.panel.get_ladder()

        # prepare ladder qcfunc (cached on the ladder dict)
        if 'qcfunc' not in ladder:
            ladder['qcfunc'] = algo.generate_scoring_function(
                ladder['strict'], ladder['relax'])

        start_time = time.process_time()
        result = algo.align_peaks(self, parameters, ladder, anchor_pairs)
        dpresult = result.dpresult
        fsa = self.fsa
        fsa.z = dpresult.z
        fsa.rss = dpresult.rss
        fsa.nladder = len(dpresult.sized_peaks)
        fsa.score = result.score
        fsa.duration = time.process_time() - start_time

        # set allele sizes from ladder steps: pair rtime-sorted alleles with
        # ascending ladder sizes via zip
        alleles = self.get_alleles()
        alleles.sort(key=lambda x: x.rtime)

        ladder_sizes = ladder['sizes']
        ladder_sizes.sort()

        for allele, ladder_size in zip(alleles, ladder_sizes):
            allele.size = ladder_size

        # check the allele method and build the matching size-fit function
        method = parameters.allelemethod

        if method == const.allelemethod.leastsquare:
            fsa.allele_fit_func = algo.least_square(alleles, self.fsa.z)
        elif method == const.allelemethod.cubicspline:
            fsa.allele_fit_func = algo.cubic_spline(alleles)
        elif method == const.allelemethod.localsouthern:
            fsa.allele_fit_func = algo.local_southern(alleles)
        else:
            raise RuntimeError

        #min_rtime = ladders[1].rtime
        #max_rtime = ladders[-2].rtime
        fsa.min_rtime = parameters.ladder.min_rtime
        fsa.max_rtime = parameters.ladder.max_rtime

        #import pprint; pprint.pprint(dpresult.sized_peaks)
        #print(fsa.z)
        if is_verbosity(4):
            cout('O: Score %3.2f | %5.2f | %d/%d | %s | %5.1f | %s' %
                 (fsa.score, fsa.rss, fsa.nladder, len(ladder['sizes']),
                  result.method, fsa.duration, fsa.filename))
示例#13
0
def do_corralleles(args, dbh):
    """Correlate alleles between the first two analytical sets, per marker."""

    from fatools.lib.analysis.correlation import correlate_alleles

    query = get_query(args, dbh)
    analytical_sets = query.get_filtered_analytical_sets()
    for marker_code in analytical_sets.marker_ids:
        report = correlate_alleles(
            analytical_sets[0], analytical_sets[1], marker=marker_code)
        cout(make_correlate_report(report))
示例#14
0
文件: dbmgr.py 项目: edawine/fatools
def do_initbatch(args, dbh):
    """Create a new batch named args.initbatch and register it in the session."""
    new_batch = dbh.Batch()
    new_batch.code = args.initbatch
    new_batch.species = args.species
    new_batch.assay_provider = args.assayprovider
    # every new batch points its bin_batch at the 'default' batch
    new_batch.bin_batch = dbh.get_batch('default')
    dbh.session.add(new_batch)
    cout('INFO: batch %s added.' % new_batch.code)
示例#15
0
def do_initbatch(args, dbh):
    """Register a new batch (code args.initbatch) using the default bin holder."""
    batch = dbh.Batch()
    batch.code = args.initbatch
    batch.species = args.species
    batch.assay_provider = args.assayprovider
    # new batches take their bins from the 'default' batch
    default_batch = dbh.get_batch('default')
    batch.bin_batch = default_batch
    dbh.session.add(batch)
    cout('INFO: batch %s added.' % batch.code)
示例#16
0
def do_export(args, dbh):
    """Export the filtered analytical sets to args.outfile in args.outformat.

    Exits via cexit when the query yields no samples.
    """

    from fatools.lib.analytics.export import export

    query = get_query(args, dbh)
    analytical_sets = query.get_filtered_analytical_sets()
    if analytical_sets.total_samples <= 0:
        cexit('ERR - query does not yield any sample data')
    else:
        # fixed typo in the message: 'sampel' -> 'sample'
        cerr('INFO - total sample number: %d' % analytical_sets.total_samples)
    # return value of export() was previously bound to an unused variable
    export(analytical_sets, dbh, outfile=args.outfile, format=args.outformat)
    cout('Done.')
示例#17
0
def do_corralleles(args, dbh):
    """Run allele correlation for every marker across the first two sets."""

    from fatools.lib.analysis.correlation import correlate_alleles

    analytical_sets = get_query(args, dbh).get_filtered_analytical_sets()
    for marker_code in analytical_sets.marker_ids:
        corr = correlate_alleles(analytical_sets[0], analytical_sets[1],
                                 marker=marker_code)
        cout(make_correlate_report(corr))
示例#18
0
def do_allelesummary(args, dbh):
    """Print sample and allele summary reports; optionally plot the alleles."""

    from fatools.lib.analytics.summary import summarize_alleles, plot_alleles

    analytical_sets = get_query(args, dbh).get_filtered_analytical_sets()
    report = summarize_alleles(analytical_sets)
    cout(make_sample_report(analytical_sets.get_sample_sets()))
    cout(make_allele_report(report, dbh))

    if args.outplot:
        plot_alleles(report, args.outplot)
示例#19
0
def do_allelesummary(args, dbh):
    """Summarize alleles across the filtered analytical sets and report them."""

    from fatools.lib.analytics.summary import summarize_alleles, plot_alleles

    query = get_query(args, dbh)
    analytical_sets = query.get_filtered_analytical_sets()
    allele_report = summarize_alleles(analytical_sets)

    cout(make_sample_report(analytical_sets.get_sample_sets()))
    cout(make_allele_report(allele_report, dbh))

    # optional plot output
    if args.outplot:
        plot_alleles(allele_report, args.outplot)
示例#20
0
文件: dbmgr.py 项目: edawine/fatools
def do_showsample(args, dbh):
    """List each requested sample's FSA assays with their assigned markers."""

    from fatools.lib.const import channelstatus

    batch = dbh.get_batch(args.batch)
    for code in args.sample.split(','):
        sample = batch.search_sample(code)
        cout('Sample: %s' % sample.code)
        for fsa in sample.assays:
            assigned = [c.marker.code for c in fsa.channels
                        if c.status == channelstatus.assigned]
            cout(' % 3d - %s | %s | %s' %
                 (fsa.id, fsa.filename, fsa.panel.code, ','.join(assigned)))
示例#21
0
文件: dbmgr.py 项目: edawine/fatools
def do_uploadfsa(args, dbh):
    """Upload FSA traces named in args.infile into batch args.batch.

    Requires SAMPLE, FILENAME, PANEL and OPTIONS columns; delimiter is ','
    for .csv, tab otherwise.  Traces are read from args.indir.  With
    args.test set, errors are reported per line instead of aborting.
    """

    cout('Uploading FSA files from input file: %s' % args.infile)

    b = dbh.get_batch(args.batch)

    # 'with' closes the input file (the original leaked the handle)
    with open(args.infile) as infile:
        inrows = csv.DictReader(
            infile,
            delimiter=',' if args.infile.endswith('.csv') else '\t')

        line_counter = 1
        for r in inrows:

            line_counter += 1

            # skip blank rows and comment ('#') rows
            if not (r['FILENAME'] and r['SAMPLE']) or '#' in [
                    r['FILENAME'][0], r['SAMPLE'][0]]:
                continue

            sample_code, fsa_filename, fsa_panel = (
                r['SAMPLE'], r['FILENAME'], r['PANEL'])
            options = tokenize(r['OPTIONS']) if r['OPTIONS'] else None

            try:
                s = b.search_sample(sample_code)
                if not s:
                    cerr('ERR - sample %s does not exist' % sample_code)
                    sys.exit(1)

                with open(args.indir + '/' + fsa_filename, 'rb') as f:
                    trace = f.read()

                a = s.add_fsa_assay(trace, filename=fsa_filename,
                                    panel_code=fsa_panel, options=options,
                                    species=args.species, dbhandler=dbh)
                cerr('INFO - sample: %s assay: %s panel: %s has been uploaded' %
                     (s.code, a.filename, fsa_panel))

            except Exception as exc:
                # test mode: report and continue; otherwise re-raise
                if not args.test:
                    raise
                cerr('ERR - line %d' % line_counter)
                cerr(' => %s' % str(exc))
示例#22
0
def do_export(args, dbh):
    """Export filtered analytical sets to args.outfile using args.outformat.

    Exits via cexit when the query yields no samples.
    """

    from fatools.lib.analytics.export import export

    query = get_query(args, dbh)
    analytical_sets = query.get_filtered_analytical_sets()
    if analytical_sets.total_samples <= 0:
        cexit('ERR - query does not yield any sample data')
    else:
        # fixed typo in the message: 'sampel' -> 'sample'
        cerr('INFO - total sample number: %d' % analytical_sets.total_samples)
    # drop the unused 'output =' binding present in the original
    export(analytical_sets,
           dbh,
           outfile=args.outfile,
           format=args.outformat)
    cout('Done.')
示例#23
0
def do_parallel_find_peaks(channel_list, peakdb):
    """Scan channels for peaks in parallel worker processes.

    When peakdb is given, pickled peak lists are stored under each channel's
    tag; otherwise the peaks are printed to stdout.  Progress goes to stderr.
    """

    import concurrent.futures
    import pickle

    cerr('I: Processing channel(s)')
    total = len(channel_list)
    with concurrent.futures.ProcessPoolExecutor() as executor:
        for counter, (tag, peaks) in enumerate(
                executor.map(find_peaks_p, channel_list), start=1):
            if peakdb:
                peakdb.Put(tag.encode(), pickle.dumps(peaks))
            else:
                cout('== channel %s\n' % tag)
                cout(str(peaks))
            cerr('I: [%d/%d] channel %s => %d peak(s)' %
                 (counter, total, tag, len(peaks)))
示例#24
0
文件: dbmgr.py 项目: edawine/fatools
def do_importpanel(args, dbh):
    """Import panel definitions from a YAML file into the database.

    With args.update set, existing panels are synchronized; otherwise the
    panels are added as new records.  Exits when a panel's 'code' entry
    does not match its dictionary key.
    """

    # safe_load: yaml.load() without a Loader is unsafe on untrusted input
    # and requires an explicit Loader in modern PyYAML; 'with' also closes
    # the previously-leaked file handle
    with open(args.infile) as infile:
        panels = yaml.safe_load(infile)

    for code, panel in panels.items():
        if panel['code'] != code:
            cerr('ERR: code for panel %s is not consistent!' % code)
            sys.exit(1)
        p = dbh.new_panel()
        p.update(panel)
        if args.update:
            db_p = p.sync(dbh.session)
            cout("INFO: panel %s sync'd." % db_p.code)
        else:
            dbh.session.add(p)
            cout("INFO: panel %s added." % p.code)
示例#25
0
def do_importpanel(args, dbh):
    """Load panels from a YAML file and add or sync them in the database.

    Exits when a panel's 'code' entry does not match its dictionary key.
    """

    # yaml.safe_load replaces the unsafe/deprecated plain yaml.load();
    # the context manager closes the input file deterministically
    with open(args.infile) as infile:
        panels = yaml.safe_load(infile)

    for code, panel in panels.items():
        if panel['code'] != code:
            cerr('ERR: code for panel %s is not consistent!' % code)
            sys.exit(1)
        p = dbh.new_panel()
        p.update(panel)
        if args.update:
            db_p = p.sync(dbh.session)
            cout("INFO: panel %s sync'd." % db_p.code)
        else:
            dbh.session.add(p)
            cout("INFO: panel %s added." % p.code)
示例#26
0
文件: dbmgr.py 项目: bwlang/fatools
def do_viewpeakcachedb(args, dbh):
    """Show how many cached peak entries each batch holds in the peakcache DB."""

    from leveldb import LevelDB
    from collections import defaultdict

    ldb = LevelDB(args.peakcachedb, create_if_missing=False)

    counts = defaultdict(int)
    for key in ldb.RangeIter(include_value=False):
        # keys look like b'<batch>|<rest>'; tally by the batch prefix
        prefix = bytes(key.split(b'|', 1)[0])
        counts[prefix] += 1

    cout('Peakcache DB: %s' % args.peakcachedb)
    for batch_code, n in counts.items():
        cout('\t%s\t%4d' % (batch_code.decode(), n))
示例#27
0
def do_showsample(args, dbh):
    """Show, per sample code, every FSA assay and its assigned marker codes."""

    from fatools.lib.const import channelstatus

    batch = dbh.get_batch(args.batch)
    for sample_code in args.sample.split(','):
        sample = batch.search_sample(sample_code)
        cout('Sample: %s' % sample.code)
        for fsa in sample.assays:
            codes = ','.join(c.marker.code for c in fsa.channels
                             if c.status == channelstatus.assigned)
            cout(' % 3d - %s | %s | %s' %
                 (fsa.id, fsa.filename, fsa.panel.code, codes))
示例#28
0
文件: facmd.py 项目: edawine/fatools
def do_parallel_find_peaks(channel_list, peakdb):
    """Find peaks on every channel using a pool of worker processes.

    Results are stored pickled in peakdb when it is given, otherwise echoed
    to stdout; progress is reported on stderr.
    """

    import concurrent.futures
    import pickle

    cerr('I: Processing channel(s)')
    total = len(channel_list)
    counter = 0
    with concurrent.futures.ProcessPoolExecutor() as executor:
        results = executor.map(find_peaks_p, channel_list)
        for tag, peaks in results:
            counter += 1
            if peakdb:
                peakdb.Put(tag.encode(), pickle.dumps(peaks))
            else:
                cout('== channel %s\n' % tag)
                cout(str(peaks))
            cerr('I: [%d/%d] channel %s => %d peak(s)' %
                 (counter, total, tag, len(peaks)))
示例#29
0
文件: dbmgr.py 项目: edawine/fatools
def do_viewpeakcachedb(args, dbh):
    """Tally cached peak entries per batch in the LevelDB peak cache."""

    from leveldb import LevelDB
    from collections import defaultdict

    db = LevelDB(args.peakcachedb, create_if_missing=False)

    per_batch = defaultdict(int)
    for key in db.RangeIter(include_value=False):
        # key format: b'<batch_code>|<remainder>'
        batch_code = bytes(key.split(b'|', 1)[0])
        per_batch[batch_code] += 1

    cout('Peakcache DB: %s' % args.peakcachedb)
    for code, count in per_batch.items():
        cout('\t%s\t%4d' % (code.decode(), count))
示例#30
0
文件: dbmgr.py 项目: edawine/fatools
def do_viewbin(args, dbh):
    """Print the bin table (bin, mean, 25%/75% percentile, width) per marker."""

    if not args.marker:
        cexit('ERR - please provide marker code')

    marker_list = [dbh.get_marker(code) for code in args.marker.split(',')]
    batch = dbh.get_batch(args.batch or 'default')

    for marker in marker_list:
        cout('Marker: %s' % marker.label)
        cout('    Bin   Mean   25%P   75%P   Width')
        cout('  ====================================')
        for b in marker.get_bin(batch).sortedbins:
            # width is the distance between the 75% and 25% percentiles
            width = b[3] - b[2]
            cout('   %3d  %5.2f  %5.2f  %5.2f  %4.2f' %
                 (b[0], b[1], b[2], b[3], width))
示例#31
0
def do_viewbin(args, dbh):
    """Display bin boundaries and widths for each requested marker."""

    if not args.marker:
        cexit('ERR - please provide marker code')

    codes = args.marker.split(',')
    markers = [dbh.get_marker(code) for code in codes]
    batch = dbh.get_batch(args.batch or 'default')

    for m in markers:
        cout('Marker: %s' % m.label)
        cout('    Bin   Mean   25%P   75%P   Width')
        cout('  ====================================')
        for bin_entry in m.get_bin(batch).sortedbins:
            cout('   %3d  %5.2f  %5.2f  %5.2f  %4.2f' %
                 (bin_entry[0], bin_entry[1], bin_entry[2], bin_entry[3],
                  bin_entry[3] - bin_entry[2]))
示例#32
0
文件: dbmgr.py 项目: edawine/fatools
def do_showbatches(args, dbh):
    """List the codes of all batches in the database."""
    cout('Available batch(es):')
    for batch in dbh.get_batches(None):
        cout('  %s' % batch.code)
示例#33
0
文件: dbmgr.py 项目: edawine/fatools
def do_initsample(args, dbh):
    """Create or update samples in a batch from a sample-info file.

    args.infile may be delimited text (.csv/.tab/.tsv) parsed via the batch's
    sample class, or a YAML/JSON payload with 'codes' and 'samples' entries.
    Existing samples are updated; new ones are created with the default
    location.  Requires args.batch.
    """

    if not args.batch:
        cerr('ERR: batch code must be supplied!')
        sys.exit(1)

    b = dbh.Batch.search(args.batch, dbh.session)
    cout('INFO - using batch code: %s' % b.code)

    name, ext = os.path.splitext(args.infile)

    if ext in ['.csv', '.tab', '.tsv']:

        delim = ',' if ext == '.csv' else '\t'

        dict_samples, errlog, sample_codes = b.get_sample_class().csv2dict(
            open(args.infile), with_report=True, delimiter=delim)

        if dict_samples is None:
            cout('Error processing sample info file')
            cout('\n'.join(errlog))
            cexit('Terminated!')

    elif ext in ['.json', '.yaml']:
        # safe_load: yaml.load() without a Loader is unsafe/deprecated
        payload = yaml.safe_load(open(args.infile))
        sample_codes = payload['codes']
        dict_samples = payload['samples']

    else:
        # previously fell through with unbound locals -> UnboundLocalError
        cexit('ERR - unrecognized input file extension: %s' % ext)

    inserted = 0
    updated = 0

    # get default location first (to satisfy RDBMS constraints)
    null_location = dbh.search_location(auto=True)

    session = dbh.session()

    with session.no_autoflush:

        for sample_code in sample_codes:
            d_sample = dict_samples[sample_code]

            db_sample = b.search_sample(sample_code)

            if not db_sample:
                db_sample = b.add_sample(sample_code)
                inserted += 1
                cout('INFO - sample: %s added.' % db_sample.code)
                db_sample.location = null_location

            else:
                cout('INFO - sample: %s being updated...' % db_sample.code)
                updated += 1

            db_sample.update(d_sample)
            session.flush([db_sample])

    cout('INFO - inserted new %d sample(s), updated %d sample(s)' %
         (inserted, updated))
    # NOTE: the original ended with an unreachable csv.reader loop after
    # 'return'; that dead code has been removed
示例#34
0
def do_samplesummary(args, dbh):
    """Print a summary report of the filtered sample sets."""
    sample_sets = get_query(args, dbh).get_filtered_sample_sets()
    cout(make_sample_report(sample_sets))
示例#35
0
def do_samplesummary(args, dbh):
    """Build and print the sample report for the filtered query."""
    query = get_query(args, dbh)
    report = make_sample_report(query.get_filtered_sample_sets())
    cout(report)
示例#36
0
def do_clearassay(args, dbh):
    """Stub command that only announces assay clearing (no other action here)."""
    cout('Clearing assay...')
示例#37
0
def do_initsample(args, dbh):
    """Insert or update samples in batch args.batch from a sample-info file.

    Delimited input (.csv/.tab/.tsv) is parsed via the batch's sample class;
    .json/.yaml input must provide 'codes' and 'samples'.  New samples get
    the default location.  Requires args.batch.
    """

    if not args.batch:
        cerr('ERR: batch code must be supplied!')
        sys.exit(1)

    b = dbh.Batch.search(args.batch, dbh.session)
    cout('INFO - using batch code: %s' % b.code)

    name, ext = os.path.splitext(args.infile)

    if ext in ['.csv', '.tab', '.tsv']:

        delim = ',' if ext == '.csv' else '\t'

        dict_samples, errlog, sample_codes = b.get_sample_class().csv2dict(
            open(args.infile), with_report=True, delimiter=delim)

        if dict_samples is None:
            cout('Error processing sample info file')
            cout('\n'.join(errlog))
            cexit('Terminated!')

    elif ext in ['.json', '.yaml']:
        # safe_load replaces the unsafe/deprecated plain yaml.load()
        payload = yaml.safe_load(open(args.infile))
        sample_codes = payload['codes']
        dict_samples = payload['samples']

    else:
        # the original left sample_codes/dict_samples unbound here,
        # crashing later with UnboundLocalError
        cexit('ERR - unrecognized input file extension: %s' % ext)

    inserted = 0
    updated = 0

    # get default location first (to satisfy RDBMS constraints)
    null_location = dbh.search_location(auto=True)

    session = dbh.session()

    with session.no_autoflush:

        for sample_code in sample_codes:
            d_sample = dict_samples[sample_code]

            db_sample = b.search_sample(sample_code)

            if not db_sample:
                db_sample = b.add_sample(sample_code)
                inserted += 1
                cout('INFO - sample: %s added.' % db_sample.code)
                db_sample.location = null_location

            else:
                cout('INFO - sample: %s being updated...' % db_sample.code)
                updated += 1

            db_sample.update(d_sample)
            session.flush([db_sample])

    cout('INFO - inserted new %d sample(s), updated %d sample(s)' %
         (inserted, updated))
    # NOTE: removed the unreachable csv.reader block that followed the
    # original's bare 'return'
示例#38
0
def do_showbatches(args, dbh):
    """Print the code of every batch known to the database."""
    cout('Available batch(es):')
    all_batches = dbh.get_batches(None)
    for b in all_batches:
        cout('  %s' % b.code)
示例#39
0
 def initdb(self, create_table=True):
     """Initialize the database: optionally create all tables, then seed defaults."""
     from fatools.lib.sqlmodels.setup import setup

     if create_table:
         schema.Base.metadata.create_all(self.engine)
     setup(self.session)
     cout('Database at %s has been initialized.' % self.dbfile)
示例#40
0
 def initdb(self, create_table=True):
     """Create tables when requested and seed the database with defaults."""
     if create_table:
         schema.Base.metadata.create_all(self.engine)
     # populate the fresh database with the default markers/panel/batch
     from fatools.lib.sqlmodels.setup import setup
     setup(self.session)
     cout('Database at %s has been initialized.' % self.dbfile)
示例#41
0
文件: dbmgr.py 项目: edawine/fatools
def do_clearassay(args, dbh):
    """Placeholder command: announces assay clearing (no clearing logic visible)."""
    cout('Clearing assay...')
示例#42
0
def do_listpeaks(args, fsa_list, dbh):
    """Write peak (allele) reports for a list of FSAs.

    args.peaks_format selects the layout: 'standard' (simple TSV) or
    'peakscanner' (PeakScanner-compatible CSV, with an extra Peak Group
    column when args.merge is set).  Output goes to args.outfile, or stdout
    when it is '-'.  Raises RuntimeError on an unknown format.
    """

    def _open_out(mode):
        # honour '-' as stdout; never reopen stdout
        if args.outfile != '-':
            return open(args.outfile, mode)
        return sys.stdout

    def _close_out(stream):
        # the original unconditionally closed the stream, which closed
        # sys.stdout when outfile was '-' and broke all later output
        if stream is not sys.stdout:
            stream.close()

    out_stream = _open_out('w')

    if args.peaks_format == 'standard':
        out_stream.write(
            'SAMPLE\tFILENAME   \tDYE\tRTIME\tSIZE\tHEIGHT\tAREA\tSCORE\n')
    elif args.peaks_format == 'peakscanner':
        out_stream.write(
            "Dye/Sample Peak,Sample File Name,Type,Size,Height,Area in Point,Area in BP,Corrected Area in BP,Data Point,Begin Point,"
        )
        if args.merge:
            out_stream.write(
                "Begin BP,End Point,End BP,Width in Point,Width in BP,Score,Peak Group,User Comments,User Edit\n"
            )
        else:
            out_stream.write(
                "Begin BP,End Point,End BP,Width in Point,Width in BP,Score,User Comments,User Edit\n"
            )
    else:
        raise RuntimeError("Unknown value for args.peaks_format")
    _close_out(out_stream)

    for (fsa, fsa_index) in fsa_list:
        cverr(3, 'D: calling FSA %s' % fsa.filename)

        markers = fsa.panel.data['markers']

        # reopen in append mode so each FSA's rows follow the header
        out_stream = _open_out('a')

        for channel in fsa.channels:
            if channel.is_ladder():
                color = markers['x/ladder']['filter']
            else:
                color = markers['x/' + channel.dye]['filter']

            alleles = channel.get_alleles(broad_peaks_only=False)

            if is_verbosity(4):
                cout('Marker => %s | %s [%d]' %
                     (channel.marker.code, channel.dye, len(alleles)))
                # fixed: cout was called with two arguments here, unlike the
                # single-formatted-string convention used everywhere else
                cout("channel has alleles : %d" % len(alleles))

            i = 1

            smeared_alleles = channel.smeared_alleles
            if (not args.merge) or channel.is_ladder():
                for p in alleles:
                    if args.peaks_format == 'standard':
                        out_stream.write(
                            '%6s\t%10s\t%3s\t%d\t%d\t%5i\t%3.2f\t%3.2f\n' %
                            (fsa_index, fsa.filename[:-4], color, p.rtime,
                             p.size, p.height, p.area, p.qscore))
                    else:
                        out_stream.write(
                            '"%s, %i",%s, %s, %f, %i, %i, %i, %i, %i, %i, %f, %i, %f, %i, %f, %f,,\n'
                            % (color, i, fsa.filename, p.type, p.size,
                               p.height, p.area, p.area_bp, p.area_bp_corr,
                               p.rtime, p.brtime, p.begin_bp, p.ertime,
                               p.end_bp, p.wrtime, p.width_bp, p.qscore))
                    i = i + 1

            else:
                if is_verbosity(4):
                    cout('Marker => %s | %s [%d]' %
                         (channel.marker.code, channel.dye,
                          len(smeared_alleles)))
                    cout("channel has smeared alleles : %d" %
                         len(smeared_alleles))
                i = 1
                for p in smeared_alleles:
                    out_stream.write(
                        '"%s, %i", %s, %s, %f, %i, %i, %i, %i, %i, %i, %f, %i, %f, %i, %f, %f, %i,,\n'
                        % (color, i, fsa.filename, p.type, p.size, p.height,
                           p.area, p.area_bp, p.area_bp_corr, p.rtime,
                           p.brtime, p.begin_bp, p.ertime, p.end_bp, p.wrtime,
                           p.width_bp, p.qscore, p.group))
                    i = i + 1

        _close_out(out_stream)