Пример #1
0
def run_fix_sequences(session, args):
    """Re-process every sample's sequences, then refresh copy numbers.

    Each ``Sample`` yielded by ``session.query(Sample)`` is run through
    ``process_sample``, ``add_sequences_from_sample`` and
    ``remove_duplicates``, in that order.  Once all samples are done, one
    UPDATE statement sets ``copy_number`` on every row of ``sequences`` to
    one plus the number of ``duplicate_sequences`` rows whose
    ``duplicate_seq_ai`` equals that row's ``ai``.

    :param session: database session used for the queries and the update.
    :param args: parsed arguments; reads ``v_germlines``, ``j_germlines``,
        ``upstream_of_cdr3``, ``temp`` and ``nproc``, and forwards all of
        its attributes as keyword arguments to ``IdentificationProps``.
    """
    germlines_v = VGermlines(args.v_germlines)
    germlines_j = JGermlines(args.j_germlines, args.upstream_of_cdr3)

    # One set instance is handed to process_sample for every sample.
    shared_indexes = set()
    ident_props = IdentificationProps(**vars(args))

    for sample in session.query(Sample):
        add_sequences_from_sample(
            session,
            sample,
            process_sample(session, sample, shared_indexes, args.temp,
                           germlines_v, germlines_j, args.nproc),
            ident_props,
        )
        remove_duplicates(session, sample)

    logger.info('Updating copy numbers')
    connection = session.connection(mapper=Sequence)
    connection.execute(
        text('''
        UPDATE
            sequences
        SET
            copy_number = 1 + (
                SELECT
                    COUNT(*)
                FROM
                    duplicate_sequences
                WHERE
                    duplicate_seq_ai = ai
            )
    '''))
Пример #2
0
def run_import(session, args):
    """Import the sequence file of every sample listed in the metadata.

    The metadata file is ``args.metadata`` when that is set, otherwise
    ``metadata.tsv`` inside ``args.sample_dir``.  If the file is missing,
    or ``parse_metadata`` raises ``MetadataException``, an error is logged
    and the function returns without importing anything.

    Samples are handled in sorted name order.  For each one,
    ``create_sample`` is called and, when it returns a truthy sample, the
    file named by the entry's ``'file_name'`` key is opened and passed to
    ``read_file``.

    :param session: database session passed to the helpers.
    :param args: parsed arguments; reads ``v_germlines``, ``j_germlines``,
        ``upstream_of_cdr3``, ``anchor_len``, ``metadata``, ``sample_dir``,
        ``warn_existing``, ``warn_missing`` and ``format``, and forwards
        all attributes to ``IdentificationProps``.
    """
    germlines_v = VGermlines(args.v_germlines)
    germlines_j = JGermlines(args.j_germlines, args.upstream_of_cdr3,
                             args.anchor_len)

    meta_fn = args.metadata or os.path.join(args.sample_dir, 'metadata.tsv')
    if not os.path.isfile(meta_fn):
        logger.error('Metadata file not found.')
        return

    # NOTE(review): the 'rU' mode and ``ex.message`` look like Python 2
    # idioms -- confirm which interpreter versions this must run on.
    with open(meta_fn, 'rU') as fh:
        try:
            metadata = parse_metadata(session, fh, args.warn_existing,
                                      args.warn_missing, args.sample_dir)
        except MetadataException as ex:
            logger.error(ex.message)
            return

    ident_props = IdentificationProps(**vars(args))
    for sample_name in sorted(metadata.keys()):
        sample_meta = metadata[sample_name]
        sample = create_sample(session, sample_meta)
        if not sample:
            # create_sample returned nothing usable; skip this entry.
            continue

        sample_path = os.path.join(args.sample_dir, sample_meta['file_name'])
        with open(sample_path) as fh:
            read_file(session, args.format, fh, sample, germlines_v,
                      germlines_j, ident_props)
Пример #3
0
def run_import(session, args, remaps=None):
    """Create a new sample for ``args.sample_name`` and import its file.

    The study named ``args.study_name`` is fetched or created (and
    committed when newly created).  The sample is then fetched or created
    within that study; if it already existed, an error is logged and
    nothing is imported.  A new sample gets its date, descriptive
    attributes and subject filled in from ``args`` and is committed before
    ``args.input_file`` is opened and handed to ``read_file``.

    :param session: database session used for lookups and commits.
    :param args: parsed arguments describing the study, sample, subject,
        germline files and input file.
    :param remaps: passed through unchanged to ``read_file``.
    """
    germlines_v = VGermlines(args.v_germlines)
    germlines_j = JGermlines(args.j_germlines, args.upstream_of_cdr3,
                             args.anchor_len, args.min_anchor_len)

    study, study_created = funcs.get_or_create(
        session, Study, name=args.study_name)
    if study_created:
        logger.info('Created new study "{}"'.format(study.name))
        session.commit()

    sample, sample_created = funcs.get_or_create(
        session, Sample, name=args.sample_name, study=study)
    if not sample_created:
        logger.error('Sample "{}" already exists'.format(args.sample_name))
        return

    sample.date = args.date
    logger.info('Created new sample "{}"'.format(sample.name))

    # Copy the optional descriptive fields; ones absent from args become
    # None on the sample.
    arg_values = vars(args)
    optional_fields = ('subset', 'tissue', 'disease', 'lab', 'experimenter',
                       'ig_class', 'v_primer', 'j_primer')
    for field in optional_fields:
        setattr(sample, field, arg_values.get(field))

    subject, _ = funcs.get_or_create(
        session, Subject, study=study, identifier=args.subject)
    sample.subject = subject
    session.commit()

    with open(args.input_file) as fh:
        read_file(session, fh, sample, germlines_v, germlines_j, args, remaps)
Пример #4
0
def run_fix_sequences(session, args):
    """Re-process the sequences of all samples, or of a chosen subset.

    When ``args.sample_ids`` is truthy only samples whose ``id`` is in it
    are processed; otherwise every ``Sample`` is.  Each selected sample is
    run through ``process_sample``, ``add_sequences_from_sample`` and
    ``remove_duplicates``, in that order.

    :param session: database session used for the queries and updates.
    :param args: parsed arguments; reads ``v_germlines``, ``j_germlines``,
        ``upstream_of_cdr3``, ``sample_ids``, ``temp`` and ``nproc``, and
        forwards all of its attributes to ``IdentificationProps``.
    """
    germlines_v = VGermlines(args.v_germlines)
    germlines_j = JGermlines(args.j_germlines, args.upstream_of_cdr3)

    # One set instance is handed to process_sample for every sample.
    shared_indexes = set()
    ident_props = IdentificationProps(**vars(args))

    sample_query = session.query(Sample)
    if args.sample_ids:
        sample_query = sample_query.filter(Sample.id.in_(args.sample_ids))

    for sample in sample_query:
        add_sequences_from_sample(
            session,
            sample,
            process_sample(session, sample, shared_indexes, args.temp,
                           germlines_v, germlines_j, args.nproc),
            ident_props,
        )
        remove_duplicates(session, sample)
Пример #5
0
def run_identify(session, args):
    """Queue one identification task per sample and start the workers.

    A modification-log entry is recorded first.  The metadata file is
    ``args.metadata`` when set, otherwise ``metadata.tsv`` inside
    ``args.sample_dir``.  If it is missing, or ``parse_metadata`` raises
    ``MetadataException``, an error is logged and the function returns
    without starting any work.

    One task (``'path'`` and ``'meta'``) is queued per sample in sorted
    name order, and up to ``args.nproc`` ``IdentificationWorker`` objects
    are added, each with its own session from ``config.init_db``.

    ``session`` is closed on every path once it is no longer needed.  The
    close used to happen before ``parse_metadata`` was handed the same
    session, so any use of it there would have re-opened it and left it
    open while the workers ran.

    :param session: database session for the log entry and metadata
        parsing; always closed before this function returns or starts the
        workers.
    :param args: parsed arguments; reads ``v_germlines``, ``j_germlines``,
        ``upstream_of_cdr3``, ``anchor_len``, ``min_anchor_len``,
        ``metadata``, ``sample_dir``, ``warn_existing``, ``nproc`` and
        ``db_config``, and forwards all attributes to
        ``IdentificationProps``.
    """
    mod_log.make_mod('identification',
                     session=session,
                     commit=True,
                     info=vars(args))
    try:
        # Load the germlines from files
        v_germlines = VGermlines(args.v_germlines)
        j_germlines = JGermlines(args.j_germlines, args.upstream_of_cdr3,
                                 args.anchor_len, args.min_anchor_len)
        tasks = concurrent.TaskQueue()

        # If metadata is not specified, assume it is "metadata.tsv" in the
        # sample directory
        meta_fn = args.metadata if args.metadata else os.path.join(
            args.sample_dir, 'metadata.tsv')

        # Verify the metadata file exists
        if not os.path.isfile(meta_fn):
            logger.error('Metadata file not found.')
            return

        # NOTE(review): the 'rU' mode and ``ex.message`` look like Python 2
        # idioms -- confirm which interpreter versions this must run on.
        with open(meta_fn, 'rU') as fh:
            try:
                metadata = parse_metadata(session, fh, args.warn_existing,
                                          args.sample_dir)
            except MetadataException as ex:
                logger.error(ex.message)
                return
    finally:
        # Release the session only after its last use, and on every exit
        # path, so nothing is held open while the workers run.
        session.close()

    # Create the tasks for each file
    for sample_name in sorted(metadata.keys()):
        tasks.add_task({
            'path':
            os.path.join(args.sample_dir, metadata[sample_name]['file_name']),
            'meta':
            metadata[sample_name]
        })

    props = IdentificationProps(**vars(args))
    lock = mp.Lock()
    # Never create more workers than there are tasks to hand out.
    for _ in range(min(args.nproc, tasks.num_tasks())):
        worker_session = config.init_db(args.db_config)
        tasks.add_worker(
            IdentificationWorker(worker_session, v_germlines, j_germlines,
                                 props, lock))

    tasks.start()
Пример #6
0
def run_identify(session, args):
    """Run identification on each sample listed in the metadata file.

    A modification-log entry is recorded first.  The metadata file is
    ``args.metadata`` when set, otherwise ``metadata.tsv`` inside
    ``args.sample_dir``.  If the file is missing, or ``parse_metadata``
    raises ``MetadataException``, an error is logged and the process exits
    via ``sys.exit(-1)``.

    After the metadata is parsed the session is closed, and
    ``process_sample`` is called once per sample in sorted name order.

    :param session: database session for the log entry and metadata
        parsing; closed before any sample is processed.
    :param args: parsed arguments; reads ``v_germlines``, ``j_germlines``,
        ``upstream_of_cdr3``, ``anchor_len``, ``min_anchor_len``, ``ties``,
        ``genotyping``, ``metadata``, ``sample_dir``, ``warn_existing``,
        ``warn_missing``, ``db_config`` and ``nproc``, and forwards all
        attributes to ``IdentificationProps``.
    """
    mod_log.make_mod('identification', session=session, commit=True,
                     info=vars(args))

    # Ties are only passed as enabled when genotyping is off.
    use_ties = args.ties and not args.genotyping

    # Load the germlines from files
    v_germlines = VGermlines(args.v_germlines, ties=use_ties)
    j_germlines = JGermlines(args.j_germlines, args.upstream_of_cdr3,
                             args.anchor_len, args.min_anchor_len,
                             ties=use_ties)

    # Fall back to "metadata.tsv" in the sample directory when no metadata
    # file was given explicitly.
    meta_fn = args.metadata or os.path.join(args.sample_dir, 'metadata.tsv')

    # Verify the metadata file exists
    if not os.path.isfile(meta_fn):
        logger.error('Metadata file not found.')
        sys.exit(-1)

    # NOTE(review): the 'rU' mode was removed in Python 3.11 -- confirm
    # which interpreter versions this must run on.
    with open(meta_fn, 'rU') as fh:
        try:
            metadata = parse_metadata(session, fh, args.warn_existing,
                                      args.warn_missing, args.sample_dir)
        except MetadataException as ex:
            logger.error(ex)
            sys.exit(-1)

    session.close()

    # Process the samples one after another, in sorted name order
    props = IdentificationProps(**vars(args))
    for sample_name in sorted(metadata.keys()):
        sample_meta = metadata[sample_name]
        sample_path = os.path.join(args.sample_dir, sample_meta['file_name'])
        process_sample(args.db_config, v_germlines, j_germlines, sample_path,
                       sample_meta, props, args.nproc)