示例#1
0
def align_other_clinical(a):
    """Attach sibling clinical records to ``a`` and strip patient names.

    Every clinical record sharing ``a['MRN']`` is looked up; all of them
    except ``a`` itself are stored under ``a['RELATED']``. The patient-name
    fields are removed from ``a`` and from each related record. ``a`` is
    modified in place.

    :param a: clinical record dict; must provide ``_id`` and ``MRN``.
    :return: None
    """
    name_fields = ("FIRST_NAME", "LAST_NAME", "FIRST_LAST", "LAST_FIRST")

    # extract the clinical id.
    clinical_id = a['_id']

    # lookup any matches.
    cbio = CBioEngine(settings.MONGO_URI,
                      settings.MONGO_DBNAME,
                      data_model.match_schema,
                      muser=settings.MONGO_USERNAME,
                      mpass=settings.MONGO_PASSWORD,
                      collection_clinical=settings.COLLECTION_CLINICAL,
                      collection_genomic=settings.COLLECTION_GENOMIC)
    # NOTE(review): ``_c`` is presumably the clinical collection handle of
    # the engine -- confirm against CBioEngine.
    clinical_db = cbio._c

    # collect records with the same MRN, leaving out the record itself.
    related = []
    for clinical in clinical_db.find({"MRN": a['MRN']}):
        if clinical['_id'] == clinical_id:
            continue

        # pop() instead of del: a record without one of the name fields
        # must not abort the whole lookup with a KeyError.
        for nm in name_fields:
            clinical.pop(nm, None)
        related.append(clinical)

    # add them to record.
    a['RELATED'] = related

    # remove patient name
    for nm in name_fields:
        a.pop(nm, None)
示例#2
0
def rerun_filters(dpi=None):
    """Re-run stored filters against the current data.

    Processes every filter document with ``status == 1``, ``temporary``
    false and no ``trial_watch`` field. For each one, the freshly computed
    matches are compared with the documents already stored in the ``match``
    collection, using ``(CLINICAL_ID, genomic id)`` pairs:

    * pairs that are stored but no longer produced are removed from the
      stored match document; the document is deleted once no variant is
      left in it;
    * pairs that are both stored and newly produced are dropped from
      ``cbio.match_df`` so the stored documents (and any options set on
      them) are left untouched;
    * ``count_matches`` and ``insert_matches`` are then called with what
      remains.

    :param dpi: passed through unchanged to ``insert_matches``.
    :return: None
    """

    # get the database links.
    match_db = database.get_collection('match')
    filter_db = database.get_collection('filter')

    # create the object.
    cbio = CBioEngine(settings.MONGO_URI,
                      settings.MONGO_DBNAME,
                      data_model.match_schema,
                      muser=settings.MONGO_USERNAME,
                      mpass=settings.MONGO_PASSWORD,
                      collection_clinical=settings.COLLECTION_CLINICAL,
                      collection_genomic=settings.COLLECTION_GENOMIC)

    query = {
        'status': 1,
        'temporary': False,
        'trial_watch': {
            '$exists': False
        }
    }

    # materialize the cursor up front; each iteration does a lot of work.
    for filter_ in list(filter_db.find(query)):

        # lots of logging.
        logging.info("rerun_filters: filter: %s", filter_['_id'])

        # prepare the filters and execute the match.
        c, g, _ = prepare_criteria(filter_)
        cbio.match(c=c, g=g)

        if (cbio.match_df is not None and cbio.genomic_df is not None
                and cbio.clinical_df is not None):
            logging.info(
                "rerun_filters: new matches: match=%d, genomic=%d, clinical=%d",
                len(cbio.match_df), len(cbio.genomic_df),
                len(cbio.clinical_df))

        # get existing matches for this filter.
        matches = list(match_db.find({'FILTER_ID': ObjectId(filter_['_id'])}))
        rec_cnt = sum(len(m['VARIANTS']) for m in matches)
        logging.info("rerun_filters: exisiting: %d %d", len(matches), rec_cnt)

        # index the stored matches: (clinical id, genomic id) -> match doc.
        old_lu = {}
        for match in matches:
            clinical_id = match['CLINICAL_ID']
            for genomic_id in match['VARIANTS']:
                old_lu[(clinical_id, genomic_id)] = match

        # index the new matches: (clinical id, genomic id) -> position
        # yielded by match_iter(), used below as a position in match_df.
        new_lu = {}
        for i, match in enumerate(cbio.match_iter()):
            new_lu[(match['CLINICAL_ID'], match['GENOMIC_ID'])] = i

        old_pairs = set(old_lu)
        new_pairs = set(new_lu)

        # find the ones which need to be deleted and delete them.
        to_delete = old_pairs - new_pairs
        logging.info("rerun_filters: removing: %d", len(to_delete))
        for pair in to_delete:
            match = old_lu[pair]
            remaining = [v for v in match['VARIANTS'] if v != pair[1]]

            # nothing to do if the variant is not in the document.
            if len(remaining) == len(match['VARIANTS']):
                continue

            if not remaining:
                # removing the variant empties the match: delete it.
                match_db.delete_one({'_id': match['_id']})
            else:
                # update_one replaces the deprecated Collection.update.
                match_db.update_one({"_id": match['_id']},
                                    {"$set": {"VARIANTS": remaining}})

                # keep the local copy in sync so a later pair of the same
                # match document starts from the reduced variant list.
                match['VARIANTS'] = remaining

        # find the intersection and remove them from data frame.
        bad_list = [new_lu[pair] for pair in new_pairs & old_pairs]
        logging.info("rerun_filters: skipping: %d", len(bad_list))

        if cbio.match_df is not None and len(cbio.match_df) > 0:
            cbio.match_df.drop(cbio.match_df.index[bad_list], inplace=True)

        # insert the counts.
        count_matches(cbio, filter_)

        # insert the remaining (new) matches.
        insert_matches(cbio, filter_, from_filter=False, dpi=dpi)
示例#3
0
def rerun_filters(dpi=None):
    """Re-run stored filters against the current data.

    Processes every filter document with ``status == 1``, ``temporary``
    false and no ``trial_watch`` field. For each one, the freshly computed
    matches are compared with the documents already stored in the ``match``
    collection, using ``(CLINICAL_ID, genomic id)`` pairs:

    * pairs that are stored but no longer produced are removed from the
      stored match document; the document is deleted once no variant is
      left in it;
    * pairs that are both stored and newly produced are dropped from
      ``cbio.match_df`` so the stored documents (and any options set on
      them) are left untouched;
    * ``count_matches`` and ``insert_matches`` are then called with what
      remains.

    :param dpi: passed through unchanged to ``insert_matches``.
    :return: None
    """

    # get the database links.
    match_db = database.get_collection('match')
    filter_db = database.get_collection('filter')

    # create the object.
    cbio = CBioEngine(settings.MONGO_URI,
                      settings.MONGO_DBNAME,
                      data_model.match_schema,
                      muser=settings.MONGO_USERNAME,
                      mpass=settings.MONGO_PASSWORD,
                      collection_clinical=settings.COLLECTION_CLINICAL,
                      collection_genomic=settings.COLLECTION_GENOMIC)

    query = {'status': 1, 'temporary': False, 'trial_watch': {'$exists': False}}

    # materialize the cursor up front; each iteration does a lot of work.
    for filter_ in list(filter_db.find(query)):

        # lots of logging.
        logging.info("rerun_filters: filter: %s", filter_['_id'])

        # prepare the filters and execute the match.
        c, g, _ = prepare_criteria(filter_)
        cbio.match(c=c, g=g)

        if (cbio.match_df is not None and cbio.genomic_df is not None
                and cbio.clinical_df is not None):
            logging.info(
                "rerun_filters: new matches: match=%d, genomic=%d, clinical=%d",
                len(cbio.match_df), len(cbio.genomic_df),
                len(cbio.clinical_df))

        # get existing matches for this filter.
        matches = list(match_db.find({'FILTER_ID': ObjectId(filter_['_id'])}))
        rec_cnt = sum(len(m['VARIANTS']) for m in matches)
        logging.info("rerun_filters: exisiting: %d %d", len(matches), rec_cnt)

        # index the stored matches: (clinical id, genomic id) -> match doc.
        old_lu = {}
        for match in matches:
            clinical_id = match['CLINICAL_ID']
            for genomic_id in match['VARIANTS']:
                old_lu[(clinical_id, genomic_id)] = match

        # index the new matches: (clinical id, genomic id) -> position
        # yielded by match_iter(), used below as a position in match_df.
        new_lu = {}
        for i, match in enumerate(cbio.match_iter()):
            new_lu[(match['CLINICAL_ID'], match['GENOMIC_ID'])] = i

        old_pairs = set(old_lu)
        new_pairs = set(new_lu)

        # find the ones which need to be deleted and delete them.
        to_delete = old_pairs - new_pairs
        logging.info("rerun_filters: removing: %d", len(to_delete))
        for pair in to_delete:
            match = old_lu[pair]
            remaining = [v for v in match['VARIANTS'] if v != pair[1]]

            # nothing to do if the variant is not in the document.
            if len(remaining) == len(match['VARIANTS']):
                continue

            if not remaining:
                # removing the variant empties the match: delete it.
                match_db.delete_one({'_id': match['_id']})
            else:
                # update_one replaces the deprecated Collection.update.
                match_db.update_one({"_id": match['_id']},
                                    {"$set": {"VARIANTS": remaining}})

                # keep the local copy in sync so a later pair of the same
                # match document starts from the reduced variant list.
                match['VARIANTS'] = remaining

        # find the intersection and remove them from data frame.
        bad_list = [new_lu[pair] for pair in new_pairs & old_pairs]
        logging.info("rerun_filters: skipping: %d", len(bad_list))

        if cbio.match_df is not None and len(cbio.match_df) > 0:
            cbio.match_df.drop(cbio.match_df.index[bad_list], inplace=True)

        # insert the counts.
        count_matches(cbio, filter_)

        # insert the remaining (new) matches.
        insert_matches(cbio, filter_, from_filter=False, dpi=dpi)
示例#4
0
def find_match(items):
    """Describe each filter and (re)compute its matches.

    Called after insertion in the DB is complete. For every filter in
    ``items`` a human-readable description is built from the prepared
    criteria text, written to the ``filter`` collection and stored on the
    item. Matches are recomputed only when ``miner.detect_update`` reports
    a change; otherwise only the match status is propagated.

    :param items: iterable of filter documents (dicts)
    """

    db = app.data.driver.db
    cbio = CBioEngine(settings.MONGO_URI,
                      settings.MONGO_DBNAME,
                      data_model.match_schema,
                      muser=settings.MONGO_USERNAME,
                      mpass=settings.MONGO_PASSWORD,
                      collection_clinical=settings.COLLECTION_CLINICAL,
                      collection_genomic=settings.COLLECTION_GENOMIC)

    for item in items:

        c, g, txt = miner.prepare_criteria(item)
        gen_txt, clin_txt = txt
        cancer, age, gender = clin_txt

        has_cancer = not (cancer == "")
        has_gender = not (gender == "")
        has_age = not (age == "")

        # start from the genomic text, then append each clinical part
        # that is present.
        description = gen_txt
        if has_cancer:
            description = "%s in %s" % (gen_txt, cancer)
        if has_gender:
            description = "%s, Gender: %s" % (description, gender)
        if has_age:
            description = "%s, Age %s" % (description, age)

        # an empty list of genomic text becomes an empty description.
        if isinstance(description, list) and not description:
            description = ''

        db.filter.update_one({"_id": item["_id"]},
                             {"$set": {"description": description}})
        item['description'] = description

        # only recompute match if there was an update.
        if not miner.detect_update(cbio, item):
            # pass along status variable to matches.
            miner.update_match_status(cbio, item)
            continue

        miner.remove_matches(cbio, item)
        cbio.match(c=c, g=g)
        miner.count_matches(cbio, item)
        dpi = get_data_push_id(db)
        if not item["temporary"]:
            miner.insert_matches(cbio, item, dpi=dpi)
示例#5
0
def align_matches_genomic(a):
    """Embed matching filters into genomic items and merge cytoband text.

    For every entry of ``a['_items']`` whose ``_id`` appears as a variant of
    a stored match for the same clinical record, the filters behind those
    matches are appended to ``item['FILTER']`` -- provided the filter exists,
    has ``status == 1`` and its ``TEAM_ID`` is one of the current user's
    teams. Independently, a non-null ``GENETIC_EVENT`` is appended to the
    item's ``CYTOBAND`` text. ``a`` is modified in place.

    :param a: response dict with an ``_items`` list of genomic documents.
    :return: None
    """

    # short circuit.
    items = a['_items']
    if len(items) == 0:
        return

    # get the user.
    if settings.NO_AUTH:
        logging.info("NO AUTH enabled. align_matches_genomic")
        user = app.data.driver.db['user'].find_one({"last_name": "Doe"})
    else:
        user = app.auth.get_request_auth_value()

    # all items are looked up under the first item's clinical id.
    clinical_id = items[0]['CLINICAL_ID']

    # lookup any matches.
    cbio = CBioEngine(settings.MONGO_URI,
                      settings.MONGO_DBNAME,
                      data_model.match_schema,
                      muser=settings.MONGO_USERNAME,
                      mpass=settings.MONGO_PASSWORD,
                      collection_clinical=settings.COLLECTION_CLINICAL,
                      collection_genomic=settings.COLLECTION_GENOMIC)

    mongo_db = cbio.connection[cbio.mongo_dbname]
    match_db = mongo_db['match']
    filter_db = mongo_db['filter']

    # variant id -> ids of the filters that matched it.
    filters_by_variant = dict()
    for match in match_db.find({"CLINICAL_ID": clinical_id}):
        for variant_id in match['VARIANTS']:
            filters_by_variant.setdefault(variant_id, list()).append(
                match['FILTER_ID'])

    for item in items:
        for filter_id in filters_by_variant.get(item['_id'], ()):
            filter_doc = filter_db.find_one(filter_id)

            # skip filters that are gone, do not have status 1, or belong
            # to a team the user is not part of.
            if (filter_doc is None
                    or filter_doc['status'] != 1
                    or filter_doc['TEAM_ID'] not in set(user['teams'])):
                continue

            # embed this in filter.
            item.setdefault('FILTER', list()).append(filter_doc)

        # merge genetic event with cytoband
        if ('GENETIC_EVENT' in item and 'CYTOBAND' in item
                and item['GENETIC_EVENT'] is not None):
            item['CYTOBAND'] = '%s %s' % (item['CYTOBAND'],
                                          item['GENETIC_EVENT'])
示例#6
0
def find_match(items):
    """Build filter descriptions and refresh their matches.

    Called after insertion in the DB is complete. Each filter in ``items``
    gets a description derived from its prepared criteria text; the
    description is saved to the ``filter`` collection and set on the item.
    If ``miner.detect_update`` reports a change the matches are removed and
    recomputed, otherwise the status is passed along to existing matches.

    :param items: iterable of filter documents (dicts)
    """

    db = app.data.driver.db
    cbio = CBioEngine(settings.MONGO_URI,
                      settings.MONGO_DBNAME,
                      data_model.match_schema,
                      muser=settings.MONGO_USERNAME,
                      mpass=settings.MONGO_PASSWORD,
                      collection_clinical=settings.COLLECTION_CLINICAL,
                      collection_genomic=settings.COLLECTION_GENOMIC)

    for item in items:

        c, g, txt = miner.prepare_criteria(item)
        gen_txt, (cancer, age, gender) = txt

        no_cancer = cancer == ""
        no_gender = gender == ""
        no_age = age == ""

        # genomic text, optionally qualified by the cancer type.
        if no_cancer:
            description = gen_txt
        else:
            description = "%s in %s" % (gen_txt, cancer)

        # append whichever of gender / age is present.
        if no_gender:
            if not no_age:
                description = "%s, Age %s" % (description, age)
        elif no_age:
            description = "%s, Gender: %s" % (description, gender)
        else:
            description = "%s, Gender: %s, Age %s" % (description, gender, age)

        # an empty list of genomic text becomes an empty description.
        if isinstance(description, list) and not description:
            description = ''

        selector = {"_id": item["_id"]}
        change = {"$set": {"description": description}}
        db.filter.update_one(selector, change)
        item['description'] = description

        # only recompute match if there was an update.
        if miner.detect_update(cbio, item):
            miner.remove_matches(cbio, item)
            cbio.match(c=c, g=g)
            miner.count_matches(cbio, item)
            dpi = get_data_push_id(db)
            if not item["temporary"]:
                miner.insert_matches(cbio, item, dpi=dpi)
        else:
            # pass along status variable to matches.
            miner.update_match_status(cbio, item)