Пример #1
0
def expand_liquid_oncotree(onco_tree):
    """
    Expand the _LIQUID_ oncotree node to all of its children

    The liquid group is the union of the subtrees rooted at the nodes found
    for the texts "Lymph" and "Blood"; every other node of the tree counts as
    solid. Texts that (after stripping whitespace) appear in
    ``get_primary_tumors()`` are left out of both results. Each text is
    returned once per node; the order of the lists is not defined because the
    codes are collected in sets.

    :param onco_tree: Digraph of the Oncotree
    :returns liquid_children: All liquid tumor types in the Oncotree
             solid_children: All tumor types in the Oncotree minus "liquid_children"
    """

    # build the nodes for liquid.
    lymph_root = oncotreenx.lookup_text(onco_tree, "Lymph")
    blood_root = oncotreenx.lookup_text(onco_tree, "Blood")

    # The two DFS trees already contain every descendant, so there is no need
    # to walk the subtree of each collected node again (doing so only produced
    # duplicate entries at quadratic cost).
    liquid_codes = set(nx.dfs_tree(onco_tree, lymph_root))
    liquid_codes.update(nx.dfs_tree(onco_tree, blood_root))

    primary_tumors = get_primary_tumors()

    def _tumor_texts(codes):
        # Attributes are read through ``nodes``: the ``node`` alias is no
        # longer available on networkx graphs since release 2.4.
        texts = (onco_tree.nodes[code]['text'] for code in codes)
        return [text for text in texts if text.strip() not in primary_tumors]

    liquid_children = _tumor_texts(liquid_codes)

    # solid nodes are all other nodes
    solid_codes = set(onco_tree.nodes()) - liquid_codes
    solid_children = _tumor_texts(solid_codes)

    return liquid_children, solid_children
Пример #2
0
def expand_liquid_oncotree(onco_tree):
    """
    Expand the _LIQUID_ oncotree node to all of its children

    The liquid group is the union of the subtrees rooted at the nodes found
    for the texts "Lymph" and "Blood"; every other node of the tree counts as
    solid. Texts that (after stripping whitespace) appear in
    ``get_primary_tumors()`` are left out of both results. Each text is
    returned once per node; the order of the lists is not defined because the
    codes are collected in sets.

    :param onco_tree: Digraph of the Oncotree
    :returns liquid_children: All liquid tumor types in the Oncotree
             solid_children: All tumor types in the Oncotree minus "liquid_children"
    """

    # build the nodes for liquid.
    lymph_root = oncotreenx.lookup_text(onco_tree, "Lymph")
    blood_root = oncotreenx.lookup_text(onco_tree, "Blood")

    # Both DFS trees are closed under "descendant of", so their union is
    # already the complete liquid set. Re-running a DFS from every member
    # only re-discovered the same nodes and duplicated them in the result.
    liquid_codes = set(nx.dfs_tree(onco_tree, lymph_root))
    liquid_codes.update(nx.dfs_tree(onco_tree, blood_root))

    primary_tumors = get_primary_tumors()

    def _tumor_texts(codes):
        # Returns the unstripped text; stripping is only used for the
        # primary-tumor membership test.
        texts = (onco_tree.nodes[code]['text'] for code in codes)
        return [text for text in texts if text.strip() not in primary_tumors]

    liquid_children = _tumor_texts(liquid_codes)

    # solid nodes are all other nodes
    solid_codes = set(onco_tree.nodes()) - liquid_codes
    solid_children = _tumor_texts(solid_codes)

    return liquid_children, solid_children
Пример #3
0
    def extract_cancer_types(self):
        """
        Returns all cancer types located in the match tree

        The match tree is read from ``self.g`` and walked in DFS post-order
        starting at node ``1``. Every node of type 'clinical' whose value has
        an 'oncotree_primary_diagnosis' entry contributes its diagnosis:

        * a diagnosis starting with '!' is recorded (without the '!') in the
          excluded list together with its expansion;
        * any other diagnosis is recorded in the expanded list together with
          its children and parents, and its primary parent is recorded too.

        '_SOLID_' and '_LIQUID_' expand to the solid / liquid groups returned
        by ``expand_liquid_oncotree``. A diagnosis whose text is not found in
        the Oncotree is kept as-is and is not expanded.

        :return: Dict with the keys 'diagnoses', 'cancer_types_expanded',
                 'primary_cancer_types' and 'excluded_cancer_types'; each value
                 is a de-duplicated list with the entry 'root' removed
        """

        diagnoses = []
        cancer_types_expanded = []
        primary_cancer_types = []
        excluded_cancer_types = []
        onco_tree = oncotreenx.build_oncotree(file_path=TUMOR_TREE)
        liquid_children_txt, solid_children_txt = expand_liquid_oncotree(onco_tree)

        # Fetched once instead of once per diagnosis; assumed not to change
        # while the tree is being walked.
        primary_tumors = get_primary_tumors()

        # iterate through the graph
        for node_id in nx.dfs_postorder_nodes(self.g, source=1):
            # ``nodes`` replaces the ``node`` alias, which is no longer
            # available on networkx graphs since release 2.4.
            node = self.g.nodes[node_id]
            if node['type'] != 'clinical':
                continue
            if 'oncotree_primary_diagnosis' not in node['value']:
                continue

            diagnosis = node['value']['oncotree_primary_diagnosis']
            plain_diagnosis = diagnosis.replace('!', '')

            # The group sentinels are matched on the name without '!' so that
            # a negated group expands to that group as well.
            if plain_diagnosis == '_SOLID_':
                children_txt = solid_children_txt
                primary_parent = 'All Solid Tumors'
                parents_txt = ['All Solid Tumors']
            elif plain_diagnosis == '_LIQUID_':
                children_txt = liquid_children_txt
                primary_parent = 'All Liquid Tumors'
                parents_txt = ['All Liquid Tumors']
            else:
                n = oncotreenx.lookup_text(onco_tree, plain_diagnosis)
                if n is not None:
                    children_txt = [onco_tree.nodes[nn]['text']
                                    for nn in nx.dfs_tree(onco_tree, n)]
                    _, parents_txt, primary_parent = get_parents(onco_tree, n)
                else:
                    # nx.dfs_tree(..., source=None) spans the whole graph, so
                    # it must not be called for an unknown diagnosis: that
                    # would expand it to every cancer type in the Oncotree.
                    children_txt = []
                    parents_txt = []
                    primary_parent = ''

            diagnoses.append(diagnosis)
            if diagnosis.startswith('!'):
                excluded_cancer_types.append(plain_diagnosis)
                excluded_cancer_types.extend(children_txt)
            else:
                cancer_types_expanded.append(parse_diagnosis(diagnosis))
                cancer_types_expanded.extend(children_txt)
                cancer_types_expanded.extend([i for i in parents_txt if i.split()[0] not in primary_tumors])
                primary_cancer_types.append(primary_parent)

        def _unique_without_root(values):
            # De-duplicate and drop the synthetic 'root' entry.
            return list(set(i for i in values if i.strip() != 'root'))

        return {
            'diagnoses': _unique_without_root(diagnoses),
            'cancer_types_expanded': _unique_without_root(cancer_types_expanded),
            'primary_cancer_types': _unique_without_root(primary_cancer_types),
            'excluded_cancer_types': _unique_without_root(excluded_cancer_types)
        }
Пример #4
0
    def extract_cancer_types(self):
        """
        Returns all cancer types located in the match tree

        The match tree is read from ``self.g`` and walked in DFS post-order
        starting at node ``1``. Every node of type 'clinical' whose value has
        an 'oncotree_primary_diagnosis' entry contributes its diagnosis:

        * a diagnosis starting with '!' is recorded (without the '!') in the
          excluded list together with its expansion;
        * any other diagnosis is recorded in the expanded list together with
          its children and parents, and its primary parent is recorded too.

        '_SOLID_' and '_LIQUID_' expand to the solid / liquid groups returned
        by ``expand_liquid_oncotree``. A diagnosis whose text is not found in
        the Oncotree is kept as-is and is not expanded.

        :return: Dict with the keys 'diagnoses', 'cancer_types_expanded',
                 'primary_cancer_types' and 'excluded_cancer_types'; each value
                 is a de-duplicated list with the entry 'root' removed
        """

        diagnoses = []
        cancer_types_expanded = []
        primary_cancer_types = []
        excluded_cancer_types = []
        onco_tree = oncotreenx.build_oncotree(file_path=TUMOR_TREE)
        liquid_children_txt, solid_children_txt = expand_liquid_oncotree(onco_tree)

        # Fetched once instead of once per diagnosis; assumed not to change
        # while the tree is being walked.
        primary_tumors = get_primary_tumors()

        # iterate through the graph
        for node_id in nx.dfs_postorder_nodes(self.g, source=1):
            node = self.g.nodes[node_id]
            if node['type'] != 'clinical':
                continue
            if 'oncotree_primary_diagnosis' not in node['value']:
                continue

            diagnosis = node['value']['oncotree_primary_diagnosis']
            plain_diagnosis = diagnosis.replace('!', '')

            # The group sentinels are matched on the name without '!' so that
            # a negated group expands to that group as well.
            if plain_diagnosis == '_SOLID_':
                children_txt = solid_children_txt
                primary_parent = 'All Solid Tumors'
                parents_txt = ['All Solid Tumors']
            elif plain_diagnosis == '_LIQUID_':
                children_txt = liquid_children_txt
                primary_parent = 'All Liquid Tumors'
                parents_txt = ['All Liquid Tumors']
            else:
                n = oncotreenx.lookup_text(onco_tree, plain_diagnosis)
                if n is not None:
                    children_txt = [onco_tree.nodes[nn]['text']
                                    for nn in nx.dfs_tree(onco_tree, n)]
                    _, parents_txt, primary_parent = get_parents(onco_tree, n)
                else:
                    # nx.dfs_tree(..., source=None) spans the whole graph, so
                    # it must not be called for an unknown diagnosis: that
                    # would expand it to every cancer type in the Oncotree.
                    children_txt = []
                    parents_txt = []
                    primary_parent = ''

            diagnoses.append(diagnosis)
            if diagnosis.startswith('!'):
                excluded_cancer_types.append(plain_diagnosis)
                excluded_cancer_types.extend(children_txt)
            else:
                cancer_types_expanded.append(parse_diagnosis(diagnosis))
                cancer_types_expanded.extend(children_txt)
                cancer_types_expanded.extend([i for i in parents_txt if i.split()[0] not in primary_tumors])
                primary_cancer_types.append(primary_parent)

        def _unique_without_root(values):
            # De-duplicate and drop the synthetic 'root' entry.
            return list(set(i for i in values if i.strip() != 'root'))

        return {
            'diagnoses': _unique_without_root(diagnoses),
            'cancer_types_expanded': _unique_without_root(cancer_types_expanded),
            'primary_cancer_types': _unique_without_root(primary_cancer_types),
            'excluded_cancer_types': _unique_without_root(excluded_cancer_types)
        }
Пример #5
0
    def test_text_lu(self):
        """Looking up the text "Adrenal Gland" in ``self.g`` yields its node code."""

        # resolve the display text to a node of the graph.
        found_code = lookup_text(self.g, "Adrenal Gland")

        # the lookup must return the expected code.
        assert found_code == 'ADRENAL_GLAND'
Пример #6
0
    def get_histology_type(self, cancer_type_text):
        """Convert an oncotree text to a histology type.

        The text is first looked up directly in ``histology_type_dict``. If it
        is not there, the text is resolved to a node of ``self.oncotree`` and
        that node's 'metamaintype' attribute is looked up instead.

        :param cancer_type_text: Oncotree text of the cancer type
        :return: The mapped histology type, or the marker 'removeme' when the
                 text cannot be resolved or its 'metamaintype' has no mapping
        """

        if cancer_type_text in histology_type_dict:
            return histology_type_dict[cancer_type_text]

        try:
            node_code = oncotreenx.lookup_text(self.oncotree, cancer_type_text)
            # ``nodes`` replaces the ``node`` alias, which is no longer
            # available on networkx graphs since release 2.4. A failed lookup
            # (or a node without 'metamaintype') surfaces here as KeyError.
            metamaintype = self.oncotree.nodes[node_code]['metamaintype']
        except KeyError:
            # Parenthesised single-argument form: valid on Python 2 and 3.
            print('## WARNING: \'%s\' is not a valid oncotree text input.'
                  ' Sample was removed from analysis' % cancer_type_text)
            return 'removeme'

        if metamaintype in histology_type_dict:
            return histology_type_dict[metamaintype]
        else:
            return 'removeme'
Пример #7
0
def prepare_criteria(item):
    """
    Translate a stored filter item into query criteria and display text.

    :param item: Dict that may contain a 'clinical_filter' and/or a
        'genomic_filter' sub-dict.
    :returns: Tuple ``(c, g, (gen_txt, clin_txt))`` where

        * ``c`` is the clinical criteria dict (empty without a clinical
          filter): the filter after the ``REREPLACEMENTS`` substitutions, with
          date strings parsed and the Oncotree diagnosis expanded to an
          ``$in`` list of node texts;
        * ``g`` is the genomic criteria dict (empty without a genomic filter);
        * ``gen_txt`` describes the genomic filter: a string when the filter
          has 'TRUE_HUGO_SYMBOL', otherwise the list of collected labels;
        * ``clin_txt`` is ``[diagnosis text, age text, gender text]``.
    """

    onco_tree = oncotreenx.build_oncotree(settings.DATA_ONCOTREE_FILE)

    c = {}
    clin_txt_1 = ""
    clin_txt_2_gender = ""
    clin_txt_2_age = ""
    if 'clinical_filter' in item:

        # Apply the key/operator substitutions on the serialised filter.
        clin_tmp = json.dumps(item['clinical_filter'])
        for key, val in REREPLACEMENTS.items():
            clin_tmp = clin_tmp.replace(key, val)

        c = json.loads(clin_tmp)

        if 'GENDER' in item['clinical_filter']:
            clin_txt_2_gender = item['clinical_filter']['GENDER']

        if 'BIRTH_DATE' in item['clinical_filter']:
            # Only the first operator/value pair of the expression is used.
            op = next(iter(item['clinical_filter']['BIRTH_DATE'].keys()))
            val = next(iter(item['clinical_filter']['BIRTH_DATE'].values()))

            try:
                val = datetime.datetime.strptime(val.replace(" GMT", ""),
                                                 '%a, %d %b %Y %H:%M:%S')
            except ValueError:
                val = dateutil.parser.parse(val)

            # compute the age.
            # NOTE(review): ``val.day - 1`` shifts the birthday comparison by
            # one day; presumably deliberate - confirm.
            today = datetime.date.today()
            tmp = today.year - val.year - ((today.month, today.day) <
                                           (val.month, val.day - 1))
            val = tmp

            # A lower bound on the birth date is an upper bound on the age.
            if op.count("gte") > 0:
                clin_txt_2_age = "< %s" % val
            else:
                clin_txt_2_age = "> %s" % val

        # parse date-times.
        for key in ['BIRTH_DATE', 'REPORT_DATE']:

            if key not in c:
                continue

            # extract the expression value.
            lkey, lval = next(iter(c[key].keys())), next(iter(c[key].values()))

            try:
                c[key][lkey] = datetime.datetime.strptime(
                    lval.replace(" GMT", ""), '%a, %d %b %Y %H:%M:%S')
            except ValueError:
                c[key][lkey] = dateutil.parser.parse(lval)

        # expand oncotree
        if 'ONCOTREE_PRIMARY_DIAGNOSIS_NAME' in item['clinical_filter']:

            txt = item['clinical_filter']['ONCOTREE_PRIMARY_DIAGNOSIS_NAME']

            # Stays None when the diagnosis text is not a node of the tree.
            # Without this default the lookup miss ended in an
            # UnboundLocalError further down.
            nodes = None

            if txt == "_LIQUID_" or txt == "_SOLID_":

                # Liquid = subtrees below the "Lymph" and "Blood" nodes.
                node1 = oncotreenx.lookup_text(onco_tree, "Lymph")
                node2 = oncotreenx.lookup_text(onco_tree, "Blood")

                nodes1 = list(nx.dfs_tree(onco_tree, node1))
                nodes2 = list(nx.dfs_tree(onco_tree, node2))
                nodes = list(set(nodes1).union(set(nodes2)))

                if txt == "_SOLID_":

                    # Solid = every node that is not liquid.
                    all_nodes = set(list(onco_tree.nodes()))
                    tmp_nodes = all_nodes - set(nodes)
                    nodes = list(tmp_nodes)

                clin_txt_1 = "%s cancers" % txt.replace("_", "").title()

            else:

                clin_txt_1 = "%s" % txt
                node = oncotreenx.lookup_text(onco_tree, txt)
                if onco_tree.has_node(node):
                    nodes = list(nx.dfs_tree(onco_tree, node))

            if nodes is not None:
                # ``nodes`` replaces the ``node`` alias, which is no longer
                # available on networkx graphs since release 2.4.
                nodes_txt = [onco_tree.nodes[n]['text'] for n in nodes]
                c['ONCOTREE_PRIMARY_DIAGNOSIS_NAME'] = {'$in': nodes_txt}
            # else: the diagnosis cannot be expanded, so the criterion keeps
            # the value it got from the filter.

    g = {}
    gen_txt = []
    if 'genomic_filter' in item:

        # Apply the key/operator substitutions on the serialised filter.
        gen_tmp = json.dumps(item['genomic_filter'])
        for key, val in REREPLACEMENTS.items():
            gen_tmp = gen_tmp.replace(key, val)

        g = json.loads(gen_tmp)

        # add TRUE_HUGO_SYMBOL value mutational signature filter queries
        if 'TRUE_HUGO_SYMBOL' in g and g['TRUE_HUGO_SYMBOL'] == {'$in': ['']}:
            g['TRUE_HUGO_SYMBOL'] = None

        # Work out which variant categories the filter asks for.
        sv_test = False
        mut_test = False
        cnv_test = False
        if 'VARIANT_CATEGORY' in item['genomic_filter']:
            variant_category = item['genomic_filter']['VARIANT_CATEGORY']
            if isinstance(variant_category, dict):
                for x in variant_category.values():
                    if "SV" in set(x):
                        sv_test = True
                    if "CNV" in set(x):
                        cnv_test = True
                    if "MUTATION" in set(x):
                        mut_test = True

            elif item['genomic_filter']['VARIANT_CATEGORY'] == 'SV':
                sv_test = True

            elif item['genomic_filter']['VARIANT_CATEGORY'] == 'CNV':
                cnv_test = True

            elif item['genomic_filter']['VARIANT_CATEGORY'] == 'MUTATION':
                mut_test = True

        # build text.
        exon_txt = ""
        protein_txt = ""
        if mut_test:
            gen_txt.append("Mutation")

            if 'TRUE_EXON_CHANGE' in item['genomic_filter']:
                exon_txt = item['genomic_filter']['TRUE_EXON_CHANGE']

            if 'TRUE_PROTEIN_CHANGE' in item['genomic_filter']:
                protein_txt = item['genomic_filter']['TRUE_PROTEIN_CHANGE']

        if cnv_test:
            if 'CNV_CALL' in g:
                if isinstance(g['CNV_CALL'], dict):
                    gen_txt += next(iter(g['CNV_CALL'].values()))
                else:
                    gen_txt.append(g['CNV_CALL'])
        if sv_test:
            gen_txt.append("Structural rearrangement")

        if 'MMR_STATUS' in item['genomic_filter']:
            gen_txt.append(item['genomic_filter']['MMR_STATUS'])

        # 'TABACCO_STATUS' is the key as it is spelled in the filter.
        if 'TABACCO_STATUS' in item['genomic_filter']:
            gen_txt.append('Tobacco Mutational Signature')

        if 'TEMOZOLOMIDE_STATUS' in item['genomic_filter']:
            gen_txt.append('Temozolomide Mutational Signature')

        if 'POLE_STATUS' in item['genomic_filter']:
            gen_txt.append('PolE Mutational Signature')

        if 'APOBEC_STATUS' in item['genomic_filter']:
            gen_txt.append('APOBEC Mutational Signature')

        if 'UVA_STATUS' in item['genomic_filter']:
            gen_txt.append('UVA Mutational Signature')

        # One clause per requested variant category; they are OR-ed below.
        clauses = []
        if mut_test:

            clause = {
                'VARIANT_CATEGORY': 'MUTATION',
                'TRUE_HUGO_SYMBOL': g['TRUE_HUGO_SYMBOL']
            }

            if 'WILDTYPE' in g:
                clause['WILDTYPE'] = g['WILDTYPE']

            if 'TRUE_PROTEIN_CHANGE' in g:
                clause['TRUE_PROTEIN_CHANGE'] = g['TRUE_PROTEIN_CHANGE']

            clauses.append(clause)

        if cnv_test:

            clause = {
                'VARIANT_CATEGORY': 'CNV',
                'TRUE_HUGO_SYMBOL': g['TRUE_HUGO_SYMBOL'],
            }

            if 'CNV_CALL' in g:
                clause['CNV_CALL'] = g['CNV_CALL']

            if 'WILDTYPE' in g:
                clause['WILDTYPE'] = g['WILDTYPE']

            clauses.append(clause)

        if sv_test:

            true_hugo = item['genomic_filter']['TRUE_HUGO_SYMBOL']

            if isinstance(true_hugo, dict):
                genes = next(iter(true_hugo.values()))
            else:
                genes = [true_hugo]

            # Also search for the known synonyms of each gene.
            to_add = list()
            for gene in genes:
                if gene in synonyms:
                    to_add += synonyms[gene]

            genes = genes + to_add

            # Match the gene name as a whole word anywhere in the comment.
            # NOTE(review): gene names are interpolated into the pattern
            # unescaped - confirm they never contain regex metacharacters.
            abc = '|'.join([
                rf"(.*\W{gene}\W.*)|(^{gene}\W.*)|(.*\W{gene}$)"
                for gene in genes
            ])

            clauses.append({'STRUCTURAL_VARIANT_COMMENT': {"$regex": abc}})
            clauses.append({'LEFT_PARTNER_GENE': {'$in': genes}})
            clauses.append({'RIGHT_PARTNER_GENE': {'$in': genes}})

        if len(clauses) > 0:
            g = {"$or": clauses}

        # Copy over the remaining filter keys; the ones below are already
        # represented inside the clauses.
        for key in item['genomic_filter']:

            special_clauses = {
                'STRUCTURAL_VARIANT_COMMENT', 'VARIANT_CATEGORY',
                'TRUE_HUGO_SYMBOL', 'CNV_CALL', 'WILDTYPE',
                'TRUE_PROTEIN_CHANGE'
            }
            if key in special_clauses:
                continue

            g[key] = item['genomic_filter'][key]

        # Return value unused; presumably rewrites "GMT" date strings inside
        # ``g`` in place - verify against get_recursively.
        get_recursively(g, "GMT")
        if 'TRUE_HUGO_SYMBOL' in item['genomic_filter']:
            if isinstance(item['genomic_filter']['TRUE_HUGO_SYMBOL'], dict):
                genes = next(
                    iter(item['genomic_filter']['TRUE_HUGO_SYMBOL'].values()))
            else:
                genes = [item['genomic_filter']['TRUE_HUGO_SYMBOL']]

            genes = [str(i) for i in genes]
            genes = ', '.join(genes)

            # From here on gen_txt is a display string, not a list.
            if len(gen_txt) > 1:
                gen_txt = "%s: %s" % (genes, ', '.join(gen_txt))
            else:

                if exon_txt == "" and protein_txt == "":
                    gen_txt = "%s %s" % (genes, ', '.join(gen_txt))
                elif exon_txt != "":
                    gen_txt = "%s exon %s" % (genes, exon_txt)
                else:
                    gen_txt = "%s %s" % (genes, protein_txt)

    return c, g, (gen_txt, [clin_txt_1, clin_txt_2_age, clin_txt_2_gender])
Пример #8
0
def prepare_criteria(item):
    """
    Translate a stored filter item into query criteria and display text.

    :param item: Dict that may contain a 'clinical_filter' and/or a
        'genomic_filter' sub-dict.
    :returns: Tuple ``(c, g, (gen_txt, clin_txt))`` where

        * ``c`` is the clinical criteria dict (empty without a clinical
          filter): the filter after the ``REREPLACEMENTS`` substitutions, with
          date strings parsed and the Oncotree diagnosis expanded to an
          ``$in`` list of node texts;
        * ``g`` is the genomic criteria dict (empty without a genomic filter);
        * ``gen_txt`` describes the genomic filter: a string when the filter
          has 'TRUE_HUGO_SYMBOL', otherwise the list of collected labels;
        * ``clin_txt`` is ``[diagnosis text, age text, gender text]``.
    """

    onco_tree = oncotreenx.build_oncotree(settings.DATA_ONCOTREE_FILE)

    c = {}
    clin_txt_1 = ""
    clin_txt_2_gender = ""
    clin_txt_2_age = ""
    if 'clinical_filter' in item:

        # Apply the key/operator substitutions on the serialised filter.
        clin_tmp = json.dumps(item['clinical_filter'])
        for key, val in REREPLACEMENTS.items():
            clin_tmp = clin_tmp.replace(key, val)

        c = json.loads(clin_tmp)

        if 'GENDER' in item['clinical_filter']:
            clin_txt_2_gender = item['clinical_filter']['GENDER']

        if 'BIRTH_DATE' in item['clinical_filter']:
            # Only the first operator/value pair of the expression is used.
            # next(iter(...)) works on Python 2 lists and Python 3 dict views
            # alike, whereas indexing ``keys()``/``values()`` fails on 3.
            op = next(iter(item['clinical_filter']['BIRTH_DATE'].keys()))
            val = next(iter(item['clinical_filter']['BIRTH_DATE'].values()))

            try:
                val = datetime.datetime.strptime(val.replace(" GMT", ""), '%a, %d %b %Y %H:%M:%S')
            except ValueError:
                val = dateutil.parser.parse(val)

            # compute the age.
            # NOTE(review): ``val.day - 1`` shifts the birthday comparison by
            # one day; presumably deliberate - confirm.
            today = datetime.date.today()
            tmp = today.year - val.year - ((today.month, today.day) < (val.month, val.day - 1))
            val = tmp

            # A lower bound on the birth date is an upper bound on the age.
            if op.count("gte") > 0:
                clin_txt_2_age = "< %s" % val
            else:
                clin_txt_2_age = "> %s" % val

        # parse date-times.
        for key in ['BIRTH_DATE', 'REPORT_DATE']:

            if key not in c:
                continue

            # extract the expression value.
            lkey, lval = next(iter(c[key].keys())), next(iter(c[key].values()))

            try:
                c[key][lkey] = datetime.datetime.strptime(lval.replace(" GMT", ""), '%a, %d %b %Y %H:%M:%S')
            except ValueError:
                c[key][lkey] = dateutil.parser.parse(lval)

        # expand oncotree
        if 'ONCOTREE_PRIMARY_DIAGNOSIS_NAME' in item['clinical_filter']:

            txt = item['clinical_filter']['ONCOTREE_PRIMARY_DIAGNOSIS_NAME']

            # Stays None when the diagnosis text is not a node of the tree.
            # Without this default the lookup miss ended in an
            # UnboundLocalError further down.
            nodes = None

            if txt == "_LIQUID_" or txt == "_SOLID_":

                # Liquid = subtrees below the "Lymph" and "Blood" nodes.
                node1 = oncotreenx.lookup_text(onco_tree, "Lymph")
                node2 = oncotreenx.lookup_text(onco_tree, "Blood")

                nodes1 = list(nx.dfs_tree(onco_tree, node1))
                nodes2 = list(nx.dfs_tree(onco_tree, node2))
                nodes = list(set(nodes1).union(set(nodes2)))

                if txt == "_SOLID_":

                    # Solid = every node that is not liquid.
                    all_nodes = set(list(onco_tree.nodes()))
                    tmp_nodes = all_nodes - set(nodes)
                    nodes = list(tmp_nodes)

                clin_txt_1 = "%s cancers" % txt.replace("_", "").title()

            else:

                clin_txt_1 = "%s" % txt
                node = oncotreenx.lookup_text(onco_tree, txt)
                if onco_tree.has_node(node):
                    nodes = list(nx.dfs_tree(onco_tree, node))

            if nodes is not None:
                # ``nodes`` replaces the ``node`` alias, which is no longer
                # available on networkx graphs since release 2.4.
                nodes_txt = [onco_tree.nodes[n]['text'] for n in nodes]
                c['ONCOTREE_PRIMARY_DIAGNOSIS_NAME'] = {'$in': nodes_txt}
            # else: the diagnosis cannot be expanded, so the criterion keeps
            # the value it got from the filter.

    g = {}
    gen_txt = []
    if 'genomic_filter' in item:

        # Apply the key/operator substitutions on the serialised filter.
        gen_tmp = json.dumps(item['genomic_filter'])
        for key, val in REREPLACEMENTS.items():
            gen_tmp = gen_tmp.replace(key, val)

        g = json.loads(gen_tmp)

        # add TRUE_HUGO_SYMBOL value mutational signature filter queries
        if 'TRUE_HUGO_SYMBOL' in g and g['TRUE_HUGO_SYMBOL'] == {'$in': ['']}:
            g['TRUE_HUGO_SYMBOL'] = None

        # Work out which variant categories the filter asks for.
        sv_test = False
        mut_test = False
        cnv_test = False
        if 'VARIANT_CATEGORY' in item['genomic_filter']:
            variant_category = item['genomic_filter']['VARIANT_CATEGORY']
            if isinstance(variant_category, dict):
                for x in variant_category.values():
                    if "SV" in set(x):
                        sv_test = True
                    if "CNV" in set(x):
                        cnv_test = True
                    if "MUTATION" in set(x):
                        mut_test = True

            elif item['genomic_filter']['VARIANT_CATEGORY'] == 'SV':
                sv_test = True

            elif item['genomic_filter']['VARIANT_CATEGORY'] == 'CNV':
                cnv_test = True

            elif item['genomic_filter']['VARIANT_CATEGORY'] == 'MUTATION':
                mut_test = True

        # build text.
        exon_txt = ""
        protein_txt = ""
        if mut_test:
            gen_txt.append("Mutation")

            if 'TRUE_EXON_CHANGE' in item['genomic_filter']:
                exon_txt = item['genomic_filter']['TRUE_EXON_CHANGE']

            if 'TRUE_PROTEIN_CHANGE' in item['genomic_filter']:
                protein_txt = item['genomic_filter']['TRUE_PROTEIN_CHANGE']

        if cnv_test:
            if 'CNV_CALL' in g:
                if isinstance(g['CNV_CALL'], dict):
                    gen_txt += next(iter(g['CNV_CALL'].values()))
                else:
                    gen_txt.append(g['CNV_CALL'])
        if sv_test:
            gen_txt.append("Structural rearrangement")

        if 'MMR_STATUS' in item['genomic_filter']:
            gen_txt.append(item['genomic_filter']['MMR_STATUS'])

        # 'TABACCO_STATUS' is the key as it is spelled in the filter.
        if 'TABACCO_STATUS' in item['genomic_filter']:
            gen_txt.append('Tobacco Mutational Signature')

        if 'TEMOZOLOMIDE_STATUS' in item['genomic_filter']:
            gen_txt.append('Temozolomide Mutational Signature')

        if 'POLE_STATUS' in item['genomic_filter']:
            gen_txt.append('PolE Mutational Signature')

        if 'APOBEC_STATUS' in item['genomic_filter']:
            gen_txt.append('APOBEC Mutational Signature')

        if 'UVA_STATUS' in item['genomic_filter']:
            gen_txt.append('UVA Mutational Signature')

        # One clause per requested variant category; they are OR-ed below.
        clauses = []
        if mut_test:

            clause = {
                'VARIANT_CATEGORY': 'MUTATION',
                'TRUE_HUGO_SYMBOL': g['TRUE_HUGO_SYMBOL']
            }

            if 'WILDTYPE' in g:
                clause['WILDTYPE'] = g['WILDTYPE']

            if 'TRUE_PROTEIN_CHANGE' in g:
                clause['TRUE_PROTEIN_CHANGE'] = g['TRUE_PROTEIN_CHANGE']

            clauses.append(clause)

        if cnv_test:

            clause = {
                'VARIANT_CATEGORY': 'CNV',
                'TRUE_HUGO_SYMBOL': g['TRUE_HUGO_SYMBOL'],
            }

            if 'CNV_CALL' in g:
                clause['CNV_CALL'] = g['CNV_CALL']

            if 'WILDTYPE' in g:
                clause['WILDTYPE'] = g['WILDTYPE']

            clauses.append(clause)

        if sv_test:

            true_hugo = item['genomic_filter']['TRUE_HUGO_SYMBOL']

            if isinstance(true_hugo, dict):
                genes = next(iter(true_hugo.values()))
            else:
                genes = [true_hugo]

            # Also search for the known synonyms of each gene.
            to_add = list()
            for gene in genes:
                if gene in synonyms:
                    to_add += synonyms[gene]

            genes = genes + to_add

            # One case-insensitive pattern per gene, matching the name as a
            # whole word anywhere in the comment. Raw string so that ``\W``
            # is not treated as a string escape.
            # NOTE(review): gene names are interpolated into the pattern
            # unescaped - confirm they never contain regex metacharacters.
            sv_clauses = [
                re.compile(r"(.*\W%s\W.*)|(^%s\W.*)|(.*\W%s$)" % (gene, gene, gene),
                           re.IGNORECASE)
                for gene in genes
            ]

            clause = {
                'STRUCTURAL_VARIANT_COMMENT': {"$in": sv_clauses}
            }
            clauses.append(clause)

        if len(clauses) > 0:
            g = {
                "$or": clauses
            }

        # Copy over the remaining filter keys; the ones below are already
        # represented inside the clauses.
        for key in item['genomic_filter']:

            special_clauses = {
                'STRUCTURAL_VARIANT_COMMENT',
                'VARIANT_CATEGORY',
                'TRUE_HUGO_SYMBOL',
                'CNV_CALL',
                'WILDTYPE',
                'TRUE_PROTEIN_CHANGE'
            }
            if key in special_clauses:
                continue

            g[key] = item['genomic_filter'][key]

        # Return value unused; presumably rewrites "GMT" date strings inside
        # ``g`` in place - verify against get_recursively.
        get_recursively(g, "GMT")
        if 'TRUE_HUGO_SYMBOL' in item['genomic_filter']:
            if isinstance(item['genomic_filter']['TRUE_HUGO_SYMBOL'], dict):
                genes = next(iter(item['genomic_filter']['TRUE_HUGO_SYMBOL'].values()))
            else:
                genes = [item['genomic_filter']['TRUE_HUGO_SYMBOL']]

            genes = [str(i) for i in genes]
            genes = ', '.join(genes)

            # From here on gen_txt is a display string, not a list.
            if len(gen_txt) > 1:
                gen_txt = "%s: %s" % (genes, ', '.join(gen_txt))
            else:

                if exon_txt == "" and protein_txt == "":
                    gen_txt = "%s %s" % (genes, ', '.join(gen_txt))
                elif exon_txt != "":
                    gen_txt = "%s exon %s" % (genes, exon_txt)
                else:
                    gen_txt = "%s %s" % (genes, protein_txt)

    return c, g, (gen_txt, [clin_txt_1, clin_txt_2_age, clin_txt_2_gender])