def validate_mutation_1(uniprot_id, mutation):
    """Check that Provean data exists whenever the protein has a templated domain.

    Two queries are run against ``conf.CONFIGS["engine"]``:

    1. rows of ``provean`` for ``uniprot_id`` whose
       ``provean_supset_filename`` is not null;
    2. rows of ``uniprot_domain`` joined to ``uniprot_domain_template`` for
       the same ``uniprot_id``.

    The check passes when the first query returns at least one row, or when
    the second query returns no rows at all.

    Args:
        uniprot_id: Identifier substituted into both SQL queries.
        mutation: Not used by this check.

    Raises:
        AssertionError: If there are no Provean rows although at least one
            domain with a template exists.
    """
    schema_name = conf.CONFIGS["db_schema"]

    # First query: Provean supporting-set records for this protein.
    logger.debug(helper.underline("Validating that we have provean..."))
    provean_template = """\
select 1
from {db_schema}.provean
where uniprot_id = '{uniprot_id}' and
provean_supset_filename is not null;
"""
    provean_query = provean_template.format(
        db_schema=schema_name, uniprot_id=uniprot_id
    )
    logger.debug(provean_query)
    provean_rows = pd.read_sql_query(provean_query, conf.CONFIGS["engine"])
    logger.debug(provean_rows.head(2))

    # Second query: domains of this protein that have a template.
    logger.debug(
        helper.underline("And that we have at least one domain with a template...")
    )
    domain_template = """\
select 1
from {db_schema}.uniprot_domain
join {db_schema}.uniprot_domain_template using (uniprot_domain_id)
where uniprot_id = '{uniprot_id}';
"""
    domain_query = domain_template.format(
        db_schema=schema_name, uniprot_id=uniprot_id
    )
    logger.debug(domain_query)
    domain_rows = pd.read_sql_query(domain_query, conf.CONFIGS["engine"])
    logger.debug(domain_rows.head(2))

    has_provean = len(provean_rows) >= 1
    has_no_templated_domain = len(domain_rows) == 0
    assert has_provean or has_no_templated_domain
def validate_mutation_5(uniprot_id, mutation):
    """Check that no modelled domain pair is missing its mutation result.

    The query selects domain pairs that have a template and a model without
    ``model_errors``, where ``uniprot_id`` is either partner and the schema
    function ``mutation_in_interface`` accepts ``mutation`` for that
    partner's ``interacting_aa_*`` column, and for which the left-joined
    ``uniprot_domain_pair_mutation`` row has a null ``model_filename_wt``.
    The check passes only when the query returns no rows.

    Args:
        uniprot_id: Identifier substituted into the SQL query.
        mutation: Mutation string substituted into the SQL query.

    Raises:
        AssertionError: If the query returns one or more rows.
    """
    logger.debug(helper.underline("Validating that we have domain pair mutations..."))

    # The line break after the "--" SQL comment is significant: it ends the
    # comment so that the final predicate is still part of the statement.
    template = """\
SELECT *
FROM {db_schema}.uniprot_domain_pair udp
JOIN {db_schema}.uniprot_domain ud1 on (ud1.uniprot_domain_id = udp.uniprot_domain_id_1)
JOIN {db_schema}.uniprot_domain ud2 on (ud2.uniprot_domain_id = udp.uniprot_domain_id_2)
JOIN {db_schema}.uniprot_domain_pair_template udpt using (uniprot_domain_pair_id)
JOIN {db_schema}.uniprot_domain_pair_model udpm using (uniprot_domain_pair_id)
LEFT JOIN {db_schema}.uniprot_domain_pair_mutation udpmut ON
(udpmut.uniprot_domain_pair_id = udp.uniprot_domain_pair_id AND
udpmut.uniprot_id = '{uniprot_id}' and udpmut.mutation = '{mutation}')
WHERE ((ud1.uniprot_id = '{uniprot_id}' AND
{db_schema}.mutation_in_interface('{mutation}', udpm.interacting_aa_1))
OR (ud2.uniprot_id = '{uniprot_id}' AND
{db_schema}.mutation_in_interface('{mutation}', udpm.interacting_aa_2)))
AND udpm.model_errors IS NULL AND -- we allow for some model errors
model_filename_wt IS NULL;
"""
    substitutions = {
        "uniprot_id": uniprot_id,
        "mutation": mutation,
        "db_schema": conf.CONFIGS["db_schema"],
    }
    query = template.format(**substitutions)
    logger.debug(query)

    missing_rows = pd.read_sql_query(query, conf.CONFIGS["engine"])
    missing_count = len(missing_rows)
    assert missing_count == 0
def validate_mutation_2(uniprot_id, mutation):
    """Check that every templated domain has either a model or a model error.

    The query selects ``uniprot_domain`` rows for ``uniprot_id`` that have a
    template but, after a left join to ``uniprot_domain_model``, have both
    ``model_filename`` and ``model_errors`` null. The check passes only when
    the query returns no rows.

    Args:
        uniprot_id: Identifier substituted into the SQL query.
        mutation: Not used by this check.

    Raises:
        AssertionError: If the query returns one or more rows.
    """
    logger.debug(helper.underline("Validating that we have domain models..."))

    template = """\
select *
from {db_schema}.uniprot_domain
join {db_schema}.uniprot_domain_template using (uniprot_domain_id)
left join {db_schema}.uniprot_domain_model using (uniprot_domain_id)
where uniprot_id = '{uniprot_id}' and
model_filename is null and model_errors is null;
"""
    schema_name = conf.CONFIGS["db_schema"]
    query = template.format(db_schema=schema_name, uniprot_id=uniprot_id)
    logger.debug(query)

    unmodelled = pd.read_sql_query(query, conf.CONFIGS["engine"])
    unmodelled_count = len(unmodelled)
    assert unmodelled_count == 0
def validate_mutation_3(uniprot_id, mutation):
    """Check that every templated domain pair has a model or a model error.

    The query selects ``uniprot_domain_pair`` rows in which ``uniprot_id``
    is either partner, that have a template but, after a left join to
    ``uniprot_domain_pair_model``, have both ``model_filename`` and
    ``model_errors`` null. The check passes only when the query returns no
    rows.

    Args:
        uniprot_id: Identifier substituted into the SQL query.
        mutation: Not used by this check.

    Raises:
        AssertionError: If the query returns one or more rows.
    """
    logger.debug(helper.underline("Validating that we have domain pair models..."))

    template = """\
select *
from {db_schema}.uniprot_domain_pair udp
join {db_schema}.uniprot_domain ud1 on (ud1.uniprot_domain_id = udp.uniprot_domain_id_1)
join {db_schema}.uniprot_domain ud2 on (ud2.uniprot_domain_id = udp.uniprot_domain_id_2)
join {db_schema}.uniprot_domain_pair_template udpt using (uniprot_domain_pair_id)
left join {db_schema}.uniprot_domain_pair_model udpm using (uniprot_domain_pair_id)
where (ud1.uniprot_id = '{uniprot_id}' or ud2.uniprot_id = '{uniprot_id}') and
model_filename is null and model_errors is null;
"""
    substitutions = {
        "uniprot_id": uniprot_id,
        "db_schema": conf.CONFIGS["db_schema"],
    }
    query = template.format(**substitutions)
    logger.debug(query)

    unmodelled_pairs = pd.read_sql_query(query, conf.CONFIGS["engine"])
    unmodelled_count = len(unmodelled_pairs)
    assert unmodelled_count == 0
def validate_mutation_4(uniprot_id, mutation):
    """Check that no modelled domain is missing its mutation result.

    The query selects ``uniprot_domain`` rows for ``uniprot_id`` that have a
    template and a model without ``model_errors``, for which the schema
    function ``mutation_in_domain`` accepts ``mutation`` against
    ``model_domain_def``, and whose left-joined ``uniprot_domain_mutation``
    row has a null ``model_filename_wt``. The check passes only when the
    query returns no rows.

    Args:
        uniprot_id: Identifier substituted into the SQL query.
        mutation: Mutation string substituted into the SQL query.

    Raises:
        AssertionError: If the query returns one or more rows.
    """
    logger.debug(helper.underline("Validating that we have domain mutations..."))

    # The line break after the "--" SQL comment is significant: it ends the
    # comment so that the final predicate is still part of the statement.
    template = """\
SELECT *
FROM {db_schema}.uniprot_domain ud
JOIN {db_schema}.uniprot_domain_template using (uniprot_domain_id)
JOIN {db_schema}.uniprot_domain_model udm using (uniprot_domain_id)
LEFT JOIN {db_schema}.uniprot_domain_mutation udmut ON
(udmut.uniprot_domain_id = ud.uniprot_domain_id AND mutation = '{mutation}')
WHERE ud.uniprot_id = '{uniprot_id}' AND
{db_schema}.mutation_in_domain('{mutation}', model_domain_def) AND
udm.model_errors is null AND -- we allow for some model errors
model_filename_wt is null;
"""
    schema_name = conf.CONFIGS["db_schema"]
    query = template.format(
        db_schema=schema_name, mutation=mutation, uniprot_id=uniprot_id
    )
    logger.debug(query)

    missing_rows = pd.read_sql_query(query, conf.CONFIGS["engine"])
    missing_count = len(missing_rows)
    assert missing_count == 0