def testCreateDatasourceWithMissingValues(self):
    """Create an indexed TSV datasource from the "missing" ESP test file.

    Verifies the src_file recorded in the generated config and that the
    data types written for EA_GTC and DP are "Float" and "Integer".
    """
    inputTsv = os.path.join("testdata", "ESP6500SI-V2.chr1.snps_indels.head.25.missing.txt")
    outputDir = "out"
    expectedSrcFile = "ESP6500SI-V2.chr1.snps_indels.head.25.missing.tabix_indexed.txt.gz"
    indexCols = "CHROM,POS,POS"
    dsType = "indexed_tsv"
    dsTitle = "ESP"
    dsVersion = "6500SI-V2"
    matchMode = "overlap"
    annotationCols = "EA_GTC,DP"
    configPath = os.path.join("out", "esp_coverage.missing.config")

    creator = TabixIndexedTsvDatasourceCreator()
    creator.createDatasource(
        outputDir,
        inputTsv,
        indexCols,
        configPath,
        dsType,
        dsTitle,
        dsVersion,
        matchMode,
        annotationCols,
        DatasourceInstallUtils.getIndexCols(dsType, indexCols))

    parser = ConfigUtils.createConfigParser(configPath)

    actualSrcFile = parser.get("general", "src_file")
    self.assertEqual(actualSrcFile, expectedSrcFile,
                     "Expected data source src_file is %s but was %s." % (expectedSrcFile, actualSrcFile))

    # Expected data type recorded for each annotation column.
    for column, expectedType in (("EA_GTC", "Float"), ("DP", "Integer")):
        actualType = parser.get("data_types", column)
        self.assertEqual(actualType, expectedType,
                         "Expected %s data type is %s but was %s." % (column, expectedType, actualType))
def testCreateDatasourceWithMissingAnnotationColumns(self):
    """Expect a ValueError when the requested annotation columns include ESP_DBSNP.

    NOTE(review): presumably ESP_DBSNP is not a column of the input file,
    which is what triggers the error -- confirm against the test data.
    """
    inputTsv = os.path.join("testdata", "ESP6500SI-V2.chr1.snps_indels.head.25.missing.txt")
    outputDir = "out"
    indexCols = "CHROM,POS,POS"
    dsType = "indexed_tsv"
    dsTitle = "ESP"
    dsVersion = "6500SI-V2"
    matchMode = "overlap"
    annotationCols = "EA_GTC,DP,ESP_DBSNP"
    configPath = os.path.join("out", "esp_coverage.missing_annotation_cols.config")

    creator = TabixIndexedTsvDatasourceCreator()
    # The index-column lookup stays inside the context manager so that the
    # same set of calls is covered by the assertion.
    with self.assertRaises(ValueError):
        creator.createDatasource(
            outputDir,
            inputTsv,
            indexCols,
            configPath,
            dsType,
            dsTitle,
            dsVersion,
            matchMode,
            annotationCols,
            DatasourceInstallUtils.getIndexCols(dsType, indexCols))
def testCreateDatasourceWithMissingAnnotationColumns(self):
    """Expect a ValueError when the requested annotation columns include ESP_DBSNP.

    The previous form wrapped the call in ``try/except ValueError: pass``,
    which also passed when no exception was raised at all, so the test
    verified nothing.  ``assertRaises`` makes the expectation explicit.

    NOTE(review): presumably ESP_DBSNP is not a column of the input file,
    which is what triggers the error -- confirm against the test data.
    """
    dsFile = os.path.join("testdata", "ESP6500SI-V2.chr1.snps_indels.head.25.txt")
    destDir = "out"
    indexColumnNames = "CHROM,POS,POS"
    dataSourceType = "indexed_tsv"
    dataSourceName = "ESP"
    dataSourceVersion = "6500SI-V2"
    dataSourceMatchMode = "overlap"
    annotationColumnNames = "EA_GTC,DP,ESP_DBSNP"
    configFilename = os.path.join("out", "esp_coverage.missing.config")

    datasourceBuilder = TabixIndexedTsvDatasourceCreator()
    # Fails the test unless a ValueError is actually raised.
    with self.assertRaises(ValueError):
        datasourceBuilder.createDatasource(
            destDir,
            dsFile,
            indexColumnNames,
            configFilename,
            dataSourceType,
            dataSourceName,
            dataSourceVersion,
            dataSourceMatchMode,
            annotationColumnNames,
            DatasourceInstallUtils.getIndexCols(dataSourceType, indexColumnNames))
def testCreateDatasourceWithMissingColumns(self):
    """Create an indexed TSV datasource, tolerating an InputMismatchException.

    NOTE(review): the exception is swallowed and nothing is asserted, so
    this test passes whether or not createDatasource raises -- confirm
    whether an exception is actually expected for these inputs.
    """
    inputTsv = os.path.join("testdata", "ESP6500SI-V2.chr1.snps_indels.head.25.txt")
    outputDir = "out"
    indexCols = "CHROM,POS,POS"
    dsType = "indexed_tsv"
    dsTitle = "ESP"
    dsVersion = "6500SI-V2"
    matchMode = "overlap"
    annotationCols = "EA_GTC,DP"
    configPath = os.path.join("out", "esp_coverage.missing.config")

    creator = TabixIndexedTsvDatasourceCreator()
    try:
        creator.createDatasource(
            outputDir,
            inputTsv,
            indexCols,
            configPath,
            dsType,
            dsTitle,
            dsVersion,
            matchMode,
            annotationCols,
            DatasourceInstallUtils.getIndexCols(dsType, indexCols))
    except InputMismatchException:
        # Deliberately ignored by the original test.
        pass
def testCreateDatasource(self):
    """Create an indexed TSV datasource and validate the generated config.

    Checks that the "general" and "data_types" sections exist, that every
    expected option is present in "general" with the expected value, and
    that the data types recorded for EA_GTC and DP are "String" and
    "Integer".
    """
    inputTsv = os.path.join("testdata", "ESP6500SI-V2.chr1.snps_indels.head.25.txt")
    outputDir = "out"
    expectedSrcFile = "ESP6500SI-V2.chr1.snps_indels.head.25.tabix_indexed.txt.gz"
    indexCols = "CHROM,POS,POS,REF,ALT"
    expectedColumns = "CHROM,POS,REF,ALT,DBSNP,EA_AC,AA_AC,TAC,MAF,GTS,EA_GTC,AA_GTC,GTC,DP,FG,GM,AA,AAC,PP,CDP,PH,CP,CG,GL,GS,CA,EXOME_CHIP,GWAS_PUBMED"
    configPath = "out/esp_coverage.config"
    dsType = "indexed_tsv"
    dsTitle = "ESP"
    dsVersion = "6500SI-V2"
    matchMode = "overlap"
    annotationCols = "DBSNP,EA_GTC,DP"

    creator = TabixIndexedTsvDatasourceCreator()
    creator.createDatasource(
        outputDir,
        inputTsv,
        indexCols,
        configPath,
        dsType,
        dsTitle,
        dsVersion,
        matchMode,
        annotationCols,
        DatasourceInstallUtils.getIndexCols(dsType, indexCols))

    parser = ConfigUtils.createConfigParser(configPath)

    for section in ("general", "data_types"):
        self.assertTrue(parser.has_section(section), "%s section is missing." % section)

    # Messages are spelled out in full so each one stays exactly as written.
    presenceChecks = (
        ("type", "type option is missing in general section."),
        ("src_file", "src_file option is missing in general section."),
        ("title", "title option is missing in general section."),
        ("version", "version option is missing in general section."),
        ("column_names", "column_names option is missing in general section."),
        ("annotation_column_names", "annotation_column_names option is missing in general section."),
        ("match_mode", "match_mode option is missing in general section"),
        ("index_column_names", "index_column_names option is missing in general section."),
    )
    for option, message in presenceChecks:
        self.assertTrue(parser.has_option("general", option), message)

    # (option, expected value, label used in the failure message)
    valueChecks = (
        ("type", dsType, "type"),
        ("src_file", expectedSrcFile, "src_file"),
        ("title", dsTitle, "title"),
        ("version", dsVersion, "version"),
        ("column_names", expectedColumns, "column names"),
        ("annotation_column_names", annotationCols, "annotation column names"),
        ("match_mode", matchMode, "match mode"),
        ("index_column_names", indexCols, "index column names"),
    )
    for option, expected, label in valueChecks:
        actual = parser.get("general", option)
        self.assertEqual(actual, expected,
                         "Expected data source %s is %s but was %s." % (label, expected, actual))

    for column, expectedType in (("EA_GTC", "String"), ("DP", "Integer")):
        actualType = parser.get("data_types", column)
        self.assertEqual(actualType, expectedType,
                         "Expected %s data type is %s but was %s." % (column, expectedType, actualType))
def testCreateDatasource(self):
    """Create an indexed TSV datasource, validate its config, and annotate a mutation.

    Steps:
      1. Recreate a dedicated, empty destination directory.
      2. Build the datasource and check that the data file and its ".tbi"
         companion exist in the destination directory.
      3. Check the sections, options and values of the generated config.
      4. Load the datasource from that config and annotate one mutation,
         checking the ESP_DBSNP, ESP_EA_GTC and ESP_DP annotations.
    """
    dsFile = os.path.join("testdata", "ESP6500SI-V2.chr1.snps_indels.head.25.txt")

    # Never specify "out/" here: the directory is deleted below before the
    # datasource is created, so it must be a dedicated subdirectory.
    destDir = "out/create_ds_test/"
    if os.path.exists(destDir):
        shutil.rmtree(destDir)
    os.makedirs(destDir)

    datasourceFilename = "ESP6500SI-V2.chr1.snps_indels.head.25.tabix_indexed.txt.gz"
    indexColumnNames = "CHROM,POS,POS,REF,ALT"
    columnNames = "CHROM,POS,REF,ALT,DBSNP,EA_AC,AA_AC,TAC,MAF,GTS,EA_GTC,AA_GTC,GTC,DP,FG,GM,AA,AAC,PP,CDP,PH,CP,CG,GL,GS,CA,EXOME_CHIP,GWAS_PUBMED"
    configFilename = "out/esp_coverage.config"
    dataSourceType = "indexed_tsv"
    dataSourceName = "ESP"
    dataSourceVersion = "6500SI-V2"
    dataSourceMatchMode = "overlap"
    annotationColumnNames = "DBSNP,EA_GTC,DP"

    datasourceBuilder = TabixIndexedTsvDatasourceCreator()
    datasourceBuilder.createDatasource(
        destDir,
        dsFile,
        indexColumnNames,
        configFilename,
        dataSourceType,
        dataSourceName,
        dataSourceVersion,
        dataSourceMatchMode,
        annotationColumnNames,
        DatasourceInstallUtils.getIndexCols(dataSourceType, indexColumnNames))

    # os.path.join instead of string concatenation, so the check does not
    # depend on destDir ending with a separator.
    datasourcePath = os.path.join(destDir, datasourceFilename)
    self.assertTrue(os.path.exists(datasourcePath))
    self.assertTrue(os.path.exists(datasourcePath + ".tbi"))

    configParser = ConfigUtils.createConfigParser(configFilename)

    for section in ("general", "data_types"):
        self.assertTrue(configParser.has_section(section), "%s section is missing." % section)

    # Messages are spelled out in full so each one stays exactly as written.
    presenceChecks = (
        ("type", "type option is missing in general section."),
        ("src_file", "src_file option is missing in general section."),
        ("title", "title option is missing in general section."),
        ("version", "version option is missing in general section."),
        ("column_names", "column_names option is missing in general section."),
        ("annotation_column_names", "annotation_column_names option is missing in general section."),
        ("match_mode", "match_mode option is missing in general section"),
        ("index_column_names", "index_column_names option is missing in general section."),
    )
    for option, message in presenceChecks:
        self.assertTrue(configParser.has_option("general", option), message)

    # (option, expected value, label used in the failure message)
    valueChecks = (
        ("type", dataSourceType, "type"),
        ("src_file", datasourceFilename, "src_file"),
        ("title", dataSourceName, "title"),
        ("version", dataSourceVersion, "version"),
        ("column_names", columnNames, "column names"),
        ("annotation_column_names", annotationColumnNames, "annotation column names"),
        ("match_mode", dataSourceMatchMode, "match mode"),
        ("index_column_names", indexColumnNames, "index column names"),
    )
    for option, expected, label in valueChecks:
        actual = configParser.get("general", option)
        self.assertEqual(actual, expected,
                         "Expected data source %s is %s but was %s." % (label, expected, actual))

    for column, expectedType in (("EA_GTC", "String"), ("DP", "Integer")):
        actualType = configParser.get("data_types", column)
        self.assertEqual(actualType, expectedType,
                         "Expected %s data type is %s but was %s." % (column, expectedType, actualType))

    # Load the datasource from the directory it was just written to.
    ds = DatasourceFactory.createDatasourceFromConfigParser(configParser, destDir)
    mut = MutationData(chr="1", start="69428", end="69428", ref_allele="T", alt_allele="G")
    mut2 = ds.annotate_mutation(mut)

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(mut2["ESP_DBSNP"], "dbSNP_134")
    self.assertEqual(mut2["ESP_EA_GTC"], "92,129,3203")
    self.assertEqual(mut2["ESP_DP"], "110")
def testCreateDatasource(self):
    """Create an indexed TSV datasource, validate its config, and annotate a mutation.

    The destination directory is recreated from scratch, the datasource is
    built into it, the generated config is checked section by section, and
    finally the datasource is loaded from that config and used to annotate
    a single mutation whose ESP_DBSNP, ESP_EA_GTC and ESP_DP values are
    checked.
    """
    dsFile = os.path.join("testdata", "ESP6500SI-V2.chr1.snps_indels.head.25.txt")

    # Never specify "out/" here: the directory is removed with
    # shutil.rmtree before the datasource is created.
    destDir = "out/create_ds_test/"
    if os.path.exists(destDir):
        shutil.rmtree(destDir)
    os.makedirs(destDir)

    datasourceFilename = "ESP6500SI-V2.chr1.snps_indels.head.25.tabix_indexed.txt.gz"
    indexColumnNames = "CHROM,POS,POS,REF,ALT"
    columnNames = "CHROM,POS,REF,ALT,DBSNP,EA_AC,AA_AC,TAC,MAF,GTS,EA_GTC,AA_GTC,GTC,DP,FG,GM,AA,AAC,PP,CDP,PH,CP,CG,GL,GS,CA,EXOME_CHIP,GWAS_PUBMED"
    configFilename = "out/esp_coverage.config"
    dataSourceType = "indexed_tsv"
    dataSourceName = "ESP"
    dataSourceVersion = "6500SI-V2"
    dataSourceMatchMode = "overlap"
    annotationColumnNames = "DBSNP,EA_GTC,DP"

    datasourceBuilder = TabixIndexedTsvDatasourceCreator()
    datasourceBuilder.createDatasource(
        destDir,
        dsFile,
        indexColumnNames,
        configFilename,
        dataSourceType,
        dataSourceName,
        dataSourceVersion,
        dataSourceMatchMode,
        annotationColumnNames,
        DatasourceInstallUtils.getIndexCols(dataSourceType, indexColumnNames))

    # Build the output path with os.path.join rather than "+", which only
    # works while destDir happens to end with a separator.
    datasourcePath = os.path.join(destDir, datasourceFilename)
    self.assertTrue(os.path.exists(datasourcePath))
    self.assertTrue(os.path.exists(datasourcePath + ".tbi"))

    configParser = ConfigUtils.createConfigParser(configFilename)

    self.assertTrue(configParser.has_section("general"), "general section is missing.")
    self.assertTrue(configParser.has_section("data_types"), "data_types section is missing.")

    # Option name -> failure message; messages are kept exactly as written.
    missingOptionMessages = [
        ("type", "type option is missing in general section."),
        ("src_file", "src_file option is missing in general section."),
        ("title", "title option is missing in general section."),
        ("version", "version option is missing in general section."),
        ("column_names", "column_names option is missing in general section."),
        ("annotation_column_names", "annotation_column_names option is missing in general section."),
        ("match_mode", "match_mode option is missing in general section"),
        ("index_column_names", "index_column_names option is missing in general section."),
    ]
    for optionName, failureMessage in missingOptionMessages:
        self.assertTrue(configParser.has_option("general", optionName), failureMessage)

    # (option, expected value, label used in the failure message)
    expectedGeneralValues = [
        ("type", dataSourceType, "type"),
        ("src_file", datasourceFilename, "src_file"),
        ("title", dataSourceName, "title"),
        ("version", dataSourceVersion, "version"),
        ("column_names", columnNames, "column names"),
        ("annotation_column_names", annotationColumnNames, "annotation column names"),
        ("match_mode", dataSourceMatchMode, "match mode"),
        ("index_column_names", indexColumnNames, "index column names"),
    ]
    for optionName, expectedValue, label in expectedGeneralValues:
        actualValue = configParser.get("general", optionName)
        self.assertEqual(actualValue, expectedValue,
                         "Expected data source %s is %s but was %s." % (label, expectedValue, actualValue))

    for columnName, expectedType in [("EA_GTC", "String"), ("DP", "Integer")]:
        actualType = configParser.get("data_types", columnName)
        self.assertEqual(actualType, expectedType,
                         "Expected %s data type is %s but was %s." % (columnName, expectedType, actualType))

    # Reuse destDir instead of repeating the directory literal.
    ds = DatasourceFactory.createDatasourceFromConfigParser(configParser, destDir)
    mut = MutationData(chr="1", start="69428", end="69428", ref_allele="T", alt_allele="G")
    mut2 = ds.annotate_mutation(mut)

    # assertEquals is a deprecated alias (removed in Python 3.12).
    self.assertEqual(mut2["ESP_DBSNP"], "dbSNP_134")
    self.assertEqual(mut2["ESP_EA_GTC"], "92,129,3203")
    self.assertEqual(mut2["ESP_DP"], "110")