class SffFlow(Tabular):
    """
    http://www.mothur.org/wiki/Flow_file

    The first line is the total number of flow values - 800 for Titanium data.
    For GS FLX it would be 400.
    Following lines contain:
    - SequenceName
    - the number of useable flows as defined by 454's software
    - the flow intensity for each base going in the order of TACG.

    Example:
        800
        GQY1XT001CQL4K 85 1.04 0.00 1.00 0.02 0.03 1.02 0.05 ...
        GQY1XT001CQIRF 84 1.02 0.06 0.98 0.06 0.09 1.05 0.07 ...
        GQY1XT001CF5YW 88 1.02 0.02 1.01 0.04 0.06 1.02 0.03 ...
    """
    MetadataElement(name="flow_values", default="", no_value="", optional=True, desc="Total number of flow values", readonly=True)
    MetadataElement(name="flow_order", default="TACG", no_value="TACG", desc="Total number of flow values", readonly=False)
    file_ext = 'mothur.sff.flow'

    def __init__(self, **kwd):
        super(SffFlow, self).__init__(**kwd)

    def set_meta(self, dataset, overwrite=True, skip=1, max_data_lines=None, **kwd):
        # Always skip exactly one header line (the flow-value count),
        # regardless of the `skip` argument.
        super(SffFlow, self).set_meta(dataset, overwrite, 1, max_data_lines)
        headers = get_headers(dataset.file_name, sep='\t', count=1)
        try:
            flow_values = int(headers[0][0])
            dataset.metadata.flow_values = flow_values
        except Exception as e:
            log.warning("SffFlow set_meta %s" % e)

    def make_html_table(self, dataset, skipchars=[]):
        """Create HTML table, used for displaying peek"""
        try:
            out = '<table cellspacing="0" cellpadding="3">'
            # Generate column header
            out += '<tr>'
            out += '<th>%d. Name</th>' % 1
            out += '<th>%d. Flows</th>' % 2
            for i in range(3, dataset.metadata.columns + 1):
                # Flow columns cycle through the flow order (TACG by default).
                base = dataset.metadata.flow_order[(i + 1) % 4]
                # BUGFIX: the format string had three placeholders
                # ('%d. %d %s') but only two arguments, raising TypeError
                # whenever a peek table was rendered.
                out += '<th>%d. %s</th>' % (i - 2, base)
            out += '</tr>'
            out += self.make_html_peek_rows(dataset, skipchars=skipchars)
            out += '</table>'
        except Exception as exc:
            out = "Can't create peek %s" % str(exc)
        return out
class SQlite(Binary):
    """Class describing a Sqlite database"""
    MetadataElement(name="tables", default=[], param=ListParameter, desc="Database Tables", readonly=True, visible=True, no_value=[])
    MetadataElement(name="table_columns", default={}, param=DictParameter, desc="Database Table Columns", readonly=True, visible=True, no_value={})
    MetadataElement(name="table_row_count", default={}, param=DictParameter, desc="Database Table Row Count", readonly=True, visible=True, no_value={})
    file_ext = "sqlite"

    def init_meta(self, dataset, copy_from=None):
        Binary.init_meta(self, dataset, copy_from=copy_from)

    def set_meta(self, dataset, overwrite=True, **kwd):
        """Populate table/column/row-count metadata by introspecting the db.

        Best-effort: any failure (e.g. file is not actually SQLite) leaves
        the metadata untouched.
        """
        try:
            tables = []
            columns = dict()
            rowcounts = dict()
            conn = sqlite3.connect(dataset.file_name)
            try:
                c = conn.cursor()
                tables_query = "SELECT name,sql FROM sqlite_master WHERE type='table' ORDER BY name"
                rslt = c.execute(tables_query).fetchall()
                for table, sql in rslt:
                    tables.append(table)
                    # Crude parse of the CREATE TABLE statement for its
                    # column definitions.
                    columns[table] = re.sub(r'^.*\((.*)\)$', r'\1', sql).split(',')
                for table in tables:
                    row_query = "SELECT count(*) FROM %s" % table
                    rowcounts[table] = c.execute(row_query).fetchone()[0]
            finally:
                # Close the connection on all paths (was leaked before).
                conn.close()
            dataset.metadata.tables = tables
            dataset.metadata.table_columns = columns
            dataset.metadata.table_row_count = rowcounts
        except Exception:
            # BUGFIX: `except Exception, exc` is Python-2-only syntax (a
            # SyntaxError under Python 3); the bound name was unused anyway.
            pass
class Quantile(Tabular):
    file_ext = 'mothur.quan'

    MetadataElement(name="filtered", default=False, no_value=False, optional=True, desc="Quantiles calculated using a mask", readonly=True)
    MetadataElement(name="masked", default=False, no_value=False, optional=True, desc="Quantiles calculated using a frequency filter", readonly=True)

    def __init__(self, **kwd):
        """Quantiles for chimera analysis"""
        super(Quantile, self).__init__(**kwd)
        self.column_names = ['num', 'ten', 'twentyfive', 'fifty', 'seventyfive', 'ninetyfive', 'ninetynine']
        self.column_types = ['int', 'float', 'float', 'float', 'float', 'float', 'float']

    def sniff(self, filename):
        """
        Determines whether the file is a quantiles tabular format for chimera analysis
        1	0	0	0	0	0	0
        2	0.309198	0.309198	0.37161	0.37161	0.37161	0.37161
        3	0.510982	0.563213	0.693529	0.858939	1.07442	1.20608
        ...

        >>> from galaxy.datatypes.sniff import get_test_fname
        >>> fname = get_test_fname( 'mothur_datatypetest_true.mothur.quan' )
        >>> Quantile().sniff( fname )
        True
        >>> fname = get_test_fname( 'mothur_datatypetest_false.mothur.quan' )
        >>> Quantile().sniff( fname )
        False
        """
        data_rows = 0
        for fields in iter_headers(filename, sep='\t'):
            # Comment/heading lines are tolerated but contribute no evidence.
            if fields[0].startswith('@') or fields[0].startswith('#'):
                continue
            # Every data row must be exactly: one int followed by six floats.
            if len(fields) != 7:
                return False
            try:
                int(fields[0])
                for value in fields[1:7]:
                    float(value)
            except Exception:
                return False
            data_rows += 1
        return data_rows > 0
class MauveXmfa(Text):
    file_ext = "xmfa"

    MetadataElement(name="number_of_models", default=0, desc="Number of alignmened sequences", readonly=True, visible=True, optional=True, no_value=0)

    def set_peek(self, dataset):
        # Blurb reflects how many alignments set_meta counted.
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disc'
        else:
            count = dataset.metadata.number_of_models
            dataset.blurb = "1 alignment" if count == 1 else f"{count} alignments"
            dataset.peek = get_file_peek(dataset.file_name)

    def sniff_prefix(self, file_prefix: FilePrefix):
        # Mauve XMFA files open with a fixed format banner.
        return file_prefix.startswith('#FormatVersion Mauve1')

    def set_meta(self, dataset, **kwd):
        # Count the '#SequenceNEntry' header lines, one per aligned sequence.
        dataset.metadata.number_of_models = generic_util.count_special_lines(
            '^#Sequence([[:digit:]]+)Entry', dataset.file_name)
class HmmPressed(Hmm):
    """Class describing a hmmer database produced by hmmpress"""
    file_ext = 'hmmPressed'
    composite_type = 'basic'

    # NOTE(review): this MetadataElement has no `name` argument, unlike every
    # other element in this file — confirm whether it should exist at all.
    MetadataElement(readonly=True, optional=True, visible=False, no_value=0)

    def __init__(self, **kwd):
        # Deliberately bypasses Hmm.__init__ and initializes data.Data directly.
        data.Data.__init__(self, **kwd)
        # The four binary files hmmpress produces alongside the profile.
        self.add_composite_file('hmm.h3m')
        self.add_composite_file('hmm.h3i')
        self.add_composite_file('hmm.h3f')
        self.add_composite_file('hmm.h3p')

    def set_peek(self, dataset, is_multi_byte=False):
        if not dataset.dataset.purged:
            dataset.peek = "Folder of multiple files"
            dataset.blurb = "Folder of multiple files"
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        try:
            return dataset.peek
        except Exception:
            # BUGFIX: bare `except:` also swallowed SystemExit/KeyboardInterrupt.
            return "Folder of multiple files"

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/plain'
class ExpressionJson(Json):
    """Represents the non-data input or output to a tool or workflow."""
    file_ext = "json"

    MetadataElement(name="json_type", default=None, desc="JavaScript or JSON type of expression", readonly=True, visible=True, no_value=None)

    def set_meta(self, dataset, **kwd):
        """Parse the document and record its JSON type name in metadata."""
        with open(dataset.file_name) as fh:
            parsed = json.load(fh)
        # Map the parsed Python type onto the corresponding JSON type name;
        # anything unrecognized (e.g. null) falls through to "null".
        if isinstance(parsed, int):
            detected = "int"
        elif isinstance(parsed, float):
            detected = "float"
        elif isinstance(parsed, list):
            detected = "list"
        elif isinstance(parsed, dict):
            detected = "object"
        else:
            detected = "null"
        dataset.metadata.json_type = detected
class TextGrid(Text):
    """Praat Textgrid file for speech annotations

    >>> from galaxy.datatypes.sniff import get_test_fname
    >>> fname = get_test_fname('1_1119_2_22_001.textgrid')
    >>> TextGrid().sniff(fname)
    True
    >>> fname = get_test_fname('drugbank_drugs.cml')
    >>> TextGrid().sniff(fname)
    False
    """
    file_ext = "textgrid"
    header = 'File type = "ooTextFile"\nObject class = "TextGrid"\n'
    blurb = "Praat TextGrid file"

    MetadataElement(name="annotations", default=[], desc="Annotation types", param=ListParameter, readonly=True, visible=True, optional=True, no_value=[])

    def sniff(self, filename):
        # A TextGrid must begin with the exact two-line ooTextFile header.
        expected = self.header
        with open(filename, 'r') as stream:
            return stream.read(len(expected)) == expected
class AnnotatedTabular(Tabular):
    """Tabular file whose optional comment block carries JSON that is imported into metadata."""
    MetadataElement(name="comment_metadata", desc="comment metadata", param=metadata.DictParameter, visible=False, readonly=True)

    def set_meta(self, dataset, overwrite=True, **kwd):
        Tabular.set_meta(self, dataset, overwrite=overwrite, max_data_lines=None, max_guess_type_data_lines=1000, **kwd)
        if dataset.metadata.comment_metadata is None:
            # First pass: lift the comment block's JSON into dataset metadata.
            comment_meta = DatasetCommentMetadata(dataset)
            dataset.metadata.comment_metadata = comment_meta.comment_metadata.copy()
        self.set_dataset_metadata_from_comments(dataset)

    def set_dataset_metadata_from_comments(self, dataset):
        # Hook for subclasses; the base class extracts nothing extra.
        pass

    def set_peek(self, dataset, line_count=None, is_multi_byte=False):
        # Intentionally skips Tabular.set_peek (note super(Tabular, ...)):
        # peek keeps '#' comment lines and uses unlimited width.
        super(Tabular, self).set_peek(dataset, line_count=line_count, is_multi_byte=is_multi_byte, WIDTH='unlimited', skipchars=['#'])

    def display_peek(self, dataset):
        """Returns formated html of peek"""
        return Tabular.make_html_table(self, dataset, skipchars=['#'])
class GeminiSQLite(SQlite):
    """Class describing a Gemini Sqlite database"""
    MetadataElement(name="gemini_version", default='0.10.0', param=MetadataParameter, desc="Gemini Version", readonly=True, visible=True, no_value='0.10.0')
    file_ext = "gemini.sqlite"

    def set_meta(self, dataset, overwrite=True, **kwd):
        """Read the Gemini version out of the database's `version` table."""
        super(GeminiSQLite, self).set_meta(dataset, overwrite=overwrite, **kwd)
        try:
            conn = sqlite.connect(dataset.file_name)
            c = conn.cursor()
            tables_query = "SELECT version FROM version"
            result = c.execute(tables_query).fetchall()
            for version, in result:
                dataset.metadata.gemini_version = version
            # TODO: Can/should we detect even more attributes, such as use of PED file, what was input annotation type, etc.
        except Exception as e:
            # log.warn is a deprecated alias of log.warning.
            log.warning('%s, set_meta Exception: %s', self, e)

    def sniff(self, filename):
        """A Gemini database is a SQLite database containing all of the
        tables Gemini creates."""
        if super(GeminiSQLite, self).sniff(filename):
            gemini_table_names = ["gene_detailed", "gene_summary", "resources", "sample_genotype_counts", "sample_genotypes", "samples", "variant_impacts", "variants", "version"]
            try:
                conn = sqlite.connect(filename)
                c = conn.cursor()
                tables_query = "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name"
                # BUGFIX: under Python 3 `map()` returns a one-shot iterator,
                # so the repeated `in` membership tests below exhausted it
                # after the first table name. Materialize a list instead.
                result = [row[0] for row in c.execute(tables_query).fetchall()]
                for table_name in gemini_table_names:
                    if table_name not in result:
                        return False
                return True
            except Exception as e:
                log.warning('%s, sniff Exception: %s', self, e)
        return False

    def set_peek(self, dataset, is_multi_byte=False):
        if not dataset.dataset.purged:
            dataset.peek = "Gemini SQLite Database, version %s" % (dataset.metadata.gemini_version or 'unknown')
            dataset.blurb = nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        try:
            return dataset.peek
        except Exception:
            # BUGFIX: was a bare `except:`.
            return "Gemini SQLite Database, version %s" % (dataset.metadata.gemini_version or 'unknown')
class DistanceMatrix(Text):
    file_ext = 'mothur.dist'

    # Metadata elements
    MetadataElement(name="sequence_count", default=0, desc="Number of sequences", readonly=True, visible=True, optional=True, no_value='?')

    def init_meta(self, dataset, copy_from=None):
        super(DistanceMatrix, self).init_meta(dataset, copy_from=copy_from)

    def set_meta(self, dataset, overwrite=True, skip=0, **kwd):
        """Read the leading sequence-count line into metadata."""
        super(DistanceMatrix, self).set_meta(dataset, overwrite=overwrite, skip=skip, **kwd)
        for fields in iter_headers(dataset.file_name, sep='\t'):
            if fields[0].startswith('@'):
                continue
            try:
                # The count is sometimes preceded by a tab, so join all fields.
                dataset.metadata.sequence_count = int(''.join(fields))
                break
            except Exception as e:
                # Pairwise matrices legitimately lack this count line, so
                # only warn for the other matrix flavours.
                if not isinstance(self, PairwiseDistanceMatrix):
                    log.warning("DistanceMatrix set_meta %s" % e)
class Genealogy(Tabular):
    """Tab delimited data in Genealogy format"""
    file_ext = "pqmp"

    MetadataElement(name="columns", default=3, desc="Number of columns", readonly=True)

    def __init__(self, **kwd):
        """Initialize QTLMap:Genealogy datatype"""
        Tabular.__init__(self, **kwd)

    def init_meta(self, dataset, copy_from=None):
        Tabular.init_meta(self, dataset, copy_from=copy_from)

    def sniff(self, filename):
        """
        Genealogy file layout (one record per line):
        ID PERE MERE GENERATION
        """
        with open(filename) as handle:
            first_line = handle.readline()
        fields = first_line.split()
        # Exactly four columns, with the generation restricted to 1, 2 or 3.
        if len(fields) != 4:
            return False
        return fields[3] in ("1", "2", "3")
class Simulation(Tabular):
    """Tab delimited data in Simulation format"""
    file_ext = "sqmp"

    MetadataElement(name="columns", default=3, desc="Number of columns", readonly=True)

    def __init__(self, **kwd):
        """Initialize QTLMap:Simulation datatype"""
        # NOTE(review): initializes data.Text rather than Tabular, skipping
        # Tabular's own __init__ — consistent with the other QTLMap types,
        # but confirm it is intentional.
        data.Text.__init__(self, **kwd)

    def init_meta(self, dataset, copy_from=None):
        Tabular.init_meta(self, dataset, copy_from=copy_from)

    def sniff(self, filename):
        """Detect the QTLMap simulation header by its characteristic labels."""
        # Use a context manager so the handle is always closed (it previously
        # stayed open if readline raised); the unused `first = line.split()`
        # local was dropped.
        with open(filename) as handle:
            line = handle.readline()
        return (line.find("Trait") != -1 and line.find("LRTMAX") != -1 and
                line.find("Position CHR") != -1 and line.find("Position DX") != -1)
class OMETiff(Tiff):
    file_ext = "ome.tiff"

    MetadataElement(name="offsets", desc="Offsets File", param=FileParameter, file_ext="json", readonly=True, visible=False, optional=True)

    def set_meta(self, dataset, overwrite=True, **kwd):
        """Record the byte offset of every TIFF page in a sidecar JSON file."""
        spec_key = 'offsets'
        offsets_file = dataset.metadata.offsets
        if not offsets_file:
            offsets_file = dataset.metadata.spec[spec_key].param.new_file(dataset=dataset)
        with tifffile.TiffFile(dataset.file_name) as tif:
            page_offsets = [page.offset for page in tif.pages]
        with open(offsets_file.file_name, 'w') as out:
            json.dump(page_offsets, out)
        dataset.metadata.offsets = offsets_file

    def sniff(self, filename):
        """An OME-TIFF is any TIFF that tifffile flags as containing OME-XML."""
        with tifffile.TiffFile(filename) as tif:
            return bool(tif.is_ome)
class Group(Tabular):
    file_ext = 'mothur.groups'

    MetadataElement(name="groups", default=[], desc="Group Names", readonly=True, visible=True, no_value=[])

    def __init__(self, **kwd):
        """
        http://www.mothur.org/wiki/Groups_file
        Group file assigns sequence (col 1) to a group (col 2)
        """
        super(Group, self).__init__(**kwd)
        self.column_names = ['name', 'group']
        self.columns = 2

    def set_meta(self, dataset, overwrite=True, skip=None, max_data_lines=None, **kwd):
        super(Group, self).set_meta(dataset, overwrite, skip, max_data_lines)
        # Collect the distinct group names found in column 2.
        distinct_groups = {
            fields[1]
            for fields in iter_headers(dataset.file_name, sep='\t', count=-1)
            if len(fields) > 1
        }
        dataset.metadata.groups = list(distinct_groups)
class MauveXmfa(Text):
    file_ext = "xmfa"

    MetadataElement(name="number_of_models", default=0, desc="Number of alignmened sequences", readonly=True, visible=True, optional=True, no_value=0)

    def set_peek(self, dataset, is_multi_byte=False):
        if not dataset.dataset.purged:
            # BUGFIX: the peek was computed twice with identical calls
            # (before and after the blurb logic); one call suffices.
            dataset.peek = get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
            if dataset.metadata.number_of_models == 1:
                dataset.blurb = "1 alignment"
            else:
                dataset.blurb = "%s alignments" % dataset.metadata.number_of_models
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disc'

    def sniff(self, filename):
        # Mauve XMFA output begins with this exact 21-character banner.
        # (A `return False` that was unreachable after the with-block's
        # return has been removed.)
        with open(filename, 'r') as handle:
            return handle.read(21) == '#FormatVersion Mauve1'

    def set_meta(self, dataset, **kwd):
        # Count the '#SequenceNEntry' header lines, one per aligned sequence.
        dataset.metadata.number_of_models = generic_util.count_special_lines(
            '^#Sequence([[:digit:]]+)Entry', dataset.file_name)
class ExpressionJson(Json):
    """Represents the non-data input or output to a tool or workflow."""
    file_ext = "json"

    MetadataElement(name="json_type", default=None, desc="JavaScript or JSON type of expression", readonly=True, visible=True, no_value=None)

    def set_meta(self, dataset, **kwd):
        """Classify the JSON document and store the type name in metadata."""
        if not dataset.has_data():
            return
        json_type = "null"
        file_path = dataset.file_name
        try:
            with open(file_path) as f:
                obj = json.load(f)
        except json.decoder.JSONDecodeError:
            # Surface a snippet of the offending content in the error.
            with open(file_path) as f:
                contents = f.read(512)
            raise Exception(f"Invalid JSON encountered {contents}")
        if isinstance(obj, int):
            json_type = "int"
        elif isinstance(obj, float):
            json_type = "float"
        elif isinstance(obj, list):
            json_type = "list"
        elif isinstance(obj, dict):
            json_type = "object"
        dataset.metadata.json_type = json_type
class GenericMolFile(data.Text):
    """
    abstract class for most of the molecule files
    """
    MetadataElement(name="number_of_molecules", default=0, desc="Number of molecules", readonly=True, visible=True, optional=True, no_value=0)

    def set_peek(self, dataset, is_multi_byte=False):
        if not dataset.dataset.purged:
            # BUGFIX: the peek was computed twice (once via the module-level
            # get_file_peek and once via data.get_file_peek); a single call
            # suffices — both names presumably resolve to the same helper
            # (TODO confirm against the module imports).
            dataset.peek = get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
            if dataset.metadata.number_of_molecules == 1:
                dataset.blurb = "1 molecule"
            else:
                dataset.blurb = "%s molecules" % dataset.metadata.number_of_molecules
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def get_mime(self):
        """Return the MIME type of the datatype."""
        return 'text/plain'
class PdedJoin(Tabular):
    """Tab delimited data in PdedJoin format"""
    file_ext = "pdedj"

    MetadataElement(name="columns", default=3, desc="Number of columns", readonly=True)

    def __init__(self, **kwd):
        """Initialize QTLMap:PdedJoin datatype"""
        # NOTE(review): initializes data.Text rather than Tabular, skipping
        # Tabular's own __init__ — consistent with the other QTLMap types,
        # but confirm it is intentional.
        data.Text.__init__(self, **kwd)

    def init_meta(self, dataset, copy_from=None):
        Tabular.init_meta(self, dataset, copy_from=copy_from)

    def sniff(self, filename):
        """Detect the QTLMap PdedJoin header by its characteristic labels."""
        # Context manager guarantees the handle is closed; the unused
        # `first = line.split()` local was dropped.
        with open(filename) as handle:
            line = handle.readline()
        return (line.find("Position") != -1 and line.find("Sire") != -1 and
                line.find("Dam_Phase") != -1 and line.find("p(Hs1/Hd1") != -1)
class SnpEffDb(Text):
    """Class describing a SnpEff genome build"""
    file_ext = "snpeffdb"

    MetadataElement(name="genome_version", default="unknown", desc="Genome Version", readonly=True, visible=True, no_value=None)
    MetadataElement(name="snpeff_version", default="SnpEff4.0", desc="SnpEff Version", readonly=True, visible=True, no_value=None)
    MetadataElement(name="regulation", default=[], desc="Regulation Names", readonly=True, visible=True, no_value=[], optional=True)
    MetadataElement(name="annotation", default=[], desc="Annotation Names", readonly=True, visible=True, no_value=[], optional=True)

    def __init__(self, **kwd):
        Text.__init__(self, **kwd)

    # The SnpEff version line was added in SnpEff version 4.1
    def getSnpeffVersionFromFile(self, path):
        """Return e.g. 'SnpEff4.1' parsed from the first line of the gzipped
        file at `path`, or None when it cannot be determined."""
        snpeff_version = None
        try:
            # Context manager closes the handle even if the read fails.
            with gzip.open(path, 'rb') as fh:
                buf = fh.read(100)
            # gzip in binary mode yields bytes on Python 3; decode before
            # matching against the str pattern.
            first_line = buf.splitlines()[0].strip().decode(errors='replace')
            m = re.match(r'^(SnpEff)\s+(\d+\.\d+).*$', first_line)
            if m:
                snpeff_version = m.groups()[0] + m.groups()[1]
        except Exception:
            # BUGFIX: was `except Exception, e: return e` — Python-2-only
            # syntax that also returned the exception object where callers
            # expect a version string (or None).
            pass
        return snpeff_version
class Vcf( Tabular ):
    """ Variant Call Format for describing SNPs and other simple genome variations. """
    edam_format = "format_3016"
    track_type = "VariantTrack"
    data_sources = { "data": "tabix", "index": "bigwig" }

    file_ext = 'vcf'
    column_names = [ 'Chrom', 'Pos', 'ID', 'Ref', 'Alt', 'Qual', 'Filter', 'Info', 'Format', 'data' ]

    MetadataElement( name="columns", default=10, desc="Number of columns", readonly=True, visible=False )
    MetadataElement( name="column_types", default=['str', 'int', 'str', 'str', 'str', 'int', 'str', 'list', 'str', 'str'], param=metadata.ColumnTypesParameter, desc="Column types", readonly=True, visible=False )
    MetadataElement( name="viz_filter_cols", desc="Score column for visualization", default=[5], param=metadata.ColumnParameter, optional=True, multiple=True, visible=False )
    MetadataElement( name="sample_names", default=[], desc="Sample names", readonly=True, visible=False, optional=True, no_value=[] )

    def sniff( self, filename ):
        # Every VCF starts with a '##fileformat=VCF...' meta line.
        headers = get_headers( filename, '\n', count=1 )
        return headers[0][0].startswith("##fileformat=VCF")

    def display_peek( self, dataset ):
        """Returns formated html of peek"""
        return Tabular.make_html_table( self, dataset, column_names=self.column_names )

    def set_meta( self, dataset, **kwd ):
        Tabular.set_meta( self, dataset, **kwd )
        # BUGFIX: the file handle was opened but never closed; a context
        # manager releases it even if parsing raises.
        with open( dataset.file_name ) as source:
            # Skip the '##' meta lines; stop at the first line without them.
            line = None
            for line in source:
                if not line.startswith( '##' ):
                    break
            if line and line.startswith( '#' ):
                # Found the '#CHROM ...' header line; per the VCF spec,
                # sample names start at column 10 (index 9).
                dataset.metadata.sample_names = line.split()[ 9: ]

    # Dataproviders
    @dataproviders.decorators.dataprovider_factory( 'genomic-region', dataproviders.dataset.GenomicRegionDataProvider.settings )
    def genomic_region_dataprovider( self, dataset, **settings ):
        return dataproviders.dataset.GenomicRegionDataProvider( dataset, 0, 1, 1, **settings )

    @dataproviders.decorators.dataprovider_factory( 'genomic-region-dict', dataproviders.dataset.GenomicRegionDataProvider.settings )
    def genomic_region_dict_dataprovider( self, dataset, **settings ):
        settings[ 'named_columns' ] = True
        return self.genomic_region_dataprovider( dataset, **settings )
class SnpEffDb( Text ):
    """Class describing a SnpEff genome build"""
    file_ext = "snpeffdb"

    MetadataElement( name="genome_version", default=None, desc="Genome Version", readonly=True, visible=True, no_value=None )
    MetadataElement( name="regulation", default=[], desc="Regulation Names", readonly=True, visible=True, no_value=[], optional=True)
    MetadataElement( name="annotation", default=[], desc="Annotation Names", readonly=True, visible=True, no_value=[], optional=True)

    def __init__( self, **kwd ):
        Text.__init__( self, **kwd )

    def set_meta( self, dataset, **kwd ):
        """Scan extra_files_path for the genome build plus optional
        regulation/annotation files, then summarize them into the primary
        file (best-effort)."""
        Text.set_meta(self, dataset, **kwd )
        data_dir = dataset.extra_files_path
        # search data_dir/genome_version for files
        regulation_pattern = 'regulation_(.+).bin'
        # annotation files that are included in snpEff by a flag
        annotations_dict = {'nextProt.bin': '-nextprot', 'motif.bin': '-motif'}
        regulations = []
        annotations = []
        # BUGFIX: genome_version was referenced while possibly unbound when
        # no snpEffectPredictor file was found (the NameError was silently
        # swallowed by the bare except below); initialize it explicitly.
        genome_version = None
        if data_dir and os.path.isdir(data_dir):
            for root, dirs, files in os.walk(data_dir):
                for fname in files:
                    if fname.startswith('snpEffectPredictor'):
                        # if snpEffectPredictor.bin download succeeded
                        genome_version = os.path.basename(root)
                        dataset.metadata.genome_version = genome_version
                    else:
                        m = re.match(regulation_pattern, fname)
                        if m:
                            regulations.append(m.groups()[0])
                        elif fname in annotations_dict:
                            annotations.append(annotations_dict[fname].lstrip('-'))
            dataset.metadata.regulation = regulations
            dataset.metadata.annotation = annotations
            # As before, the summary is only written once a genome build was
            # actually found.
            if genome_version:
                try:
                    # BUGFIX: `file()` does not exist in Python 3; use open()
                    # in a context manager, and catch Exception instead of a
                    # bare except.
                    with open(dataset.file_name, 'w') as fh:
                        fh.write("%s\n" % genome_version)
                        if annotations:
                            fh.write("annotations: %s\n" % ','.join(annotations))
                        if regulations:
                            fh.write("regulations: %s\n" % ','.join(regulations))
                except Exception:
                    pass
class Model(data.Text):
    """Tab delimited data in Model format"""
    file_ext = "qtlmap.model"

    MetadataElement(name="columns", default=3, desc="Number of columns", readonly=True)

    def __init__(self, **kwd):
        """Initialize QTLMap:Model datatype"""
        data.Text.__init__(self, **kwd)

    def init_meta(self, dataset, copy_from=None):
        data.Text.init_meta(self, dataset, copy_from=copy_from)

    def sniff(self, filename):
        """
        QTLMap model file layout:
            <n_traits>
            <n_fixed_effects> <n_covariates>
            <effect names ...>
            <trait> r/a/i 0/1 ...   (one line per trait)
        """
        # BUGFIX: the handle leaked on every early `return False`; the
        # context manager closes it on all paths.
        with open(filename) as handle:
            # number of traits
            fields = handle.readline().split()
            if not fields[0].isdigit():
                return False
            ncar = int(fields[0])
            # number of fixed effects and covariates
            fields = handle.readline().split()
            if not fields[0].isdigit():
                return False
            if not fields[1].isdigit():
                return False
            nfix = int(fields[0])
            ncov = int(fields[1])
            # effect names
            fields = handle.readline().split()
            if len(fields) < (nfix + ncov):
                return False
            # one description line per trait
            for _ in range(ncar):
                fields = handle.readline().split()
                if len(fields) < (2 + 2 * nfix + ncov):
                    return False
        return True
class FeatureLocationIndex(Tabular):
    """
    An index that stores feature locations in tabular format.
    """
    file_ext = 'fli'
    # Fixed two-column layout; both columns are strings — presumably a
    # feature identifier and its location (TODO confirm against the writer).
    MetadataElement(name="columns", default=2, desc="Number of columns", readonly=True, visible=False)
    MetadataElement(name="column_types", default=['str', 'str'], param=metadata.ColumnTypesParameter, desc="Column types", readonly=True, visible=False, no_value=[])
class AnvioSamplesDB(AnvioDB):
    """Class for Anvio Samples DB database files."""
    # Basename recorded in metadata — presumably consumed by the AnvioDB
    # base class to locate the primary file (TODO confirm).
    _anvio_basename = 'SAMPLES.db'
    MetadataElement(name="anvio_basename", default=_anvio_basename, desc="Basename", readonly=True)
    file_ext = 'anvio_samples_db'
class LastzCoverage(Tabular):
    file_ext = "coverage"

    MetadataElement(name="chromCol", default=1, desc="Chrom column", param=metadata.ColumnParameter)
    MetadataElement(name="positionCol", default=2, desc="Position column", param=metadata.ColumnParameter)
    MetadataElement(name="forwardCol", default=3, desc="Forward or aggregate read column", param=metadata.ColumnParameter)
    MetadataElement(name="reverseCol", desc="Optional reverse read column", param=metadata.ColumnParameter, optional=True, no_value=0)
    MetadataElement(name="columns", default=3, desc="Number of columns", readonly=True, visible=False)

    def get_track_resolution(self, dataset, start, end):
        """Return a power-of-ten resolution that plots roughly 1000 points
        across [start, end), clamped to the range [1, 10000]."""
        # FIX(idiom): the local was named `range`, shadowing the builtin.
        span = end - start
        # Determine appropriate resolution to plot ~1000 points
        resolution = math.ceil(10 ** math.ceil(math.log10(span / 1000)))
        # Restrict to valid range
        resolution = min(resolution, 10000)
        resolution = max(resolution, 1)
        return resolution
class AnvioPanDB(AnvioDB):
    """Class for Anvio Pan DB database files."""
    # Basename recorded in metadata — presumably consumed by the AnvioDB
    # base class to locate the primary file (TODO confirm).
    _anvio_basename = 'PAN.db'
    MetadataElement(name="anvio_basename", default=_anvio_basename, desc="Basename", readonly=True)
    file_ext = 'anvio_pan_db'
class AnvioGenomesDB(AnvioDB):
    """Class for Anvio Genomes DB database files."""
    # Note the leading '-': unlike the sibling Anvio types this is a name
    # suffix rather than a full basename (TODO confirm against AnvioDB).
    _anvio_basename = '-GENOMES.db'
    MetadataElement(name="anvio_basename", default=_anvio_basename, desc="Basename", readonly=True)
    file_ext = 'anvio_genomes_db'
class AnvioStructureDB(AnvioDB):
    """Class for Anvio Structure DB database files."""
    # Basename recorded in metadata — presumably consumed by the AnvioDB
    # base class to locate the primary file (TODO confirm).
    _anvio_basename = 'STRUCTURE.db'
    MetadataElement(name="anvio_basename", default=_anvio_basename, desc="Basename", readonly=True)
    file_ext = 'anvio_structure_db'
class Bcf(Binary):
    """Class describing a BCF file"""
    edam_format = "format_3020"
    file_ext = "bcf"

    MetadataElement(name="bcf_index", desc="BCF Index File", param=metadata.FileParameter, file_ext="csi", readonly=True, no_value=None, visible=False, optional=True)

    def sniff(self, filename):
        # BCF is compressed in the BGZF format, and must not be uncompressed in Galaxy.
        # The first 3 bytes of any bcf file is 'BCF', and the file is binary.
        try:
            # BUGFIX: binascii.hexlify('BCF') requires bytes on Python 3;
            # compare the decompressed magic bytes directly instead. The
            # gzip handle is also closed now.
            with gzip.open(filename, 'rb') as fh:
                header = fh.read(3)
            return header == b'BCF'
        except Exception:
            # BUGFIX: was a bare `except:`.
            return False

    def set_meta(self, dataset, overwrite=True, **kwd):
        """ Creates the index for the BCF file. """
        # These metadata values are not accessible by users, always overwrite
        index_file = dataset.metadata.bcf_index
        if not index_file:
            index_file = dataset.metadata.spec['bcf_index'].param.new_file(dataset=dataset)
        # Create the bcf index
        # $ bcftools index
        # Usage: bcftools index <in.bcf>
        dataset_symlink = os.path.join(
            os.path.dirname(index_file.file_name),
            '__dataset_%d_%s' % (dataset.id, os.path.basename(index_file.file_name)))
        os.symlink(dataset.file_name, dataset_symlink)
        stderr_name = tempfile.NamedTemporaryFile(prefix="bcf_index_stderr").name
        command = ['bcftools', 'index', dataset_symlink]
        # Context managers close the stderr capture file on all paths.
        with open(stderr_name, 'wb') as stderr_fh:
            proc = subprocess.Popen(args=command, stderr=stderr_fh)
            exit_code = proc.wait()
        shutil.move(dataset_symlink + '.csi', index_file.file_name)
        with open(stderr_name) as stderr_fh:
            stderr = stderr_fh.read().strip()
        if stderr:
            if exit_code != 0:
                os.unlink(stderr_name)  # clean up
                raise Exception("Error Setting BCF Metadata: %s" % stderr)
            else:
                # BUGFIX: `print stderr` is Python-2-only syntax.
                print(stderr)
        dataset.metadata.bcf_index = index_file
        # Remove temp file
        os.unlink(stderr_name)
class PlantTribesKsComponents(Tabular):
    file_ext = "ptkscmp"

    MetadataElement(name="number_comp", default=0, desc="Number of significant components in the Ks distribution", readonly=True, visible=True, no_value=0)

    def display_peek(self, dataset):
        try:
            return dataset.peek
        except Exception:
            return "Significant components in the Ks distribution (%s)" % (nice_size(dataset.get_size()))

    def set_meta(self, dataset, **kwd):
        """
        Set the number of significant components in the Ks distribution.
        The dataset will always be on the order of less than 10 lines.
        """
        super(PlantTribesKsComponents, self).set_meta(dataset, **kwd)
        components = []
        with open(dataset.file_name) as fh:
            next(fh, None)  # the first line is a header — skip it
            for row in fh:
                tokens = row.strip().split()
                try:
                    # Could be \t.
                    components.append(int(tokens[2]))
                except Exception:
                    continue
        if components:
            dataset.metadata.number_comp = max(components)

    def set_peek(self, dataset, is_multi_byte=False):
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
        else:
            dataset.peek = get_file_peek(dataset.file_name)
            n = dataset.metadata.number_comp
            dataset.blurb = "1 significant component" if n == 1 else "%s significant components" % n

    def sniff(self, filename):
        """
        >>> from galaxy.datatypes.sniff import get_test_fname
        >>> fname = get_test_fname('test_tab.bed')
        >>> PlantTribesKsComponents().sniff(fname)
        False
        >>> fname = get_test_fname('1.ptkscmp')
        >>> PlantTribesKsComponents().sniff(fname)
        True
        """
        try:
            line_item_str = get_headers(filename, '\\t', 1)[0][0]
            return line_item_str == 'species\tn\tnumber_comp\tlnL\tAIC\tBIC\tmean\tvariance\tporportion'
        except Exception:
            return False