Exemplo n.º 1
0
class SffFlow(Tabular):
    """
    Mothur flow file (``mothur.sff.flow``).

    http://www.mothur.org/wiki/Flow_file
    The first line is the total number of flow values - 800 for Titanium data. For GS FLX it would be 400.
    Following lines contain:
    - SequenceName
    - the number of useable flows as defined by 454's software
    - the flow intensity for each base going in the order of TACG.
    Example:
      800
      GQY1XT001CQL4K 85 1.04 0.00 1.00 0.02 0.03 1.02 0.05 ...
      GQY1XT001CQIRF 84 1.02 0.06 0.98 0.06 0.09 1.05 0.07 ...
      GQY1XT001CF5YW 88 1.02 0.02 1.01 0.04 0.06 1.02 0.03 ...
    """
    MetadataElement(name="flow_values", default="", no_value="", optional=True, desc="Total number of flow values", readonly=True)
    MetadataElement(name="flow_order", default="TACG", no_value="TACG", desc="Flow order", readonly=False)
    file_ext = 'mothur.sff.flow'

    def __init__(self, **kwd):
        super(SffFlow, self).__init__(**kwd)

    def set_meta(self, dataset, overwrite=True, skip=1, max_data_lines=None, **kwd):
        """
        Set the tabular metadata and read ``flow_values`` from the first line.

        The ``skip`` argument is accepted for signature compatibility but is
        not forwarded: exactly one line (the flow-count line) is always skipped.
        A first line that cannot be parsed as an integer is logged and leaves
        ``flow_values`` untouched.
        """
        super(SffFlow, self).set_meta(dataset, overwrite, 1, max_data_lines)

        headers = get_headers(dataset.file_name, sep='\t', count=1)
        try:
            dataset.metadata.flow_values = int(headers[0][0])
        except Exception as e:
            log.warning("SffFlow set_meta %s", e)

    def make_html_table(self, dataset, skipchars=None):
        """
        Create HTML table, used for displaying peek.

        Columns 1 and 2 are labelled ``Name`` and ``Flows``; every following
        column is labelled with its column number, its flow number and the
        base taken from ``flow_order`` (cycled over its full length).
        Any failure is reported as a "Can't create peek" message instead of
        being raised.
        """
        skipchars = [] if skipchars is None else skipchars
        try:
            flow_order = dataset.metadata.flow_order
            header_cells = ['<th>%d. Name</th>' % 1, '<th>%d. Flows</th>' % 2]
            for i in range(3, dataset.metadata.columns + 1):
                # Column 3 is the first flow, hence the (i - 3) offset into the cycle.
                base = flow_order[(i - 3) % len(flow_order)]
                # Three values for three placeholders: column no., flow no., base.
                header_cells.append('<th>%d. %d %s</th>' % (i, i - 2, base))

            out = '<table cellspacing="0" cellpadding="3">'
            out += '<tr>' + ''.join(header_cells) + '</tr>'
            out += self.make_html_peek_rows(dataset, skipchars=skipchars)
            out += '</table>'
        except Exception as exc:
            out = "Can't create peek %s" % str(exc)
        return out
Exemplo n.º 2
0
class SQlite(Binary):
    """Class describing a Sqlite database """
    MetadataElement(name="tables",
                    default=[],
                    param=ListParameter,
                    desc="Database Tables",
                    readonly=True,
                    visible=True,
                    no_value=[])
    MetadataElement(name="table_columns",
                    default={},
                    param=DictParameter,
                    desc="Database Table Columns",
                    readonly=True,
                    visible=True,
                    no_value={})
    MetadataElement(name="table_row_count",
                    default={},
                    param=DictParameter,
                    desc="Database Table Row Count",
                    readonly=True,
                    visible=True,
                    no_value={})
    file_ext = "sqlite"

    def init_meta(self, dataset, copy_from=None):
        Binary.init_meta(self, dataset, copy_from=copy_from)

    def set_meta(self, dataset, overwrite=True, **kwd):
        """
        Record the table names, column definitions and row counts of the database.

        The metadata is only assigned when every query succeeded. Any failure
        is logged and otherwise ignored (best effort), and the connection is
        always closed.
        """
        import logging

        conn = None
        try:
            tables = []
            columns = dict()
            rowcounts = dict()
            conn = sqlite3.connect(dataset.file_name)
            c = conn.cursor()
            tables_query = "SELECT name,sql FROM sqlite_master WHERE type='table' ORDER BY name"
            for table, sql in c.execute(tables_query).fetchall():
                tables.append(table)
                # Keep what sits between the outermost parentheses of the
                # CREATE TABLE statement and split it on commas.
                columns[table] = re.sub(r'^.*\((.*)\)$', '\\1', sql).split(',')
            for table in tables:
                # Quote the identifier so names containing spaces, keywords or
                # quotes cannot break (or alter) the statement.
                quoted = '"%s"' % table.replace('"', '""')
                row_query = "SELECT count(*) FROM %s" % quoted
                rowcounts[table] = c.execute(row_query).fetchone()[0]
            dataset.metadata.tables = tables
            dataset.metadata.table_columns = columns
            dataset.metadata.table_row_count = rowcounts
        except Exception as exc:
            logging.getLogger(__name__).warning('%s, set_meta Exception: %s', self, exc)
        finally:
            if conn is not None:
                conn.close()
Exemplo n.º 3
0
class Quantile(Tabular):
    """Quantiles for chimera analysis"""
    file_ext = 'mothur.quan'
    MetadataElement(name="filtered", default=False, no_value=False, optional=True, desc="Quantiles calculated using a frequency filter", readonly=True)
    MetadataElement(name="masked", default=False, no_value=False, optional=True, desc="Quantiles calculated using a mask", readonly=True)

    def __init__(self, **kwd):
        """Set up the seven fixed column names and types."""
        super(Quantile, self).__init__(**kwd)
        self.column_names = ['num', 'ten', 'twentyfive', 'fifty', 'seventyfive', 'ninetyfive', 'ninetynine']
        self.column_types = ['int', 'float', 'float', 'float', 'float', 'float', 'float']

    def sniff(self, filename):
        """
        Determines whether the file is a quantiles tabular format for chimera analysis
        1       0       0       0       0       0       0
        2       0.309198        0.309198        0.37161 0.37161 0.37161 0.37161
        3       0.510982        0.563213        0.693529        0.858939        1.07442 1.20608
        ...

        Lines starting with ``@`` or ``#`` are ignored; every other inspected
        line must hold one integer followed by six floats, and at least one
        such line must be present.

        >>> from galaxy.datatypes.sniff import get_test_fname
        >>> fname = get_test_fname( 'mothur_datatypetest_true.mothur.quan' )
        >>> Quantile().sniff( fname )
        True
        >>> fname = get_test_fname( 'mothur_datatypetest_false.mothur.quan' )
        >>> Quantile().sniff( fname )
        False
        """
        count = 0
        for line in iter_headers(filename, sep='\t'):
            if line[0].startswith(('@', '#')):
                continue
            if len(line) != 7:
                return False
            try:
                int(line[0])
                for value in line[1:]:
                    float(value)
            except (TypeError, ValueError):
                return False
            count += 1
        return count > 0
Exemplo n.º 4
0
class MauveXmfa(Text):
    """Text datatype for ``xmfa`` files that start with a ``#FormatVersion Mauve1`` header."""
    file_ext = "xmfa"

    MetadataElement(name="number_of_models",
                    default=0,
                    desc="Number of aligned sequences",
                    readonly=True,
                    visible=True,
                    optional=True,
                    no_value=0)

    def set_peek(self, dataset):
        """Set the peek text and an "N alignment(s)" blurb, or purge notices for a purged dataset."""
        if not dataset.dataset.purged:
            if (dataset.metadata.number_of_models == 1):
                dataset.blurb = "1 alignment"
            else:
                dataset.blurb = f"{dataset.metadata.number_of_models} alignments"
            dataset.peek = get_file_peek(dataset.file_name)
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disc'

    def sniff_prefix(self, file_prefix: FilePrefix):
        """Return True when the file prefix starts with the Mauve format-version header."""
        return file_prefix.startswith('#FormatVersion Mauve1')

    def set_meta(self, dataset, **kwd):
        """Store in ``number_of_models`` the value returned by ``count_special_lines`` for ``#Sequence<N>Entry`` lines."""
        # NOTE(review): the pattern uses a POSIX character class, so the helper
        # presumably hands it to a POSIX-regex tool rather than `re` - confirm.
        dataset.metadata.number_of_models = generic_util.count_special_lines(
            '^#Sequence([[:digit:]]+)Entry', dataset.file_name)
Exemplo n.º 5
0
class HmmPressed(Hmm):
    """Class describing a hmmer database produced by hmmpress"""

    file_ext = 'hmmPressed'
    composite_type = 'basic'

    # NOTE(review): this element is declared without a `name` - confirm that is intended.
    MetadataElement(readonly=True, optional=True, visible=False, no_value=0)

    def __init__(self, **kwd):
        """Register the four composite member files (h3m, h3i, h3f, h3p)."""
        # NOTE(review): initialises via data.Data directly rather than the
        # declared base Hmm - presumably deliberate; left unchanged.
        data.Data.__init__(self, **kwd)
        self.add_composite_file('hmm.h3m')
        self.add_composite_file('hmm.h3i')
        self.add_composite_file('hmm.h3f')
        self.add_composite_file('hmm.h3p')

    def set_peek(self, dataset, is_multi_byte=False):
        """Set a fixed peek/blurb, or purge notices for a purged dataset."""
        if not dataset.dataset.purged:
            dataset.peek = "Folder of multiple files"
            dataset.blurb = "Folder of multiple files"
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return the stored peek, falling back to a fixed description if it cannot be read."""
        try:
            return dataset.peek
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit propagate.
            return "Folder of multiple files"

    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/plain'
Exemplo n.º 6
0
class ExpressionJson(Json):
    """ Represents the non-data input or output to a tool or workflow.
    """
    file_ext = "json"
    MetadataElement(name="json_type", default=None, desc="JavaScript or JSON type of expression",
                    readonly=True, visible=True, no_value=None)

    def set_meta(self, dataset, **kwd):
        """
        Parse the dataset as JSON and store a label for its top-level type.

        The label is "int", "float", "list" or "object"; any other parsed
        value is labelled "null".
        """
        with open(dataset.file_name) as handle:
            parsed = json.load(handle)

        # Checked in order; the first matching Python type decides the label.
        labels_by_type = (
            (int, "int"),
            (float, "float"),
            (list, "list"),
            (dict, "object"),
        )
        label = "null"
        for python_type, candidate in labels_by_type:
            if isinstance(parsed, python_type):
                label = candidate
                break

        dataset.metadata.json_type = label
Exemplo n.º 7
0
class TextGrid(Text):
    """Praat Textgrid file for speech annotations

    >>> from galaxy.datatypes.sniff import get_test_fname
    >>> fname = get_test_fname('1_1119_2_22_001.textgrid')
    >>> TextGrid().sniff(fname)
    True
    >>> fname = get_test_fname('drugbank_drugs.cml')
    >>> TextGrid().sniff(fname)
    False

    """

    file_ext = "textgrid"
    # Exact text a file must start with to be recognised by sniff().
    header = 'File type = "ooTextFile"\nObject class = "TextGrid"\n'

    blurb = "Praat TextGrid file"

    MetadataElement(name="annotations",
                    default=[],
                    desc="Annotation types",
                    param=ListParameter,
                    readonly=True,
                    visible=True,
                    optional=True,
                    no_value=[])

    def sniff(self, filename):
        """
        Return True when the file starts with exactly ``self.header``.

        A file whose leading bytes cannot be decoded as text is reported as
        "not a TextGrid" instead of raising.
        """
        try:
            with open(filename, 'r') as fd:
                return fd.read(len(self.header)) == self.header
        except UnicodeDecodeError:
            return False
Exemplo n.º 8
0
class AnnotatedTabular(Tabular):
    """ Tabular file with optional comment block containing JSON to be imported into metadata """
    MetadataElement(name="comment_metadata", desc="comment metadata",
                    param=metadata.DictParameter, visible=False, readonly=True)

    def set_meta(self, dataset, overwrite=True, **kwd):
        """
        Run the Tabular metadata pass, then fill ``comment_metadata`` if it is unset.

        When ``comment_metadata`` is still ``None`` it receives a copy of the
        ``comment_metadata`` of a ``DatasetCommentMetadata`` built from the
        dataset, after which ``set_dataset_metadata_from_comments`` is invoked.
        """
        Tabular.set_meta(self, dataset, overwrite=overwrite, max_data_lines=None,
                         max_guess_type_data_lines=1000, **kwd)
        if dataset.metadata.comment_metadata is not None:
            return
        from_comments = DatasetCommentMetadata(dataset)
        dataset.metadata.comment_metadata = from_comments.comment_metadata.copy()
        self.set_dataset_metadata_from_comments(dataset)

    def set_dataset_metadata_from_comments(self, dataset):
        """Hook called after ``comment_metadata`` has been populated; does nothing here."""
        pass

    def set_peek(self, dataset, line_count=None, is_multi_byte=False):
        """Delegate to the set_peek found after Tabular in the MRO, with unlimited width and '#' lines skipped."""
        peek_options = dict(line_count=line_count,
                            is_multi_byte=is_multi_byte,
                            WIDTH='unlimited',
                            skipchars=['#'])
        super(Tabular, self).set_peek(dataset, **peek_options)

    def display_peek(self, dataset):
        """Returns formatted html of peek"""
        html = Tabular.make_html_table(self, dataset, skipchars=['#'])
        return html
Exemplo n.º 9
0
class GeminiSQLite(SQlite):
    """Class describing a Gemini Sqlite database """
    MetadataElement(name="gemini_version",
                    default='0.10.0',
                    param=MetadataParameter,
                    desc="Gemini Version",
                    readonly=True,
                    visible=True,
                    no_value='0.10.0')
    file_ext = "gemini.sqlite"

    def set_meta(self, dataset, overwrite=True, **kwd):
        """
        Run the parent metadata pass, then read ``gemini_version`` from the ``version`` table.

        If the table holds several rows the last one wins. Failures are logged
        and otherwise ignored; the connection is always closed.
        """
        super(GeminiSQLite, self).set_meta(dataset, overwrite=overwrite, **kwd)
        conn = None
        try:
            conn = sqlite.connect(dataset.file_name)
            c = conn.cursor()
            tables_query = "SELECT version FROM version"
            result = c.execute(tables_query).fetchall()
            for version, in result:
                dataset.metadata.gemini_version = version
            # TODO: Can/should we detect even more attributes, such as use of PED file, what was input annotation type, etc.
        except Exception as e:
            log.warning('%s, set_meta Exception: %s', self, e)
        finally:
            if conn is not None:
                conn.close()

    def sniff(self, filename):
        """
        Return True when the parent sniffer accepts the file and every expected Gemini table exists.

        Query failures are logged and reported as False.
        """
        if not super(GeminiSQLite, self).sniff(filename):
            return False
        gemini_table_names = [
            "gene_detailed", "gene_summary", "resources",
            "sample_genotype_counts", "sample_genotypes", "samples",
            "variant_impacts", "variants", "version"
        ]
        conn = None
        try:
            conn = sqlite.connect(filename)
            c = conn.cursor()
            tables_query = "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name"
            # Materialise the names in a set: a lazy `map` object is consumed
            # by the first membership test on Python 3.
            present = set(row[0] for row in c.execute(tables_query).fetchall())
            return all(table_name in present for table_name in gemini_table_names)
        except Exception as e:
            log.warning('%s, sniff Exception: %s', self, e)
        finally:
            if conn is not None:
                conn.close()
        return False

    def set_peek(self, dataset, is_multi_byte=False):
        """Set a version-bearing peek and a size blurb, or purge notices for a purged dataset."""
        if not dataset.dataset.purged:
            dataset.peek = "Gemini SQLite Database, version %s" % (
                dataset.metadata.gemini_version or 'unknown')
            dataset.blurb = nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return the stored peek, rebuilding the version line if it cannot be read."""
        try:
            return dataset.peek
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit propagate.
            return "Gemini SQLite Database, version %s" % (
                dataset.metadata.gemini_version or 'unknown')
Exemplo n.º 10
0
class DistanceMatrix(Text):
    """Text datatype (``mothur.dist``) that records a sequence count in its metadata."""
    file_ext = 'mothur.dist'
    # Add metadata elements
    MetadataElement(name="sequence_count", default=0, desc="Number of sequences",
                    readonly=True, visible=True, optional=True, no_value='?')

    def init_meta(self, dataset, copy_from=None):
        super(DistanceMatrix, self).init_meta(dataset, copy_from=copy_from)

    def set_meta(self, dataset, overwrite=True, skip=0, **kwd):
        """
        Run the parent metadata pass, then look for the sequence count.

        Header lines not starting with ``@`` are tried in turn; the first one
        whose joined fields parse as an integer supplies ``sequence_count``.
        Lines that fail to parse are logged, except on PairwiseDistanceMatrix
        instances.
        """
        super(DistanceMatrix, self).set_meta(dataset, overwrite=overwrite, skip=skip, **kwd)

        for fields in iter_headers(dataset.file_name, sep='\t'):
            if fields[0].startswith('@'):
                continue
            try:
                # seq count sometimes preceded by tab, so glue the fields back together
                dataset.metadata.sequence_count = int(''.join(fields))
            except Exception as e:
                if isinstance(self, PairwiseDistanceMatrix):
                    continue
                log.warning("DistanceMatrix set_meta %s" % e)
                continue
            break
Exemplo n.º 11
0
class Genealogy(Tabular):
    """Tab delimited data in Genealogy format"""
    file_ext = "pqmp"
    MetadataElement(name="columns",
                    default=3,
                    desc="Number of columns",
                    readonly=True)

    def __init__(self, **kwd):
        """Initialize QTLMap:Genealogy datatype"""
        Tabular.__init__(self, **kwd)

    def init_meta(self, dataset, copy_from=None):
        Tabular.init_meta(self, dataset, copy_from=copy_from)

    def sniff(self, filename):
        """
        Genealogy file format: ID SIRE DAM GENERATION

        Only the first line is inspected: it must hold exactly four
        whitespace-separated fields and the fourth must be "1", "2" or "3".
        """
        # The context manager closes the handle even if reading fails.
        with open(filename) as handle:
            line = handle.readline()
        v = line.split()
        return len(v) == 4 and v[3] in ("1", "2", "3")
Exemplo n.º 12
0
class Simulation(Tabular):
    """Tab delimited data in Simulation format"""
    file_ext = "sqmp"

    MetadataElement(name="columns",
                    default=3,
                    desc="Number of columns",
                    readonly=True)

    def __init__(self, **kwd):
        """Initialize QTLMap:Simulation datatype"""
        # Initialise through the declared base class (as init_meta does)
        # rather than jumping straight to data.Text.
        Tabular.__init__(self, **kwd)

    def init_meta(self, dataset, copy_from=None):
        Tabular.init_meta(self, dataset, copy_from=copy_from)

    def sniff(self, filename):
        """
        Return True when the first line contains every expected column title.

        The titles looked for are "Trait", "LRTMAX", "Position CHR" and
        "Position DX".
        """
        # The context manager closes the handle even if reading fails.
        with open(filename) as handle:
            line = handle.readline()
        required_titles = ("Trait", "LRTMAX", "Position CHR", "Position DX")
        return all(title in line for title in required_titles)
Exemplo n.º 13
0
class OMETiff(Tiff):
    """Tiff datatype (``ome.tiff``) that stores the page offsets in a JSON metadata file."""
    file_ext = "ome.tiff"
    MetadataElement(name="offsets", desc="Offsets File", param=FileParameter, file_ext="json",
                    readonly=True, visible=False, optional=True)

    def set_meta(self, dataset, overwrite=True, **kwd):
        """
        Write the offset of every TIFF page as a JSON list to the ``offsets`` metadata file.

        The existing metadata file is reused when present; otherwise a new one
        is requested from the ``offsets`` parameter spec.
        """
        target = dataset.metadata.offsets or dataset.metadata.spec['offsets'].param.new_file(
            dataset=dataset)
        with tifffile.TiffFile(dataset.file_name) as tiff_handle:
            page_offsets = [tiff_page.offset for tiff_page in tiff_handle.pages]
        with open(target.file_name, 'w') as sink:
            json.dump(page_offsets, sink)
        dataset.metadata.offsets = target

    def sniff(self, filename):
        """Return True when tifffile reports the file as OME."""
        with tifffile.TiffFile(filename) as tiff_handle:
            return True if tiff_handle.is_ome else False
Exemplo n.º 14
0
class Group(Tabular):
    """
    http://www.mothur.org/wiki/Groups_file
    Group file assigns sequence (col 1)  to a group (col 2)
    """
    file_ext = 'mothur.groups'
    MetadataElement(name="groups",
                    default=[],
                    desc="Group Names",
                    readonly=True,
                    visible=True,
                    no_value=[])

    def __init__(self, **kwd):
        """Set up the two fixed column names."""
        super(Group, self).__init__(**kwd)
        self.column_names = ['name', 'group']
        self.columns = 2

    def set_meta(self,
                 dataset,
                 overwrite=True,
                 skip=None,
                 max_data_lines=None,
                 **kwd):
        """
        Run the parent metadata pass, then collect the distinct group names.

        The names come from the second field of every line that has one and
        are stored sorted, so the metadata is reproducible between runs.
        """
        super(Group, self).set_meta(dataset, overwrite, skip, max_data_lines)

        group_names = set()
        for line in iter_headers(dataset.file_name, sep='\t', count=-1):
            if len(line) > 1:
                group_names.add(line[1])
        # A bare list(set) has arbitrary order; sort for stable metadata.
        dataset.metadata.groups = sorted(group_names)
Exemplo n.º 15
0
class MauveXmfa(Text):
    """Text datatype for ``xmfa`` files that start with a ``#FormatVersion Mauve1`` header."""
    file_ext = "xmfa"

    MetadataElement(name="number_of_models",
                    default=0,
                    desc="Number of aligned sequences",
                    readonly=True,
                    visible=True,
                    optional=True,
                    no_value=0)

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek text and an "N alignment(s)" blurb, or purge notices for a purged dataset."""
        if not dataset.dataset.purged:
            # The peek is computed once; the original read the file twice.
            dataset.peek = get_file_peek(dataset.file_name,
                                         is_multi_byte=is_multi_byte)
            if (dataset.metadata.number_of_models == 1):
                dataset.blurb = "1 alignment"
            else:
                dataset.blurb = "%s alignments" % dataset.metadata.number_of_models
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disc'

    def sniff(self, filename):
        """Return True when the first 21 characters are the Mauve format-version header."""
        with open(filename, 'r') as handle:
            return handle.read(21) == '#FormatVersion Mauve1'

    def set_meta(self, dataset, **kwd):
        """Store in ``number_of_models`` the value returned by ``count_special_lines`` for ``#Sequence<N>Entry`` lines."""
        # NOTE(review): the pattern uses a POSIX character class, so the helper
        # presumably hands it to a POSIX-regex tool rather than `re` - confirm.
        dataset.metadata.number_of_models = generic_util.count_special_lines(
            '^#Sequence([[:digit:]]+)Entry', dataset.file_name)
Exemplo n.º 16
0
class ExpressionJson(Json):
    """ Represents the non-data input or output to a tool or workflow.
    """
    file_ext = "json"
    MetadataElement(name="json_type", default=None, desc="JavaScript or JSON type of expression",
                    readonly=True, visible=True, no_value=None)

    def set_meta(self, dataset, **kwd):
        """
        Parse the dataset as JSON and store a label for its top-level type.

        Nothing happens for a dataset without data. The label is "int",
        "float", "list" or "object"; any other parsed value is labelled
        "null". Undecodable JSON raises an Exception quoting the first 512
        characters of the file.
        """
        if not dataset.has_data():
            return

        source_path = dataset.file_name
        try:
            with open(source_path) as handle:
                parsed = json.load(handle)
        except json.decoder.JSONDecodeError:
            with open(source_path) as handle:
                contents = handle.read(512)
            raise Exception(f"Invalid JSON encountered {contents}")

        # Checked in order; the first matching Python type decides the label.
        labels_by_type = (
            (int, "int"),
            (float, "float"),
            (list, "list"),
            (dict, "object"),
        )
        label = "null"
        for python_type, candidate in labels_by_type:
            if isinstance(parsed, python_type):
                label = candidate
                break
        dataset.metadata.json_type = label
Exemplo n.º 17
0
class GenericMolFile(data.Text):
    """
        abstract class for most of the molecule files
    """
    MetadataElement(name="number_of_molecules",
                    default=0,
                    desc="Number of molecules",
                    readonly=True,
                    visible=True,
                    optional=True,
                    no_value=0)

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek text and an "N molecule(s)" blurb, or purge notices for a purged dataset."""
        if not dataset.dataset.purged:
            # The peek is computed once, through the `data` module this class
            # already depends on; the original also called a bare
            # `get_file_peek` first and then overwrote its result.
            dataset.peek = data.get_file_peek(dataset.file_name,
                                              is_multi_byte=is_multi_byte)
            if (dataset.metadata.number_of_molecules == 1):
                dataset.blurb = "1 molecule"
            else:
                dataset.blurb = "%s molecules" % dataset.metadata.number_of_molecules
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def get_mime(self):
        """Return the mime type of the datatype."""
        return 'text/plain'
Exemplo n.º 18
0
class PdedJoin(Tabular):
    """Tab delimited data in PdedJoin format"""
    file_ext = "pdedj"

    MetadataElement(name="columns",
                    default=3,
                    desc="Number of columns",
                    readonly=True)

    def __init__(self, **kwd):
        """Initialize QTLMap:PdedJoin datatype"""
        # Initialise through the declared base class (as init_meta does)
        # rather than jumping straight to data.Text.
        Tabular.__init__(self, **kwd)

    def init_meta(self, dataset, copy_from=None):
        Tabular.init_meta(self, dataset, copy_from=copy_from)

    def sniff(self, filename):
        """
        Return True when the first line contains every expected column title.

        The titles looked for are "Position", "Sire", "Dam_Phase" and
        "p(Hs1/Hd1".
        """
        # The context manager closes the handle even if reading fails.
        with open(filename) as handle:
            line = handle.readline()
        required_titles = ("Position", "Sire", "Dam_Phase", "p(Hs1/Hd1")
        return all(title in line for title in required_titles)
Exemplo n.º 19
0
class SnpEffDb(Text):
    """Class describing a SnpEff genome build"""
    file_ext = "snpeffdb"
    MetadataElement(name="genome_version",
                    default="unknown",
                    desc="Genome Version",
                    readonly=True,
                    visible=True,
                    no_value=None)
    MetadataElement(name="snpeff_version",
                    default="SnpEff4.0",
                    desc="SnpEff Version",
                    readonly=True,
                    visible=True,
                    no_value=None)
    MetadataElement(name="regulation",
                    default=[],
                    desc="Regulation Names",
                    readonly=True,
                    visible=True,
                    no_value=[],
                    optional=True)
    MetadataElement(name="annotation",
                    default=[],
                    desc="Annotation Names",
                    readonly=True,
                    visible=True,
                    no_value=[],
                    optional=True)

    def __init__(self, **kwd):
        Text.__init__(self, **kwd)

    # The SnpEff version line was added in SnpEff version 4.1
    def getSnpeffVersionFromFile(self, path):
        """
        Read the SnpEff version from the start of a gzip-compressed file.

        The first line of the first 100 decompressed bytes must look like
        ``SnpEff <major>.<minor>...``; the result is then e.g. ``"SnpEff4.1"``.

        Returns ``None`` when the file cannot be read, is not gzip data, is
        empty, or has no version line (best effort - nothing is raised).
        """
        snpeff_version = None
        try:
            fh = gzip.open(path, 'rb')
            try:
                buf = fh.read(100)
            finally:
                fh.close()
            # Decode so the text pattern below also works where the read yields bytes.
            if isinstance(buf, bytes):
                buf = buf.decode('utf-8', 'replace')
            lines = buf.splitlines()
            m = re.match(r'^(SnpEff)\s+(\d+\.\d+).*$', lines[0].strip())
            if m:
                snpeff_version = m.group(1) + m.group(2)
        except Exception:
            # Previously the exception object itself was returned as the "version".
            return None
        return snpeff_version
Exemplo n.º 20
0
class Vcf( Tabular ):
    """ Variant Call Format for describing SNPs and other simple genome variations. """
    edam_format = "format_3016"
    track_type = "VariantTrack"
    data_sources = { "data": "tabix", "index": "bigwig" }

    file_ext = 'vcf'
    column_names = [ 'Chrom', 'Pos', 'ID', 'Ref', 'Alt', 'Qual', 'Filter', 'Info', 'Format', 'data' ]

    MetadataElement( name="columns", default=10, desc="Number of columns", readonly=True, visible=False )
    MetadataElement( name="column_types", default=['str', 'int', 'str', 'str', 'str', 'int', 'str', 'list', 'str', 'str'], param=metadata.ColumnTypesParameter, desc="Column types", readonly=True, visible=False )
    MetadataElement( name="viz_filter_cols", desc="Score column for visualization", default=[5], param=metadata.ColumnParameter, optional=True, multiple=True, visible=False )
    MetadataElement( name="sample_names", default=[], desc="Sample names", readonly=True, visible=False, optional=True, no_value=[] )

    def sniff( self, filename ):
        """Return True when the first line starts with ``##fileformat=VCF``; an empty file is not a VCF."""
        headers = get_headers( filename, '\n', count=1 )
        if not headers or not headers[0]:
            return False
        return headers[0][0].startswith("##fileformat=VCF")

    def display_peek( self, dataset ):
        """Returns formatted html of peek"""
        return Tabular.make_html_table( self, dataset, column_names=self.column_names )

    def set_meta( self, dataset, **kwd ):
        """
        Run the Tabular metadata pass, then read the sample names from the header line.

        The header is the first line that does not start with ``##``; when it
        starts with ``#`` its whitespace-separated fields from the tenth
        onwards become ``sample_names``. A file made only of ``##`` lines
        leaves ``sample_names`` untouched.
        """
        Tabular.set_meta( self, dataset, **kwd )
        with open( dataset.file_name ) as source:
            for line in source:
                # Skip comments.
                if line.startswith( '##' ):
                    continue
                if line.startswith( '#' ):
                    # Found header line, get sample names.
                    dataset.metadata.sample_names = line.split()[ 9: ]
                break

    # Dataproviders
    @dataproviders.decorators.dataprovider_factory( 'genomic-region',
                                                    dataproviders.dataset.GenomicRegionDataProvider.settings )
    def genomic_region_dataprovider( self, dataset, **settings ):
        return dataproviders.dataset.GenomicRegionDataProvider( dataset, 0, 1, 1, **settings )

    @dataproviders.decorators.dataprovider_factory( 'genomic-region-dict',
                                                    dataproviders.dataset.GenomicRegionDataProvider.settings )
    def genomic_region_dict_dataprovider( self, dataset, **settings ):
        settings[ 'named_columns' ] = True
        return self.genomic_region_dataprovider( dataset, **settings )
Exemplo n.º 21
0
class SnpEffDb( Text ):
    """Class describing a SnpEff genome build"""
    file_ext = "snpeffdb"
    MetadataElement( name="genome_version", default=None, desc="Genome Version", readonly=True, visible=True, no_value=None )
    MetadataElement( name="regulation", default=[], desc="Regulation Names", readonly=True, visible=True, no_value=[], optional=True)
    MetadataElement( name="annotation", default=[], desc="Annotation Names", readonly=True, visible=True, no_value=[], optional=True)

    def __init__( self, **kwd ):
        Text.__init__( self, **kwd )

    def set_meta( self, dataset, **kwd ):
        """
        Scan the extra-files directory and record what the SnpEff build contains.

        - the directory holding a ``snpEffectPredictor*`` file names the genome version
        - ``regulation_<name>.bin`` files supply the regulation names
        - ``nextProt.bin`` / ``motif.bin`` supply the annotation names

        The dataset file is then rewritten with a short text summary. If no
        predictor file was found the file is left empty, as before. Write
        failures are logged rather than raised.
        """
        import logging

        Text.set_meta(self, dataset, **kwd )
        data_dir = dataset.extra_files_path
        # search data_dir/genome_version for files
        regulation_pattern = r'regulation_(.+)\.bin'
        #  annotation files that are included in snpEff by a flag
        annotations_dict = {'nextProt.bin' : '-nextprot','motif.bin': '-motif'}
        regulations = []
        annotations = []
        # Stays None when no snpEffectPredictor file is found.
        genome_version = None
        if data_dir and os.path.isdir(data_dir):
            for root, dirs, files in os.walk(data_dir):
                for fname in files:
                    if fname.startswith('snpEffectPredictor'):
                        # if snpEffectPredictor.bin download succeeded
                        genome_version = os.path.basename(root)
                        dataset.metadata.genome_version = genome_version
                        continue
                    m = re.match(regulation_pattern, fname)
                    if m:
                        regulations.append(m.group(1))
                    elif fname in annotations_dict:
                        annotations.append(annotations_dict[fname].lstrip('-'))
            dataset.metadata.regulation = regulations
            dataset.metadata.annotation = annotations
            try:
                with open(dataset.file_name, 'w') as fh:
                    if genome_version is not None:
                        fh.write("%s\n" % genome_version)
                        if annotations:
                            fh.write("annotations: %s\n" % ','.join(annotations))
                        if regulations:
                            fh.write("regulations: %s\n" % ','.join(regulations))
            except (IOError, OSError) as exc:
                logging.getLogger(__name__).warning('%s, set_meta could not write summary: %s', self, exc)
Exemplo n.º 22
0
class Model(data.Text):
    """Tab delimited data in Model format"""
    file_ext = "qtlmap.model"

    MetadataElement(name="columns",
                    default=3,
                    desc="Number of columns",
                    readonly=True)

    def __init__(self, **kwd):
        """Initialize QTLMap:Model datatype"""
        data.Text.__init__(self, **kwd)

    def init_meta(self, dataset, copy_from=None):
        data.Text.init_meta(self, dataset, copy_from=copy_from)

    def sniff(self, filename):
        """
        Model file format:
          line 1: number of traits
          line 2: number of fixed effects, number of covariates
          line 3: names of the effects
          then one line per trait: trait r/a/i 0/1....

        Returns False (instead of raising) when a line is missing or too
        short, and closes the file on every path.
        """
        with open(filename) as handle:
            # number of traits
            first = handle.readline().split()
            if not first or not first[0].isdigit():
                return False
            ncar = int(first[0])

            # number of fixed effects and covariates
            first = handle.readline().split()
            if len(first) < 2:
                return False
            if not first[0].isdigit() or not first[1].isdigit():
                return False
            nfix = int(first[0])
            ncov = int(first[1])

            # names of the effects
            if len(handle.readline().split()) < (nfix + ncov):
                return False

            # one description line per trait
            min_fields = 2 + 2 * nfix + ncov
            for _ in range(ncar):
                if len(handle.readline().split()) < min_fields:
                    return False

        return True
Exemplo n.º 23
0
class FeatureLocationIndex(Tabular):
    """
    Tabular index of feature locations.
    """
    file_ext = 'fli'

    # Both metadata elements are declared read-only and not visible.
    MetadataElement(
        name="columns",
        default=2,
        desc="Number of columns",
        visible=False,
        readonly=True,
    )
    MetadataElement(
        name="column_types",
        param=metadata.ColumnTypesParameter,
        default=['str', 'str'],
        no_value=[],
        desc="Column types",
        visible=False,
        readonly=True,
    )
Exemplo n.º 24
0
class AnvioSamplesDB(AnvioDB):
    """Anvio datatype for Samples DB database files (``SAMPLES.db``)."""
    file_ext = 'anvio_samples_db'
    _anvio_basename = 'SAMPLES.db'

    # The basename is published as read-only metadata.
    MetadataElement(
        name="anvio_basename",
        default=_anvio_basename,
        desc="Basename",
        readonly=True,
    )
Exemplo n.º 25
0
class LastzCoverage(Tabular):
    """Tabular coverage datatype (``coverage`` extension) with column metadata."""
    file_ext = "coverage"

    MetadataElement(name="chromCol", default=1, desc="Chrom column", param=metadata.ColumnParameter)
    MetadataElement(name="positionCol", default=2, desc="Position column", param=metadata.ColumnParameter)
    MetadataElement(name="forwardCol", default=3, desc="Forward or aggregate read column", param=metadata.ColumnParameter)
    MetadataElement(name="reverseCol", desc="Optional reverse read column", param=metadata.ColumnParameter, optional=True, no_value=0)
    MetadataElement(name="columns", default=3, desc="Number of columns", readonly=True, visible=False)

    def get_track_resolution(self, dataset, start, end):
        """
        Return the resolution used to plot the region ``start``..``end``.

        The resolution is the power of ten at or above ``(end - start) / 1000``
        (aiming for roughly 1000 plotted points), clamped to the range
        1..10000.  ``dataset`` is not used.

        When ``(end - start) / 1000`` is not positive the logarithm is
        undefined; the minimum resolution of 1 is returned instead of letting
        ``math.log10`` raise ``ValueError``.
        """
        # "span" rather than "range" so the builtin is not shadowed.
        span = end - start
        scaled = span / 1000
        if scaled <= 0:
            # Empty or inverted region (or, with integer division, a span
            # below 1000): fall back to the smallest valid resolution.
            return 1
        # Determine appropriate resolution to plot ~1000 points
        resolution = math.ceil(10 ** math.ceil(math.log10(scaled)))
        # Restrict to valid range
        return max(min(resolution, 10000), 1)
Exemplo n.º 26
0
class AnvioPanDB(AnvioDB):
    """Anvio datatype for Pan DB database files (``PAN.db``)."""
    file_ext = 'anvio_pan_db'
    _anvio_basename = 'PAN.db'

    # The basename is published as read-only metadata.
    MetadataElement(
        name="anvio_basename",
        default=_anvio_basename,
        desc="Basename",
        readonly=True,
    )
Exemplo n.º 27
0
class AnvioGenomesDB(AnvioDB):
    """Anvio datatype for Genomes DB database files (``-GENOMES.db``)."""
    file_ext = 'anvio_genomes_db'
    _anvio_basename = '-GENOMES.db'

    # The basename is published as read-only metadata.
    MetadataElement(
        name="anvio_basename",
        default=_anvio_basename,
        desc="Basename",
        readonly=True,
    )
Exemplo n.º 28
0
class AnvioStructureDB(AnvioDB):
    """Anvio datatype for Structure DB database files (``STRUCTURE.db``)."""
    file_ext = 'anvio_structure_db'
    _anvio_basename = 'STRUCTURE.db'

    # The basename is published as read-only metadata.
    MetadataElement(
        name="anvio_basename",
        default=_anvio_basename,
        desc="Basename",
        readonly=True,
    )
Exemplo n.º 29
0
class Bcf(Binary):
    """Class describing a BCF file"""
    edam_format = "format_3020"
    file_ext = "bcf"

    MetadataElement(name="bcf_index",
                    desc="BCF Index File",
                    param=metadata.FileParameter,
                    file_ext="csi",
                    readonly=True,
                    no_value=None,
                    visible=False,
                    optional=True)

    def sniff(self, filename):
        """
        Return True if ``filename`` can be read through gzip and its first
        three decompressed bytes are ``BCF``; return False otherwise,
        including when the file cannot be opened or decompressed.
        """
        # BCF is compressed in the BGZF format, and must not be uncompressed in Galaxy.
        # The first 3 bytes of any bcf file is 'BCF', and the file is binary.
        try:
            # The context manager closes the gzip handle; a bytes literal
            # compares correctly on both Python 2 and Python 3.
            with gzip.open(filename) as handle:
                return handle.read(3) == b'BCF'
        except Exception:
            return False

    def set_meta(self, dataset, overwrite=True, **kwd):
        """
        Creates the index for the BCF file.

        Runs ``bcftools index`` on a symlink to the dataset and stores the
        resulting ``.csi`` file as the ``bcf_index`` metadata file.

        Raises:
            Exception: if ``bcftools index`` exits with a non-zero status; the
                message carries the tool's stderr output when there is any.
        """
        # These metadata values are not accessible by users, always overwrite
        index_file = dataset.metadata.bcf_index
        if not index_file:
            index_file = dataset.metadata.spec['bcf_index'].param.new_file(
                dataset=dataset)
        # Create the bcf index
        # $ bcftools index
        # Usage: bcftools index <in.bcf>

        # The index is picked up from "<input>.csi", so the tool is pointed at
        # a symlink living next to the final index location.
        dataset_symlink = os.path.join(
            os.path.dirname(index_file.file_name), '__dataset_%d_%s' %
            (dataset.id, os.path.basename(index_file.file_name)))
        os.symlink(dataset.file_name, dataset_symlink)

        try:
            command = ['bcftools', 'index', dataset_symlink]
            # Capture stderr through a pipe instead of a named temp file:
            # nothing to clean up and no reuse of a released temp file name.
            proc = subprocess.Popen(args=command,
                                    stderr=subprocess.PIPE,
                                    universal_newlines=True)
            stderr = proc.communicate()[1].strip()
            exit_code = proc.returncode

            # Check the exit status before touching the .csi file so a tool
            # failure is reported as such rather than as a failed move.
            if exit_code != 0:
                raise Exception(
                    "Error Setting BCF Metadata: %s" %
                    (stderr or "bcftools index exited with code %d" % exit_code))
            if stderr:
                print(stderr)

            shutil.move(dataset_symlink + '.csi', index_file.file_name)
        finally:
            # Remove the symlink on every path so a later run does not fail
            # with "file exists" in os.symlink.
            if os.path.lexists(dataset_symlink):
                os.remove(dataset_symlink)

        dataset.metadata.bcf_index = index_file
Exemplo n.º 30
0
class PlantTribesKsComponents(Tabular):
    """Tabular datatype listing significant components of a Ks distribution."""
    file_ext = "ptkscmp"
    MetadataElement(
        name="number_comp",
        default=0,
        desc="Number of significant components in the Ks distribution",
        readonly=True,
        visible=True,
        no_value=0,
    )

    def display_peek(self, dataset):
        """Return the dataset's peek, or a size summary if reading it fails."""
        try:
            peek = dataset.peek
        except Exception:
            peek = "Significant components in the Ks distribution (%s)" % (nice_size(dataset.get_size()))
        return peek

    def set_meta(self, dataset, **kwd):
        """
        Record the largest value found in the third column as ``number_comp``.

        The first line of the file is ignored, as is any line whose third
        whitespace-separated field is missing or not an integer.  The metadata
        is left untouched when no usable value is found.
        """
        super(PlantTribesKsComponents, self).set_meta(dataset, **kwd)
        component_counts = []
        with open(dataset.file_name) as handle:
            for line_number, row in enumerate(handle):
                if line_number > 0:
                    # split() without arguments also discards the surrounding
                    # whitespace, so tabs and spaces are both accepted.
                    fields = row.split()
                    try:
                        component_counts.append(int(fields[2]))
                    except Exception:
                        pass
        if component_counts:
            dataset.metadata.number_comp = max(component_counts)

    def set_peek(self, dataset, is_multi_byte=False):
        """Set ``dataset.peek`` and a blurb giving the component count."""
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
        else:
            dataset.peek = get_file_peek(dataset.file_name)
            if dataset.metadata.number_comp != 1:
                dataset.blurb = "%s significant components" % dataset.metadata.number_comp
            else:
                dataset.blurb = "1 significant component"

    def sniff(self, filename):
        """
        Return True when the first line is exactly the expected header row.

        >>> from galaxy.datatypes.sniff import get_test_fname
        >>> fname = get_test_fname('test_tab.bed')
        >>> PlantTribesKsComponents().sniff(fname)
        False
        >>> fname = get_test_fname('1.ptkscmp')
        >>> PlantTribesKsComponents().sniff(fname)
        True
        """
        expected_header = 'species\tn\tnumber_comp\tlnL\tAIC\tBIC\tmean\tvariance\tporportion'
        try:
            # The separator is a literal backslash followed by "t", so the
            # header line comes back as a single unsplit item.
            first_line = get_headers(filename, '\\t', 1)[0][0]
        except Exception:
            return False
        return first_line == expected_header