Exemplo n.º 1
0
def codes_info(args):
    """Query the ``codes_info`` command-line tool for one or more options.

    Every entry of ``args`` is handed to the tool as ``-<entry>`` and the
    whitespace-stripped output is collected per option.

    :args: a single option name, or a list of option names
    :return: the stripped output itself when exactly one option was
        requested, otherwise an ordered dict mapping option -> output
    :raises OSError: when invoking the tool raises ``OSError``
    """
    if not isinstance(args, list):
        args = [args]
    try:
        ret = _od([(p, _chekout(['codes_info', '-' + p]).strip())
                   for p in args])
    except OSError:
        raise OSError('codes_info tool was not found')
    if len(ret) == 1:
        # Dict views cannot be indexed on Python 3, so materialise the
        # values before picking the single entry.
        return list(ret.values())[0]
    return ret
Exemplo n.º 2
0
def get_sequence_def(masterTableVersionNumber='latest'):
    """Get sequence.def table

    The parsed table is stored in ``_def_catch_`` under its path, so a
    repeated request for the same version returns the cached object.

    :masterTableVersionNumber: WMO master table version Number
    :return: sequence.def as an ordered dict mapping each integer key to
        its list of integers, ordered by ascending list length
    """
    path = _codes_definition_path_ + '/bufr/tables/0/wmo/{}/sequence.def'
    path = path.format(masterTableVersionNumber)
    if path in _def_catch_:
        return _def_catch_[path]
    content = _get_entry_(path)
    # Splitting on ' = [' and ' ]\n' yields alternating key / value chunks.
    ls = _re.split(r" = \[| \]\n", content)
    d = _od()
    for i in range(0, len(ls), 2):
        if ls[i] != '':
            k = int(ls[i].replace(' ', '').replace('"', ''))
            v = [int(j) for j in ls[i + 1].replace(' ', '').split(',')]
            d[k] = v
    # this is required to run shrink method properly.
    # ``items()`` is used because ``iteritems()`` only exists on Python 2.
    d = _od(sorted(d.items(), key=lambda x: len(x[1])))
    _def_catch_[path] = d
    return d
Exemplo n.º 3
0
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GWpy.  If not, see <http://www.gnu.org/licenses/>.
"""Command-line utilities for GWpy

The `gwpy.cli` module provides methods and functionality to power the
`gwpy-plot` command-line executable (distributed with GWpy).
"""

from collections import OrderedDict as _od

from .timeseries import TimeSeries
from .spectrum import Spectrum
from .spectrogram import Spectrogram
from .coherence import Coherence
from .coherencegram import Coherencegram
from .qtransform import Qtransform

__author__ = 'Joseph Areeda <*****@*****.**>'

# Ordered registry of the available products, keyed by each class's
# ``action`` attribute, in the order listed here.
PRODUCTS = _od(
    (product.action, product)
    for product in (
        TimeSeries,
        Spectrum,
        Spectrogram,
        Coherence,
        Coherencegram,
        Qtransform,
    )
)
Exemplo n.º 4
0
class SNP(Base):
    """An SQLAlchemy Table for GRASP SNPs.

    Study and phenotype information are pushed to other tables to minimize
    table size and make querying easier.

    Table Name:
        snps

    Columns:
        Described in the columns attribute

    Attributes:
        int:      The ID number of the SNP, usually the NHLBIkey
        str:      SNP location expressed as 'chr<chrom>:<pos>'
        snp_loc:  The same location string returned by str()
        hvgs_ids: A list of HGVS IDs for this SNP
        columns:  A dictionary of all columns 'column_name'=>('type', 'desc')
    """

    __tablename__ = "snps"

    id = _Column(_BigInteger, primary_key=True, index=True)
    snpid = _Column(_String, index=True)
    chrom = _Column(_String(10), index=True)
    pos = _Column(_Integer, index=True)
    pval = _Column(_Float, index=True)
    NHLBIkey = _Column(_String, index=True)
    HUPfield = _Column(_String)
    LastCurationDate = _Column(_Date)
    CreationDate = _Column(_Date)
    population_id = _Column(_Integer,
                            _ForeignKey('populations.id'),
                            index=True)
    population = _relationship("Population", backref="snps")
    study_id = _Column(_Integer, _ForeignKey('studies.id'), index=True)
    study = _relationship("Study", back_populates="snps")
    study_snpid = _Column(_String, index=True)
    paper_loc = _Column(_String)
    phenotype_desc = _Column(_String, index=True)
    phenotype_cats = _relationship("PhenoCats",
                                   secondary=snp_pheno_assoc,
                                   back_populates="snps")
    InGene = _Column(_String, index=True)
    NearestGene = _Column(_String, index=True)
    InLincRNA = _Column(_String)
    InMiRNA = _Column(_String)
    InMiRNABS = _Column(_String)
    dbSNPfxn = _Column(_String)
    dbSNPMAF = _Column(_String)
    dbSNPinfo = _Column(_String)
    dbSNPvalidation = _Column(_String)
    dbSNPClinStatus = _Column(_String)
    ORegAnno = _Column(_String)
    ConservPredTFBS = _Column(_String)
    HumanEnhancer = _Column(_String)
    RNAedit = _Column(_String)
    PolyPhen2 = _Column(_String)
    SIFT = _Column(_String)
    LSSNP = _Column(_String)
    UniProt = _Column(_String)
    EqtlMethMetabStudy = _Column(_String)

    # A bare Index(...) statement with string column names in the class body
    # is never attached to the table; declarative only picks it up from
    # __table_args__.
    __table_args__ = (_Index('chrom_pos', 'chrom', 'pos'),)

    # A description of all columns in this table:
    # 'column_name' => ('type', 'desc').
    columns = _od([
        ('id', ('BigInteger', 'NHLBIkey')),
        ('snpid', ('String', 'SNPid')),
        ('chrom', ('String', 'chr')),
        ('pos', ('Integer', 'pos')),
        ('pval', ('Float', 'Pvalue')),
        ('NHLBIkey', ('String', 'NHLBIkey')),
        ('HUPfield', ('String', 'HUPfield')),
        ('LastCurationDate', ('Date', 'LastCurationDate')),
        ('CreationDate', ('Date', 'CreationDate')),
        ('population_id', ('Integer', 'Primary')),
        ('population', ('relationship', 'Link')),
        ('study_id', ('Integer', 'Primary')),
        ('study', ('relationship', 'Link')),
        ('study_snpid', ('String', 'SNPid')),
        ('paper_loc', ('String', 'LocationWithinPaper')),
        ('phenotype_desc', ('String', 'Phenotype')),
        ('phenotype_cats', ('relationship', 'Link')),
        ('InGene', ('String', 'InGene')),
        ('NearestGene', ('String', 'NearestGene')),
        ('InLincRNA', ('String', 'InLincRNA')),
        ('InMiRNA', ('String', 'InMiRNA')),
        ('InMiRNABS', ('String', 'InMiRNABS')),
        ('dbSNPfxn', ('String', 'dbSNPfxn')),
        ('dbSNPMAF', ('String', 'dbSNPMAF')),
        ('dbSNPinfo', ('String', 'dbSNPalleles')),
        ('dbSNPvalidation', ('String', 'dbSNPvalidation')),
        ('dbSNPClinStatus', ('String', 'dbSNPClinStatus')),
        ('ORegAnno', ('String', 'ORegAnno')),
        ('ConservPredTFBS', ('String', 'ConservPredTFBS')),
        ('HumanEnhancer', ('String', 'HumanEnhancer')),
        ('RNAedit', ('String', 'RNAedit')),
        ('PolyPhen2', ('String', 'PolyPhen2')),
        ('SIFT', ('String', 'SIFT')),
        ('LSSNP', ('String', 'LS')),
        ('UniProt', ('String', 'UniProt')),
        ('EqtlMethMetabStudy', ('String', 'EqtlMethMetabStudy')),
    ])

    @property
    def snp_loc(self):
        """Return a simple string containing the SNP location."""
        return "chr{}:{}".format(self.chrom, self.pos)

    @property
    def hvgs_ids(self):
        """The HVGS ID from myvariant.

        The myvariant query runs on first access only; the resulting list
        is kept on the instance as ``_hvgs_ids`` and reused afterwards.
        """
        if not hasattr(self, '_hvgs_ids'):
            mv = myvariant.MyVariantInfo()
            self._hvgs_ids = [
                i['_id'] for i in mv.query(self.snp_loc, fields='id')['hits']
            ]
        return self._hvgs_ids

    def get_variant_info(self, fields="dbsnp", pandas=True):
        """Use the myvariant API to get info about this SNP.

        Note that this service can be very slow.
        It will be faster to query multiple SNPs.

        Args:
            fields: Choose fields to display from:
                    `docs.myvariant.info/en/latest/doc/data.html#available-fields`_
                    Good choices are 'dbsnp', 'clinvar', or 'gwassnps'
                    Can also use 'grasp' to get a different version of this
                    info.
            pandas: Return a dataframe instead of dictionary.

        Returns:
            A dictionary or a dataframe.
        """
        mv = myvariant.MyVariantInfo()
        return mv.getvariants(self.hvgs_ids,
                              fields=fields,
                              as_dataframe=pandas,
                              df_index=True)

    def get_columns(self, return_as='list'):
        """Return the column descriptions of this table.

        Choices:
            list:       A python list holding the description (second
                        tuple element) of every entry in ``columns``
            dictionary: A python dictionary of name=>desc
            long_dict:  The ``columns`` mapping itself, name=>(type, desc)

        Args:
            return_as:  {list,dictionary,long_dict}

        Returns:
            A list or dictionary

        Raises:
            ValueError: If ``return_as`` is not one of the choices above.
        """
        cols = self.columns
        if return_as == 'list':
            return [i[1] for i in cols.values()]
        elif return_as == 'dictionary':
            return {k: v[1] for k, v in cols.items()}
        elif return_as == 'long_dict':
            return cols
        else:
            # ValueError is still an Exception, so existing handlers work.
            raise ValueError(
                "'return_as' must be one of {list,dictionary,long_dict}")

    def display_columns(self, display_as='table', write=False):
        """Return all columns in the table nicely formatted.

        Display choices:
            table:      A formatted grid-like table
            tab:        A tab delimited non-formatted version of table
            list:       The column descriptions, one per line

        Args:
            display_as: {table,tab,list}
            write:      If true, print output to console, otherwise return
                        string.

        Returns:
            A formatted string or None

        Raises:
            Exception: If ``display_as`` is not one of the choices above.
        """
        cols = self.columns
        if display_as == 'table':
            out = _tb(
                [['Column', 'Description', 'Type']] +
                [[k, v[1], v[0]] for k, v in cols.items()],
                headers='firstrow', tablefmt='grid'
            )
        elif display_as == 'tab':
            out = '\n'.join(
                ['\t'.join(['Column', 'Description', 'Type'])] +
                ['\t'.join([k, v[1], v[0]]) for k, v in cols.items()],
            )
        elif display_as == 'list':
            out = '\n'.join([i[1] for i in cols.values()])
        else:
            raise Exception("'display_as' must be one of {table,tab,list}")
        if write:
            print(out)
        else:
            return out

    def __repr__(self):
        """Display information about this SNP."""
        # The closing '>' balances the '<' opened before the coordinates.
        return "{} ({}) <{}:{} pheno: {} study: {}>".format(
            self.id, self.snpid, self.chrom, self.pos, self.phenotype_desc,
            self.study.title)

    def __int__(self):
        """Return ID number."""
        return self.id

    def __str__(self):
        """Return coordinates."""
        return self.snp_loc
Exemplo n.º 5
0
class Study(Base):
    """An SQLAlchemy table to store study information.

    This table provides easy ways to query for SNPs by study information,
    including population and phenotype.

    Note: `disc_pop_flag` and `rep_pop_flag` are integer representations of
    a bitwise flag describing population, defined in ref.PopFlag. To see the
    string representation of this property, lookup `disc_pops` or `rep_pops`.

    Table Name:
        studies

    Columns:
        Described in the columns attribute.

    Attributes:
        int:       The integer ID number, usually the PMID, unless not indexed.
        str:       Summary data on this study.
        len:       The number of individuals in this study.
        disc_pops: The discovery populations, `disc_pop_flag` as a PopFlag.
        rep_pops:  The replication populations, `rep_pop_flag` as a PopFlag.
        pops:      All populations, `pop_flag` as a PopFlag.
        columns:   A dictionary of all columns 'column_name'=>('type', 'desc')

        population_information:
            A multi-line string describing the populations in this study.

    """

    __tablename__ = "studies"

    id = _Column(_Integer, primary_key=True, index=True)
    pmid = _Column(_String(100), index=True)
    title = _Column(_String, index=True)
    journal = _Column(_String)
    author = _Column(_String)
    grasp_ver = _Column(_Integer, index=True)
    noresults = _Column(_Boolean)
    results = _Column(_Integer)
    qtl = _Column(_Boolean)
    snps = _relationship("SNP", back_populates='study')
    phenotype_id = _Column(_Integer, _ForeignKey('phenos.id'), index=True)
    phenotype = _relationship("Phenotype", back_populates="studies")
    phenotype_cats = _relationship("PhenoCats",
                                   secondary=study_pheno_assoc,
                                   back_populates="studies")
    datepub = _Column(_Date)
    in_nhgri = _Column(_Boolean)
    locations = _Column(_String)
    mf = _Column(_Boolean)
    mf_only = _Column(_Boolean)
    platforms = _relationship("Platform",
                              secondary=study_plat_assoc,
                              back_populates="studies")
    snp_count = _Column(_String)
    imputed = _Column(_Boolean)
    population_id = _Column(_Integer,
                            _ForeignKey('populations.id'),
                            index=True)
    population = _relationship("Population", backref="studies")
    total = _Column(_Integer)
    total_disc = _Column(_Integer)
    pop_flag = _Column(_Integer, index=True)  # Will hold a bitwise flag
    disc_pop_flag = _Column(_Integer, index=True)  # Will hold a bitwise flag
    european = _Column(_Integer)
    african = _Column(_Integer)
    east_asian = _Column(_Integer)
    south_asian = _Column(_Integer)
    hispanic = _Column(_Integer)
    native = _Column(_Integer)
    micronesian = _Column(_Integer)
    arab = _Column(_Integer)
    mixed = _Column(_Integer)
    unspecified = _Column(_Integer)
    filipino = _Column(_Integer)
    indonesian = _Column(_Integer)
    total_rep = _Column(_Integer)
    rep_pop_flag = _Column(_Integer, index=True)  # Will hold a bitwise flag
    rep_european = _Column(_Integer)
    rep_african = _Column(_Integer)
    rep_east_asian = _Column(_Integer)
    rep_south_asian = _Column(_Integer)
    rep_hispanic = _Column(_Integer)
    rep_native = _Column(_Integer)
    rep_micronesian = _Column(_Integer)
    rep_arab = _Column(_Integer)
    rep_mixed = _Column(_Integer)
    rep_unspecified = _Column(_Integer)
    rep_filipino = _Column(_Integer)
    rep_indonesian = _Column(_Integer)
    sample_size = _Column(_String)  # Maybe parse this better
    replication_size = _Column(_String)  # Maybe parse this better

    # A description of all columns in this table:
    # 'column_name' => ('type', 'desc'). Keys match the attribute names
    # declared above.
    columns = _od([
        ('id', ('Integer', 'id')),
        ('pmid', ('String', 'PubmedID')),
        ('title', ('String', 'Study')),
        ('journal', ('String', 'Journal')),
        ('author', ('String', '1st_author')),
        ('grasp_ver', ('Integer', 'GRASPversion?')),
        ('noresults', ('Boolean', 'No results flag')),
        ('results', ('Integer', '#results')),
        ('qtl', ('Boolean', 'IsEqtl/meQTL/pQTL/gQTL/Metabolmics?')),
        ('snps', ('relationship', 'Link to all SNPs in this study')),
        ('phenotype_id', ('Integer',
                          'ID of primary phenotype in Phenotype table')),
        ('phenotype',
         ('relationship',
          'A link to the primary phenotype in the Phenotype table')),
        ('phenotype_cats',
         ('relationship',
          'A link to all phenotype categories assigned in the PhenoCats table')
         ),
        ('datepub', ('Date', 'DatePub')),
        ('in_nhgri', ('Boolean', 'In NHGRI GWAS catalog (8/26/14)?')),
        ('locations', ('String', 'Specific place(s) mentioned for samples')),
        ('mf',
         ('Boolean',
          'Includes male/female only analyses in discovery and/or replication?'
          )),
        ('mf_only', ('Boolean', 'Exclusively male or female study?')),
        ('platforms',
         ('relationship',
          'Link to platforms in the Platform table. Platform [SNPs passing QC]'
          )),
        ('snp_count', ('String', 'From "Platform [SNPs passing QC]"')),
        ('imputed', ('Boolean', 'From "Platform [SNPs passing QC]"')),
        ('population_id', ('Integer', 'Primary key of population table')),
        ('population', ('relationship', 'GWAS description, link to table')),
        ('total', ('Integer', 'Total Discovery + Replication sample size')),
        ('total_disc', ('Integer', 'Total discovery samples')),
        ('pop_flag',
         ('Integer',
          'A bitwise flag that shows presence/absence of all populations (discovery and replication)'
          )),
        ('disc_pop_flag',
         ('Integer',
          'A bitwise flag that shows presence/absence of discovery populations'
          )),
        ('european', ('Integer', 'European')),
        ('african', ('Integer', 'African ancestry')),
        ('east_asian', ('Integer', 'East Asian')),
        ('south_asian', ('Integer', 'Indian/South Asian')),
        ('hispanic', ('Integer', 'Hispanic')),
        ('native', ('Integer', 'Native')),
        ('micronesian', ('Integer', 'Micronesian')),
        ('arab', ('Integer', 'Arab/ME')),
        ('mixed', ('Integer', 'Mixed')),
        ('unspecified', ('Integer', 'Unspec')),
        ('filipino', ('Integer', 'Filipino')),
        ('indonesian', ('Integer', 'Indonesian')),
        ('total_rep', ('Integer', 'Total replication samples')),
        ('rep_pop_flag',
         ('Integer',
          'A bitwise flag that shows presence/absence of replication populations'
          )),
        ('rep_european', ('Integer', 'European.1')),
        ('rep_african', ('Integer', 'African ancestry.1')),
        ('rep_east_asian', ('Integer', 'East Asian.1')),
        ('rep_south_asian', ('Integer', 'Indian/South Asian.1')),
        ('rep_hispanic', ('Integer', 'Hispanic.1')),
        ('rep_native', ('Integer', 'Native.1')),
        ('rep_micronesian', ('Integer', 'Micronesian.1')),
        ('rep_arab', ('Integer', 'Arab/ME.1')),
        ('rep_mixed', ('Integer', 'Mixed.1')),
        ('rep_unspecified', ('Integer', 'Unspec.1')),
        ('rep_filipino', ('Integer', 'Filipino.1')),
        ('rep_indonesian', ('Integer', 'Indonesian.1')),
        ('sample_size',
         ('String',
          'Initial Sample Size, string description of integer population counts above.'
          )),
        ('replication_size',
         ('String',
          'Replication Sample Size, string description of integer population counts above.'
          )),
    ])

    @property
    def disc_pops(self):
        """Convert disc_pop_flag to PopFlag."""
        return _PopFlag(self.disc_pop_flag)

    @property
    def rep_pops(self):
        """Convert rep_pop_flag to PopFlag."""
        return _PopFlag(self.rep_pop_flag)

    @property
    def pops(self):
        """Convert pop_flag to PopFlag."""
        return _PopFlag(self.pop_flag)

    @property
    def population_information(self):
        """Return a multi-line summary of population data.

        The summary lists the primary population, the total number of
        individuals, and the per-population counts for the discovery and
        the replication sets.
        """
        # NOTE(review): 'mixed' has a column but was not listed in the
        # original summary; left out here to keep the output unchanged --
        # confirm whether that omission is intentional.
        pop_names = [
            'european', 'african', 'east_asian', 'south_asian', 'hispanic',
            'native', 'micronesian', 'arab', 'unspecified', 'filipino',
            'indonesian'
        ]
        outstr = [
            "Primary population: {}\n".format(self.population.population),
            "Individuals: {}\n".format(self.total),
            "Discovery populations: {}; Total: {}\n".format(
                self.disc_pops.to_simple_str(), self.total_disc)
        ]
        for pop in pop_names:
            outstr.append('\t{}: {}\n'.format(pop, getattr(self, pop)))

        outstr.append("Replication populations: {}; Total: {}\n".format(
            self.rep_pops.to_simple_str(), self.total_rep))
        for pop in pop_names:
            outstr.append(
                '\t{}: {}\n'.format(pop, getattr(self, 'rep_' + pop)))

        # Every piece already ends with a newline, so join without one.
        return ''.join(outstr)

    def get_columns(self, return_as='list'):
        """Return the column descriptions of this table.

        Choices:
            list:       A python list holding the description (second
                        tuple element) of every entry in ``columns``
            dictionary: A python dictionary of name=>desc
            long_dict:  The ``columns`` mapping itself, name=>(type, desc)

        Args:
            return_as:  {list,dictionary,long_dict}

        Returns:
            A list or dictionary

        Raises:
            ValueError: If ``return_as`` is not one of the choices above.
        """
        cols = self.columns
        if return_as == 'list':
            return [i[1] for i in cols.values()]
        elif return_as == 'dictionary':
            return {k: v[1] for k, v in cols.items()}
        elif return_as == 'long_dict':
            return cols
        else:
            # ValueError is still an Exception, so existing handlers work.
            raise ValueError(
                "'return_as' must be one of {list,dictionary,long_dict}")

    def display_columns(self, display_as='table', write=False):
        """Return all columns in the table nicely formatted.

        Display choices:
            table:      A formatted grid-like table
            tab:        A tab delimited non-formatted version of table
            list:       The column descriptions, one per line

        Args:
            display_as: {table,tab,list}
            write:      If true, print output to console, otherwise return
                        string.

        Returns:
            A formatted string or None

        Raises:
            Exception: If ``display_as`` is not one of the choices above.
        """
        cols = self.columns
        if display_as == 'table':
            out = _tb(
                [['Column', 'Description', 'Type']] +
                [[k, v[1], v[0]] for k, v in cols.items()],
                headers='firstrow', tablefmt='grid'
            )
        elif display_as == 'tab':
            out = '\n'.join(
                ['\t'.join(['Column', 'Description', 'Type'])] +
                ['\t'.join([k, v[1], v[0]]) for k, v in cols.items()],
            )
        elif display_as == 'list':
            out = '\n'.join([i[1] for i in cols.values()])
        else:
            raise Exception("'display_as' must be one of {table,tab,list}")
        if write:
            print(out)
        else:
            return out

    def __repr__(self):
        """Display information about this study."""
        return '{} <{}:{} "{}" ({}; Pop: {}; Disc Pops: {}; Rep Pops: {})>'.\
            format(self.id, self.author, self.journal, self.title,
                   self.phenotype.phenotype, self.population.population,
                   self.disc_pops.to_simple_str(),
                   self.rep_pops.to_simple_str())

    def __str__(self):
        """Display reference."""
        # to_simple_str must be called; formatting the bound method would
        # print its repr instead of the population summary.
        return "{}: {} ({})\nSNPS: {}\nInds: {}\n".format(
            self.journal,
            self.title,
            self.author,
            self.snp_count,
            self.total,
        ) + "Disc Pops: {}; Rep Pops: {}; EUR: {}; AFR: {}".format(
            self.disc_pops.to_simple_str(), self.rep_pops.to_simple_str(),
            self.european, self.african)

    def __int__(self):
        """Return ID number."""
        return self.id

    def __len__(self):
        """Return total individual count."""
        return int(self.total)
Exemplo n.º 6
0
 def __init__(self, SparseDict=None):
     """Set up the object over the keys of ``SparseDict``.

     :SparseDict: mapping whose keys are captured; a new empty ordered
         dict is used when omitted
     """
     # A default written as ``_od()`` in the signature is built once and
     # then shared by every instance created without an argument.
     if SparseDict is None:
         SparseDict = _od()
     self._SparseDict = SparseDict
     # Snapshot the keys as a list: on Python 3 ``keys()`` is a view that
     # cannot be indexed. Presumably ``_ndx`` indexes into this list --
     # verify against the rest of the class.
     self._list = list(self._SparseDict.keys())
     self._ndx = 0
Exemplo n.º 7
0
 def __init__(self, size):
     """Initialise an empty container.

     :size: value stored as ``_length``
     """
     self._length = size
     self._size = 0
     # Elements live in an ordered dict, initially empty.
     self._elements = _od()
     # Both counters start at zero.
     self._clear = self._ndx = 0
Exemplo n.º 8
0
def get_file_description_dic(fname, ext, name_key='name', id_key='rn',
                             sep='_', lower_keys=False, lower_values=False,
                             path=True):
    """
    convert fname to dictionary

    assume the base name of fname is in form:
       name_id_key0_val0_key1_val1_...._.ext

    Parameters
    ==========
    fname: string
    ext: string
          extension; removed from the end of the base name when present

    name_key: key for name (=None=> name is not returned)
    id_key:  key for id
    sep:  separator between keys/values (default="_")

    lower_keys: bool (default False)
          downcase keys

    lower_values: bool (default False)
          downcase values

    path : bool (default True)
          if True, store `fname` under the key 'path'

    Returns
    =======
    ordered dic of {key:val...}, every value passed through
    `try_string_conversion`

    Raises
    ======
    IndexError
          if the base name holds fewer than two `sep`-separated fields
    """

    base = _os.path.basename(fname)
    # str.strip(ext) would remove any of ext's *characters* from both ends
    # (e.g. a leading 't' for '.txt'); only the trailing extension must go.
    if ext and base.endswith(ext):
        base = base[:-len(ext)]

    # honour the caller's separator instead of a hard-coded '_'
    y = base.split(sep)

    d = _od()

    if name_key is not None:
        d[name_key] = y[0]

    d[id_key] = y[1]

    # remaining fields alternate key, value, key, value, ...
    k = y[2::2]
    if lower_keys:
        k = [x.lower() for x in k]

    v = y[3::2]
    if lower_values:
        v = [x.lower() for x in v]

    # zip drops a trailing key that has no value
    d.update(_od(zip(k, v)))

    # convert each value through try_string_conversion
    for key in list(d):
        d[key] = try_string_conversion(d[key])

    if path:
        d['path'] = fname

    return d
Exemplo n.º 9
0
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GWpy.  If not, see <http://www.gnu.org/licenses/>.

"""Command-line utilities for GWpy

The `gwpy.cli` module provides methods and functionality to power the
`gwpy-plot` command-line executable (distributed with GWpy).
"""

from collections import OrderedDict as _od

from .timeseries import TimeSeries
from .spectrum import Spectrum
from .spectrogram import Spectrogram
from .coherence import Coherence
from .coherencegram import Coherencegram
from .qtransform import Qtransform

__author__ = 'Joseph Areeda <*****@*****.**>'

# Ordered registry of the available products, keyed by each class's
# ``action`` attribute, in the order listed here.
PRODUCTS = _od(
    (product.action, product)
    for product in (
        TimeSeries,
        Spectrum,
        Spectrogram,
        Coherence,
        Coherencegram,
        Qtransform,
    )
)