def codes_info(args):
    """Query the ``codes_info`` command-line tool for one or more options.

    Every option name is passed to the tool as ``-<name>`` and the tool's
    output, with surrounding whitespace stripped, is recorded.

    :args: a single option name, or a list of option names
    :return: the stripped output when exactly one option was queried,
        otherwise an ordered dict mapping option name to stripped output
    :raises OSError: if invoking the tool raises ``OSError``
    """
    if not isinstance(args, list):
        args = [args]
    try:
        ret = _od([(p, _chekout(['codes_info', '-' + p]).strip())
                   for p in args])
    except OSError:
        raise OSError('codes_info tool was not found')
    if len(ret) == 1:
        # Dict views cannot be indexed on Python 3, so fetch the only value
        # through an iterator; this behaves the same on Python 2.
        return next(iter(ret.values()))
    return ret
def get_sequence_def(masterTableVersionNumber='latest'):
    """Get sequence.def table

    The file content is read through ``_get_entry_`` and split into
    ``key = [ v1, v2, ... ]`` entries.  Parsed tables are stored in
    ``_def_catch_`` under their path, so each path is parsed only once.

    :masterTableVersionNumber: WMO master table version Number
    :return: sequence.def as an ordered dict mapping each integer key to
        its list of integers, ordered by ascending list length
    """
    path = _codes_definition_path_ + '/bufr/tables/0/wmo/{}/sequence.def'
    path = path.format(masterTableVersionNumber)
    if path in _def_catch_:
        return _def_catch_[path]
    content = _get_entry_(path)
    # Splitting on " = [" and " ]\n" yields alternating key / value chunks.
    ls = _re.split(r" = \[| \]\n", content)
    d = _od()
    for i in range(0, len(ls), 2):
        if ls[i] != '':
            k = int(ls[i].replace(' ', '').replace('"', ''))
            v = [int(j) for j in ls[i + 1].replace(' ', '').split(',')]
            d[k] = v
    # this is required to run shrink method properly.
    # ``items()`` is used instead of the Python-2-only ``iteritems()``.
    d = _od(sorted(d.items(), key=lambda x: len(x[1])))
    _def_catch_[path] = d
    return d
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with GWpy. If not, see <http://www.gnu.org/licenses/>. """Command-line utilities for GWpy The `gwpy.cli` module provides methods and functionality to power the `gwpy-plot` command-line executable (distributed with GWpy). """ from collections import OrderedDict as _od from .timeseries import TimeSeries from .spectrum import Spectrum from .spectrogram import Spectrogram from .coherence import Coherence from .coherencegram import Coherencegram from .qtransform import Qtransform __author__ = 'Joseph Areeda <*****@*****.**>' PRODUCTS = _od((x.action, x) for x in ( TimeSeries, Spectrum, Spectrogram, Coherence, Coherencegram, Qtransform, ))
class SNP(Base):

    """An SQLAlchemy Table for GRASP SNPs.

    Study and phenotype information are pushed to other tables to minimize
    table size and make querying easier.

    Table Name:
        snps

    Columns:
        Described in the columns attribute

    Attributes:
        int:      The ID number of the SNP, usually the NHLBIkey
        str:      SNP location expressed as 'chr<chrom>:<pos>'
        hvgs_ids: A list of HGVS IDs for this SNP
        columns:  A dictionary of all columns 'column_name'=>('type', 'desc')
    """

    __tablename__ = "snps"

    id = _Column(_BigInteger, primary_key=True, index=True)
    snpid = _Column(_String, index=True)
    chrom = _Column(_String(10), index=True)
    pos = _Column(_Integer, index=True)
    pval = _Column(_Float, index=True)
    NHLBIkey = _Column(_String, index=True)
    HUPfield = _Column(_String)
    LastCurationDate = _Column(_Date)
    CreationDate = _Column(_Date)
    population_id = _Column(_Integer, _ForeignKey('populations.id'),
                            index=True)
    population = _relationship("Population", backref="snps")
    study_id = _Column(_Integer, _ForeignKey('studies.id'), index=True)
    study = _relationship("Study", back_populates="snps")
    study_snpid = _Column(_String, index=True)
    paper_loc = _Column(_String)
    phenotype_desc = _Column(_String, index=True)
    phenotype_cats = _relationship("PhenoCats",
                                   secondary=snp_pheno_assoc,
                                   back_populates="snps")
    InGene = _Column(_String, index=True)
    NearestGene = _Column(_String, index=True)
    InLincRNA = _Column(_String)
    InMiRNA = _Column(_String)
    InMiRNABS = _Column(_String)
    dbSNPfxn = _Column(_String)
    dbSNPMAF = _Column(_String)
    dbSNPinfo = _Column(_String)
    dbSNPvalidation = _Column(_String)
    dbSNPClinStatus = _Column(_String)
    ORegAnno = _Column(_String)
    ConservPredTFBS = _Column(_String)
    HumanEnhancer = _Column(_String)
    RNAedit = _Column(_String)
    PolyPhen2 = _Column(_String)
    SIFT = _Column(_String)
    LSSNP = _Column(_String)
    UniProt = _Column(_String)
    EqtlMethMetabStudy = _Column(_String)

    # NOTE(review): an Index built from column-name strings inside the class
    # body is presumably not attached to the table; a composite index usually
    # goes in ``__table_args__``. Left unchanged because moving it would alter
    # the generated schema -- confirm against the SQLAlchemy docs.
    _Index('chrom_pos', 'chrom', 'pos')

    columns = _od([
        ('id',                 ('BigInteger',   'NHLBIkey')),
        ('snpid',              ('String',       'SNPid')),
        ('chrom',              ('String',       'chr')),
        ('pos',                ('Integer',      'pos')),
        ('pval',               ('Float',        'Pvalue')),
        ('NHLBIkey',           ('String',       'NHLBIkey')),
        ('HUPfield',           ('String',       'HUPfield')),
        ('LastCurationDate',   ('Date',         'LastCurationDate')),
        ('CreationDate',       ('Date',         'CreationDate')),
        ('population_id',      ('Integer',      'Primary')),
        ('population',         ('relationship', 'Link')),
        ('study_id',           ('Integer',      'Primary')),
        ('study',              ('relationship', 'Link')),
        ('study_snpid',        ('String',       'SNPid')),
        ('paper_loc',          ('String',       'LocationWithinPaper')),
        ('phenotype_desc',     ('String',       'Phenotype')),
        ('phenotype_cats',     ('relationship', 'Link')),
        ('InGene',             ('String',       'InGene')),
        ('NearestGene',        ('String',       'NearestGene')),
        ('InLincRNA',          ('String',       'InLincRNA')),
        ('InMiRNA',            ('String',       'InMiRNA')),
        ('InMiRNABS',          ('String',       'InMiRNABS')),
        ('dbSNPfxn',           ('String',       'dbSNPfxn')),
        ('dbSNPMAF',           ('String',       'dbSNPMAF')),
        ('dbSNPinfo',          ('String',       'dbSNPalleles')),
        ('dbSNPvalidation',    ('String',       'dbSNPvalidation')),
        ('dbSNPClinStatus',    ('String',       'dbSNPClinStatus')),
        ('ORegAnno',           ('String',       'ORegAnno')),
        ('ConservPredTFBS',    ('String',       'ConservPredTFBS')),
        ('HumanEnhancer',      ('String',       'HumanEnhancer')),
        ('RNAedit',            ('String',       'RNAedit')),
        ('PolyPhen2',          ('String',       'PolyPhen2')),
        ('SIFT',               ('String',       'SIFT')),
        ('LSSNP',              ('String',       'LS')),
        ('UniProt',            ('String',       'UniProt')),
        ('EqtlMethMetabStudy', ('String',       'EqtlMethMetabStudy')),
    ])
    """A description of all columns in this table."""

    @property
    def snp_loc(self):
        """Return a simple string containing the SNP location."""
        return "chr{}:{}".format(self.chrom, self.pos)

    @property
    def hvgs_ids(self):
        """The HVGS ID from myvariant.

        The query result is stored on the instance as ``_hvgs_ids`` so the
        remote lookup runs at most once per object.
        """
        if not hasattr(self, '_hvgs_ids'):
            mv = myvariant.MyVariantInfo()
            self._hvgs_ids = [
                i['_id'] for i in
                mv.query(self.snp_loc, fields='id')['hits']
            ]
        return self._hvgs_ids

    def get_variant_info(self, fields="dbsnp", pandas=True):
        """Use the myvariant API to get info about this SNP.

        Note that this service can be very slow.
        It will be faster to query multiple SNPs.

        Args:
            fields: Choose fields to display from:
                    `docs.myvariant.info/en/latest/doc/data.html#available-fields`_
                    Good choices are 'dbsnp', 'clinvar', or 'gwassnps'
                    Can also use 'grasp' to get a different version of this
                    info.
            pandas: Return a dataframe instead of dictionary.

        Returns:
            A dictionary or a dataframe.
        """
        mv = myvariant.MyVariantInfo()
        return mv.getvariants(self.hvgs_ids, fields=fields,
                              as_dataframe=pandas, df_index=True)

    def get_columns(self, return_as='list'):
        """Return the column descriptions in the requested container.

        Display choices:
            list:       A python list of the description of every column
            dictionary: A python dictionary of name=>desc
            long_dict:  A python dictionary of name=>(type, desc)

        Args:
            return_as: {list,dictionary,long_dict}

        Returns:
            A list or dictionary

        Raises:
            Exception: if ``return_as`` is not one of the choices above.
        """
        cols = self.columns
        if return_as == 'list':
            return [i[1] for i in cols.values()]
        elif return_as == 'dictionary':
            return {k: v[1] for k, v in cols.items()}
        elif return_as == 'long_dict':
            return cols
        else:
            # The message names this method's own parameter and choices.
            raise Exception(
                "'return_as' must be one of {list,dictionary,long_dict}"
            )

    def display_columns(self, display_as='table', write=False):
        """Return all columns in the table nicely formatted.

        Display choices:
            table: A formatted grid-like table
            tab:   A tab delimited non-formatted version of table
            list:  The column descriptions, one per line

        Args:
            display_as: {table,tab,list}
            write:      If true, print output to console, otherwise return
                        string.

        Returns:
            A formatted string or None

        Raises:
            Exception: if ``display_as`` is not one of the choices above.
        """
        cols = self.columns
        if display_as == 'table':
            out = _tb(
                [['Column', 'Description', 'Type']] +
                [[k, v[1], v[0]] for k, v in cols.items()],
                headers='firstrow', tablefmt='grid'
            )
        elif display_as == 'tab':
            out = '\n'.join(
                ['\t'.join(['Column', 'Description', 'Type'])] +
                ['\t'.join([k, v[1], v[0]]) for k, v in cols.items()],
            )
        elif display_as == 'list':
            out = '\n'.join([i[1] for i in cols.values()])
        else:
            raise Exception("'display_as' must be one of {table,tab,list}")
        if write:
            print(out)
        else:
            return out

    def __repr__(self):
        """Display information about the table."""
        # The closing '>' balances the '<' opened before the location.
        return "{} ({}) <{}:{} pheno: {} study: {}>".format(
            self.id, self.snpid, self.chrom, self.pos,
            self.phenotype_desc, self.study.title)

    def __int__(self):
        """Return ID number."""
        return self.id

    def __str__(self):
        """Return coordinates."""
        return self.snp_loc
class Study(Base):

    """An SQLAlchemy table to store study information.

    This table provides easy ways to query for SNPs by study information,
    including population and phenotype.

    Note: `disc_pop_flag` and `rep_pop_flag` are integer representations of
    a bitwise flag describing population, defined in ref.PopFlag. To see the
    string representation of this property, lookup `disc_pops` or
    `rep_pops`.

    Table Name:
        studies

    Columns:
        Described in the columns attribute.

    Attributes:
        int:       The integer ID number, usually the PMID, unless not
                   indexed.
        str:       Summary data on this study.
        len:       The number of individuals in this study.
        disc_pops: A string displaying the number of discovery populations.
        rep_pops:  A string displaying the number of replication
                   populations.
        columns:   A dictionary of all columns
                   'column_name'=>('type', 'desc')
        population_information:
                   A multi-line string describing the populations in this
                   study.
    """

    __tablename__ = "studies"

    id = _Column(_Integer, primary_key=True, index=True)
    pmid = _Column(_String(100), index=True)
    title = _Column(_String, index=True)
    journal = _Column(_String)
    author = _Column(_String)
    grasp_ver = _Column(_Integer, index=True)
    noresults = _Column(_Boolean)
    results = _Column(_Integer)
    qtl = _Column(_Boolean)
    snps = _relationship("SNP", back_populates='study')
    phenotype_id = _Column(_Integer, _ForeignKey('phenos.id'), index=True)
    phenotype = _relationship("Phenotype", back_populates="studies")
    phenotype_cats = _relationship("PhenoCats",
                                   secondary=study_pheno_assoc,
                                   back_populates="studies")
    datepub = _Column(_Date)
    in_nhgri = _Column(_Boolean)
    locations = _Column(_String)
    mf = _Column(_Boolean)
    mf_only = _Column(_Boolean)
    platforms = _relationship("Platform",
                              secondary=study_plat_assoc,
                              back_populates="studies")
    snp_count = _Column(_String)
    imputed = _Column(_Boolean)
    population_id = _Column(_Integer, _ForeignKey('populations.id'),
                            index=True)
    population = _relationship("Population", backref="studies")
    total = _Column(_Integer)
    total_disc = _Column(_Integer)
    pop_flag = _Column(_Integer, index=True)  # Will hold a bitwise flag
    disc_pop_flag = _Column(_Integer, index=True)  # Will hold a bitwise flag
    european = _Column(_Integer)
    african = _Column(_Integer)
    east_asian = _Column(_Integer)
    south_asian = _Column(_Integer)
    hispanic = _Column(_Integer)
    native = _Column(_Integer)
    micronesian = _Column(_Integer)
    arab = _Column(_Integer)
    mixed = _Column(_Integer)
    unspecified = _Column(_Integer)
    filipino = _Column(_Integer)
    indonesian = _Column(_Integer)
    total_rep = _Column(_Integer)
    rep_pop_flag = _Column(_Integer, index=True)  # Will hold a bitwise flag
    rep_european = _Column(_Integer)
    rep_african = _Column(_Integer)
    rep_east_asian = _Column(_Integer)
    rep_south_asian = _Column(_Integer)
    rep_hispanic = _Column(_Integer)
    rep_native = _Column(_Integer)
    rep_micronesian = _Column(_Integer)
    rep_arab = _Column(_Integer)
    rep_mixed = _Column(_Integer)
    rep_unspecified = _Column(_Integer)
    rep_filipino = _Column(_Integer)
    rep_indonesian = _Column(_Integer)
    sample_size = _Column(_String)  # Maybe parse this better
    replication_size = _Column(_String)  # Maybe parse this better

    # The keys 'unspecified' and 'rep_unspecified' are spelled to match the
    # column attributes declared above.
    columns = _od([
        ('id',               ('Integer',      'id')),
        ('pmid',             ('String',       'PubmedID')),
        ('title',            ('String',       'Study')),
        ('journal',          ('String',       'Journal')),
        ('author',           ('String',       '1st_author')),
        ('grasp_ver',        ('Integer',      'GRASPversion?')),
        ('noresults',        ('Boolean',      'No results flag')),
        ('results',          ('Integer',      '#results')),
        ('qtl',              ('Boolean',
                              'IsEqtl/meQTL/pQTL/gQTL/Metabolmics?')),
        ('snps',             ('relationship',
                              'Link to all SNPs in this study')),
        ('phenotype_id',     ('Integer',
                              'ID of primary phenotype in Phenotype table')),
        ('phenotype',        ('relationship',
                              'A link to the primary phenotype in the Phenotype table')),
        ('phenotype_cats',   ('relationship',
                              'A link to all phenotype categories assigned in the PhenoCats table')
        ),
        ('datepub',          ('Date',         'DatePub')),
        ('in_nhgri',         ('Boolean',
                              'In NHGRI GWAS catalog (8/26/14)?')),
        ('locations',        ('String',
                              'Specific place(s) mentioned for samples')),
        ('mf',               ('Boolean',
                              'Includes male/female only analyses in discovery and/or replication?'
                             )),
        ('mf_only',          ('Boolean',
                              'Exclusively male or female study?')),
        ('platforms',        ('relationship',
                              'Link to platforms in the Platform table. Platform [SNPs passing QC]'
                             )),
        ('snp_count',        ('String',
                              'From "Platform [SNPs passing QC]"')),
        ('imputed',          ('Boolean',
                              'From "Platform [SNPs passing QC]"')),
        ('population_id',    ('Integer',
                              'Primary key of population table')),
        ('population',       ('relationship',
                              'GWAS description, link to table')),
        ('total',            ('Integer',
                              'Total Discovery + Replication sample size')),
        ('total_disc',       ('Integer',
                              'Total discovery samples')),
        ('pop_flag',         ('Integer',
                              'A bitwise flag that shows presence/absence of all populations (discovery and replication)'
                             )),
        ('disc_pop_flag',    ('Integer',
                              'A bitwise flag that shows presence/absence of discovery populations'
                             )),
        ('european',         ('Integer',      'European')),
        ('african',          ('Integer',      'African ancestry')),
        ('east_asian',       ('Integer',      'East Asian')),
        ('south_asian',      ('Integer',      'Indian/South Asian')),
        ('hispanic',         ('Integer',      'Hispanic')),
        ('native',           ('Integer',      'Native')),
        ('micronesian',      ('Integer',      'Micronesian')),
        ('arab',             ('Integer',      'Arab/ME')),
        ('mixed',            ('Integer',      'Mixed')),
        ('unspecified',      ('Integer',      'Unspec')),
        ('filipino',         ('Integer',      'Filipino')),
        ('indonesian',       ('Integer',      'Indonesian')),
        ('total_rep',        ('Integer',
                              'Total replication samples')),
        ('rep_pop_flag',     ('Integer',
                              'A bitwise flag that shows presence/absence of replication populations'
                             )),
        ('rep_european',     ('Integer',      'European.1')),
        ('rep_african',      ('Integer',      'African ancestry.1')),
        ('rep_east_asian',   ('Integer',      'East Asian.1')),
        ('rep_south_asian',  ('Integer',      'Indian/South Asian.1')),
        ('rep_hispanic',     ('Integer',      'Hispanic.1')),
        ('rep_native',       ('Integer',      'Native.1')),
        ('rep_micronesian',  ('Integer',      'Micronesian.1')),
        ('rep_arab',         ('Integer',      'Arab/ME.1')),
        ('rep_mixed',        ('Integer',      'Mixed.1')),
        ('rep_unspecified',  ('Integer',      'Unspec.1')),
        ('rep_filipino',     ('Integer',      'Filipino.1')),
        ('rep_indonesian',   ('Integer',      'Indonesian.1')),
        ('sample_size',      ('String',
                              'Initial Sample Size, string description of integer population counts above.'
                             )),
        ('replication_size', ('String',
                              'Replication Sample Size, string description of integer population counts above.'
                             )),
    ])
    """A description of all columns in this table."""

    @property
    def disc_pops(self):
        """Convert disc_pop_flag to PopFlag."""
        return _PopFlag(self.disc_pop_flag)

    @property
    def rep_pops(self):
        """Convert rep_pop_flag to PopFlag."""
        return _PopFlag(self.rep_pop_flag)

    @property
    def pops(self):
        """Convert pop_flag to PopFlag."""
        return _PopFlag(self.pop_flag)

    @property
    def population_information(self):
        """Return a multi-line summary of population data.

        The summary gives the primary population, the total individual
        count, then the discovery and replication totals, each followed by
        one tab-indented line per population count.
        """
        # NOTE(review): 'mixed' has count columns but was not listed in the
        # original summary; it is still omitted here -- confirm whether that
        # is intentional.
        pop_names = (
            'european', 'african', 'east_asian', 'south_asian', 'hispanic',
            'native', 'micronesian', 'arab', 'unspecified', 'filipino',
            'indonesian',
        )
        outstr = [
            "Primary population: {}\n".format(self.population.population),
            "Individuals: {}\n".format(self.total),
            "Discovery populations: {}; Total: {}\n".format(
                self.disc_pops.to_simple_str(), self.total_disc)
        ]
        for pop in pop_names:
            # getattr replaces eval for plain attribute access.
            outstr.append('\t{}: {}\n'.format(pop, getattr(self, pop)))
        outstr.append("Replication populations: {}; Total: {}\n".format(
            self.rep_pops.to_simple_str(), self.total_rep))
        for pop in pop_names:
            outstr.append(
                '\t{}: {}\n'.format(pop, getattr(self, 'rep_' + pop)))
        # Every piece already ends with a newline, so join without a
        # separator. Without this return the property yielded None.
        return ''.join(outstr)

    def get_columns(self, return_as='list'):
        """Return the column descriptions in the requested container.

        Display choices:
            list:       A python list of the description of every column
            dictionary: A python dictionary of name=>desc
            long_dict:  A python dictionary of name=>(type, desc)

        Args:
            return_as: {list,dictionary,long_dict}

        Returns:
            A list or dictionary

        Raises:
            Exception: if ``return_as`` is not one of the choices above.
        """
        cols = self.columns
        if return_as == 'list':
            return [i[1] for i in cols.values()]
        elif return_as == 'dictionary':
            return {k: v[1] for k, v in cols.items()}
        elif return_as == 'long_dict':
            return cols
        else:
            # The message names this method's own parameter and choices.
            raise Exception(
                "'return_as' must be one of {list,dictionary,long_dict}"
            )

    def display_columns(self, display_as='table', write=False):
        """Return all columns in the table nicely formatted.

        Display choices:
            table: A formatted grid-like table
            tab:   A tab delimited non-formatted version of table
            list:  The column descriptions, one per line

        Args:
            display_as: {table,tab,list}
            write:      If true, print output to console, otherwise return
                        string.

        Returns:
            A formatted string or None

        Raises:
            Exception: if ``display_as`` is not one of the choices above.
        """
        cols = self.columns
        if display_as == 'table':
            out = _tb(
                [['Column', 'Description', 'Type']] +
                [[k, v[1], v[0]] for k, v in cols.items()],
                headers='firstrow', tablefmt='grid'
            )
        elif display_as == 'tab':
            out = '\n'.join(
                ['\t'.join(['Column', 'Description', 'Type'])] +
                ['\t'.join([k, v[1], v[0]]) for k, v in cols.items()],
            )
        elif display_as == 'list':
            out = '\n'.join([i[1] for i in cols.values()])
        else:
            raise Exception("'display_as' must be one of {table,tab,list}")
        if write:
            print(out)
        else:
            return out

    def __repr__(self):
        """Display information about this study."""
        return '{} <{}:{} "{}" ({}; Pop: {}; Disc Pops: {}; Rep Pops: {})>'.\
            format(self.id, self.author, self.journal, self.title,
                   self.phenotype.phenotype, self.population.population,
                   self.disc_pops.to_simple_str(),
                   self.rep_pops.to_simple_str())

    def __str__(self):
        """Display reference."""
        return "{}: {} ({})\nSNPS: {}\nInds: {}\n".format(
            self.journal, self.title, self.author, self.snp_count,
            self.total,
        ) + "Disc Pops: {}; Rep Pops: {}; EUR: {}; AFR: {}".format(
            # to_simple_str must be called; formatting the bound method
            # itself would print its repr instead of the population text.
            self.disc_pops.to_simple_str(),
            self.rep_pops.to_simple_str(),
            self.european, self.african)

    def __int__(self):
        """Return ID number."""
        return self.id

    def __len__(self):
        """Return total individual count."""
        return int(self.total)
def __init__(self, SparseDict=None):
    """Record a mapping and reset the position counter.

    Args:
        SparseDict: mapping whose keys are captured in ``_list``. When
                    omitted, a new empty ordered dict is created for this
                    instance.
    """
    # A ``_od()`` default is built once at definition time and would then
    # be shared by every instance constructed without an argument.
    if SparseDict is None:
        SparseDict = _od()
    self._SparseDict = SparseDict
    # Materialise the keys as a list: on Python 3 ``keys()`` is a view that
    # cannot be indexed (presumably ``_ndx`` indexes into it elsewhere in
    # the class -- not visible here). Python 2 already returned a list.
    self._list = list(self._SparseDict.keys())
    self._ndx = 0
def __init__(self, size):
    """Store ``size`` and start with empty, zeroed bookkeeping state.

    ``size`` is kept as ``_length``; ``_elements`` starts as an empty
    ordered dict, and the ``_size``, ``_clear`` and ``_ndx`` counters all
    start at zero.
    """
    self._length = size
    self._elements = _od()
    # All three counters begin at zero.
    self._size = self._clear = self._ndx = 0
def get_file_description_dic(fname, ext, name_key='name', id_key='rn',
                             sep='_', lower_keys=False, lower_values=False,
                             path=True):
    """
    convert fname to dictionary

    assume fname is in form:
    name_id_key0_val0_key1_val1_...._.ext

    Parameters
    ==========
    fname: string

    ext: string (removed from the end of the base name when present)

    name_key: key for name (=None=> name is not returned)

    id_key: key for id

    sep: separator between keys/values (default="_")

    lower_keys: bool (default False)
      downcase keys

    lower_values: bool (default False)
      downcase values

    path : bool (default True)
      if True, include path

    Returns
    =======
    dic of {key:val...}
      every value is passed through `try_string_conversion`; 'path', when
      requested, holds `fname` unchanged

    Raises
    ======
    IndexError
      if the base name holds fewer than two `sep`-separated fields
    """
    base = _os.path.basename(fname)
    # Remove the extension as a suffix only. ``str.strip(ext)`` treats its
    # argument as a character set and also eats matching characters from
    # the start of the name.
    if ext and base.endswith(ext):
        base = base[:-len(ext)]

    # Honour the documented ``sep`` argument instead of a hard-coded '_'.
    y = base.split(sep)

    d = _od()
    if name_key is not None:
        d[name_key] = y[0]

    d[id_key] = y[1]

    # add in the rest: the remaining fields alternate key, value
    k = y[2::2]
    if lower_keys:
        k = [x.lower() for x in k]

    v = y[3::2]
    if lower_values:
        v = [x.lower() for x in v]

    # zip drops an unpaired trailing field, e.g. the empty string left by a
    # separator placed just before the extension
    d.update(zip(k, v))

    # convert values where the helper can
    for key in list(d):
        d[key] = try_string_conversion(d[key])

    if path:
        d['path'] = fname

    return d
# GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with GWpy. If not, see <http://www.gnu.org/licenses/>. """Command-line utilities for GWpy The `gwpy.cli` module provides methods and functionality to power the `gwpy-plot` command-line executable (distributed with GWpy). """ from collections import OrderedDict as _od from .timeseries import TimeSeries from .spectrum import Spectrum from .spectrogram import Spectrogram from .coherence import Coherence from .coherencegram import Coherencegram from .qtransform import Qtransform __author__ = 'Joseph Areeda <*****@*****.**>' PRODUCTS = _od((x.action, x) for x in ( TimeSeries, Spectrum, Spectrogram, Coherence, Coherencegram, Qtransform, ))