示例#1
0
    def expression_data(self, id_type='gene'):
        """
        Return (id_type, data) for this sample, where data maps id -> float
        expression value.

        If id_type is None, choose one based on which data file exists
        (delegated to self._get_id_type()).  Raises on errors (bad id_type,
        missing/malformed data file), so be careful.
        """
        # determine id_type if necessary, checking for existence as well:
        if id_type is None:
            id_type = self._get_id_type()
        # NOTE: the old regex '^gene|probe$' matched '^gene' OR 'probe$', so
        # e.g. 'genetics' passed validation; use an exact membership test
        if id_type not in ('gene', 'probe'):
            raise Exception('id_type must be one of "gene" or "probe"')

        # read data and store to dict; `with` guarantees the file is closed
        data = {}
        with open(self.data_path(id_type=id_type), 'r') as data_file:
            if id_type == 'probe':
                # probe files carry a two-line header; skip it
                data_file.readline()
                data_file.readline()
            for line in data_file:
                fields = re.split(r'[,\s]+', line)
                if fields[0] in data:
                    # old message applied %f to the raw string field and
                    # raised TypeError; format the new value as %s instead
                    warn("Sample.expression_data: overwriting %s %f->%s" %
                         (fields[0], data[fields[0]], fields[1]))
                data[fields[0]] = float(fields[1])

        return (id_type, data)
示例#2
0
    def sh_cmdline(self):
        """
        Build the shell command line for this step by interpolating self into
        the step's 'usage' template; returns the expanded string.
        Raises ConfigError when the template references missing values.
        """
        # 'usage' may be absent or explicitly None; treat both as empty
        try:
            usage = self['usage']
            if usage is None:
                usage = ''
        except KeyError:
            usage = ''

        # look for exe in path, unless exe is an absolute path
        try:
            if os.path.abspath(self['exe']) != self['exe']:
                self['exe'] = os.path.join(
                    RnaseqGlobals.conf_value('rnaseq', 'root_dir'),
                    'programs', self['exe'])
        except KeyError:            # not all steps have self['exe']; eg header, footer
            pass

        try:
            return usage % self
        except (KeyError, AttributeError) as e:
            raise ConfigError("Missing value %s in\n%s" % (e.args, self.name))
        except ValueError as e:
            warn(e)
            warn("%s.usage: %s" % (self.name, usage))
            # the old code raised a bare string, which is itself a TypeError
            # in python 3; raise a real exception instead
            raise ConfigError("%s.keys(): %s" % (self.name, ", ".join(self.__dict__.keys())))
        except TypeError as te:
            raise ConfigError("step %s: usage='%s': %s" % (self.name, usage, te))
示例#3
0
    def insert_geo(self, geo):
        '''
        insert all words associated with a geo object into the word2geo db

        Re-inserts from scratch: any existing word2geo rows for this geo_id
        are removed first.  Each (geo_id, word, source) row carries a 'count'
        incremented once per occurrence.  Returns None.
        '''
        # clear any previous rows for this geo object
        self.mongo().remove({'geo_id': geo.geo_id})

        # words: k=tag, v=list of sanitized words (may have dups)
        words=self.get_field_words(geo)
        # fold in words from any linked pubmed article(s); pubmed_id may be
        # a single value or a list
        if hasattr(geo, 'pubmed_id'):
            if type(geo.pubmed_id)==type([]):
                for pmid in [int(x) for x in geo.pubmed_id]:
                    words.update(self.get_pubmed_words(pmid))
            else:
                words.update(self.get_pubmed_words(int(geo.pubmed_id)))
        
        # totals: k=source tag, v=number of words inserted for that source
        totals=dict()
        # NOTE(review): the loop variable below shadows the `words` dict being
        # iterated; it works (items() was already taken from the dict object)
        # but is confusing — consider renaming
        for source, words in words.items():
            for word in words:
                warn("%s: adding %s:%s" % (geo.geo_id, source, word))
                query={'geo_id':geo.geo_id, 'word':word, 'source':source}
                record=self.mongo().find_one(query)
                if record:
                    # existing row: bump (or initialize) its occurrence count
                    if 'count' in record: record['count']+=1
                    else: record['count']=1
                else:
                    # new row: start counting at 1
                    record=query
                    record['count']=1
                self.mongo().save(record)

                try: totals[source]+=1
                except: totals[source]=1


        warn("%s: %s" % (geo.geo_id, totals))
        return
示例#4
0
    def expression_data(self, id_type='gene'):
        """
        Return (id_type, data) for this sample, where data maps id -> float
        expression value.

        If id_type is None, choose one based on which data file exists
        (delegated to self._get_id_type()).  Raises on errors (bad id_type,
        missing/malformed data file), so be careful.
        """
        # determine id_type if necessary, checking for existence as well:
        if id_type is None:
            id_type = self._get_id_type()
        # NOTE: the old regex '^gene|probe$' matched '^gene' OR 'probe$', so
        # e.g. 'genetics' passed validation; use an exact membership test
        if id_type not in ('gene', 'probe'):
            raise Exception('id_type must be one of "gene" or "probe"')

        # read data and store to dict; `with` guarantees the file is closed
        data = {}
        with open(self.data_path(id_type=id_type), 'r') as data_file:
            if id_type == 'probe':
                # probe files carry a two-line header; skip it
                data_file.readline()
                data_file.readline()
            for line in data_file:
                fields = re.split(r'[,\s]+', line)
                if fields[0] in data:
                    # old message applied %f to the raw string field and
                    # raised TypeError; format the new value as %s instead
                    warn("Sample.expression_data: overwriting %s %f->%s" %
                         (fields[0], data[fields[0]], fields[1]))
                data[fields[0]] = float(fields[1])

        return (id_type, data)
示例#5
0
def get_field_words(geo):
    '''
    collect words from certain fields in the record:

    Returns a dict keyed by field name ('title', 'description', 'summary'),
    each value a list of 1-, 2-, and 3-word "windows" produced by
    str_windows().  Fields the geo object lacks map to an empty list.
    '''
    debug='DEBUG' in os.environ
    words={}                # k=field, v=[w1, w2, w3, ...] (w's can be "windows")
    word_fields=['title', 'description', 'summary']
    for field in word_fields:
        words[field]=[]     
        if hasattr(geo, field):
            field_words=getattr(geo, field) # can be a string, a list of single words, or a list of paragraphs
            # normalize to a list so a bare string is treated as one entry
            if type(field_words) != list:
                field_words=[field_words]

            if len(field_words)==0:
                if debug: warn("does this ever happen?" % ())
                continue

            i=0
            for wl in field_words:
#                if debug: warn("\n%s[%d]: wl(%s, %d) is %s" % (field, i, type(wl), len(wl), wl))
                i+=1
                # wrap this in a loop n=(1..3)
                # replace sanitized_list() with str_windows(wl, n)
                for n in range(1,4): # gives 1,2,3
                    # NOTE(review): len(wl) counts characters, not words, so
                    # this gate is only an approximation of "at least n words"
                    # — confirm str_windows tolerates short inputs
                    if len(wl)>=n:
                        # windows of n consecutive words, split on '-', '_' or whitespace
                        windows=str_windows(wl, n, '[-_\s]+')
#                        if debug: warn("%s(%d): %d windows " % (field, n, len(windows)))
                        for w in windows:
                            words[field].append(w)
                    else:
                        if debug: warn("skipping %s(%d): len(wl)=%d" % (field, n, len(wl)))
                            
    return words
示例#6
0
    def test_bowtie(self):
        """Load the bowtie template twice and check the resolved exe value."""
        sy = superyaml(domain="/proj/hoodlab/share/vcassen/rna-seq/rnaseq/templates")
        d_sy = sy.load("bowtie.syml", {})
        warn(yaml.dump(d_sy))

        # second load resolves template values against the first pass
        d_sy = sy.load("bowtie.syml", d_sy)
        warn(yaml.dump(d_sy))
        # assertEquals is a deprecated alias (removed in python 3.12)
        self.assertEqual(d_sy["exe"], "bowtie", 'got d[exe]=%s, expected "bowtie"' % d_sy["exe"])
示例#7
0
    def test_all_ids_with_data(self):
        """all_ids_with_data returns a list containing known samples for each id_type."""
        for id_type in ['probe', 'gene']:
            warn("calling Sample.all_ids_with_data(id_type='%s'...)" % id_type)
            # BUG FIX: the old call hard-coded id_type='probe', so the 'gene'
            # branch of the loop never actually tested gene data
            all_samples = Sample.all_ids_with_data(id_type=id_type)

            self.assertIsInstance(all_samples, list)
            self.assertIn('GSM32106', all_samples)  # it's in both lists
            self.assertNotIn('GSM1', all_samples)
示例#8
0
    def test_all_ids_with_data(self):
        """all_ids_with_data returns a list containing known samples for each id_type."""
        for id_type in ['probe', 'gene']:
            warn("calling Sample.all_ids_with_data(id_type='%s'...)" % id_type)
            # BUG FIX: the old call hard-coded id_type='probe', so the 'gene'
            # branch of the loop never actually tested gene data
            all_samples = Sample.all_ids_with_data(id_type=id_type)

            self.assertIsInstance(all_samples, list)
            self.assertIn('GSM32106', all_samples)  # it's in both lists
            self.assertNotIn('GSM1', all_samples)
示例#9
0
    def ensure_indexes(self):
        """
        Create any mongo indexes declared in self.indexes, a list of specs
        each with 'keys' and an optional 'options' dict.  Problems with a
        spec are logged and skipped rather than fatal.
        """
        if not hasattr(self, 'indexes'):
            return
        for index_spec in self.indexes:
            try:
                # old code fetched 'options' into kwargs but never passed it
                # to ensure_index (and raised KeyError when 'options' was
                # absent); use .get and actually forward the options
                kwargs = index_spec.get('options', {})
                self.mongo().ensure_index(index_spec['keys'], **kwargs)
            except AttributeError as ae:
                warn("caught %s for %s" % (ae, index_spec))
示例#10
0
    def test_mongo(self):
        """Smoke-test the Sample collection: records exist and carry expected keys."""
        self.assertIsInstance(Sample.mongo(), pymongo.collection.Collection)
        cursor = Sample.mongo().find()
        warn("cursor: got %d records" % cursor.count())
        self.assertTrue(cursor.count() > 1)

        first = cursor.next()
        for key in ('geo_id', '_id'):
            self.assertTrue(key in first)
示例#11
0
    def read_config(self, config_file):
        """
        Load the YAML global config file into self.config.
        Dies with a UserError if the file cannot be read.
        """
        try:
            # `with` guarantees the handle is closed even if read() fails
            with open(config_file) as f:
                yml = f.read()
            # safe_load avoids executing arbitrary python object tags from
            # the config file (yaml.load without a Loader is unsafe and
            # deprecated); config files here are plain data
            self.config = yaml.safe_load(yml)
        except IOError as ioe:
            warn("error trying to load global config file:")
            die(UserError(ioe))
示例#12
0
    def subsets(self):
        """Return the populated DatasetSubset objects for this dataset ([] if none)."""
        if not hasattr(self, 'n_subsets'):
            if 'DEBUG' in os.environ: warn("%s: no subsets?" % (self.geo_id))
            return []

        # subset ids are "<geo_id>_1" .. "<geo_id>_<n_subsets>"
        return [DatasetSubset("%s_%d" % (self.geo_id, idx)).populate()
                for idx in range(1, int(self.n_subsets) + 1)]
示例#13
0
    def id_file_callback(option, opt, value, parser, *args, **kwargs):
        """
        optparse callback: read ids (one per line) from the file named by
        `value` and extend parser.values.idlist with them.
        """
        warn("options is %s" % (option))
        warn("opt is %s" % (opt))
        warn("value is %s" % (value))
        warn("args are %s" % (args))
        warn("kwargs are %s" % (kwargs))

        # BUG FIX: the old filter `type(x) == int` was always False (file
        # iteration yields strings), so the idlist was always empty; keep
        # the ids as stripped strings, matching ids passed on the command line
        with open(value) as f:
            idlist = [line.strip() for line in f if line.strip()]
        parser.values.idlist.extend(idlist)
示例#14
0
 def test_with_phenos(self):
     """Every sample returned by with_pheno must carry that phenotype."""
     phenos = ("adenocarcinoma", "normal", "asthma",
               "squamous cell carcinoma",
               "chronic obstructive pulmonary disease",
               "large cell lung carcinoma")
     for pheno in phenos:
         matched = GEO.Sample.Sample.with_pheno(pheno)
         warn("%s: got %d samples" % (pheno, len(matched)))
         for s in matched:
             self.assertEqual(s.phenotype, pheno)
示例#15
0
    def id_file_callback(option, opt, value, parser, *args, **kwargs):
        """
        optparse callback: read ids (one per line) from the file named by
        `value` and extend parser.values.idlist with them.
        """
        warn("options is %s" % (option))
        warn("opt is %s" % (opt))
        warn("value is %s" % (value))
        warn("args are %s" % (args))
        warn("kwargs are %s" % (kwargs))

        # BUG FIX: the old filter `type(x) == int` was always False (file
        # iteration yields strings), so the idlist was always empty; keep
        # the ids as stripped strings, matching ids passed on the command line
        with open(value) as f:
            idlist = [line.strip() for line in f if line.strip()]
        parser.values.idlist.extend(idlist)
示例#16
0
def get_geo_ids(options):
    """Return the ids given on the command line, or else every Series geo_id in mongo."""
    if len(options.idlist): return options.idlist

    # only doing series now, and everything else goes through series
    geo_ids = []
    for cls in [GEO.Series.Series]:
        cursor = cls.mongo().find({}, {'_id': 0, 'geo_id': 1})
        warn("got %d %s records" % (cursor.count(), cls.__name__))
        geo_ids.extend(record['geo_id'] for record in cursor)
    return geo_ids
示例#17
0
def _all_geo_pmids():
    ''' gets pmids from all geo objects in the db: '''
    collected = []
    for cls in (Series, Dataset):
        for record in cls.mongo().find({'pubmed_id': {'$ne': 'null'}}):
            if 'pubmed_id' not in record:
                continue
            pmids = record['pubmed_id']
            # pubmed_id may be stored as a single value or a list
            if type(pmids) is list:
                collected += [int(p) for p in pmids]
            else:
                collected.append(int(pmids))
            warn("pmids are %s" % (pmids))
    return collected
示例#18
0
def _all_geo_pmids():
    ''' gets pmids from all geo objects in the db: '''
    collected = []
    for cls in (Series, Dataset):
        for record in cls.mongo().find({'pubmed_id': {'$ne': 'null'}}):
            if 'pubmed_id' not in record:
                continue
            pmids = record['pubmed_id']
            # pubmed_id may be stored as a single value or a list
            if type(pmids) is list:
                collected += [int(p) for p in pmids]
            else:
                collected.append(int(pmids))
            warn("pmids are %s" % (pmids))
    return collected
示例#19
0
    def fetch(self):
        ''' Return Document object for this pubmed id, obtained from NCBI if necessary '''
        if os.access(self.path(), os.R_OK):
            # BUG FIX: the old format string was '%d' % () — a %d with no
            # argument, which raises TypeError; supply the pubmed id
            warn("%d: already on disk" % (self.pubmed_id))
            with open(self.path(), 'r') as f:
                xml_doc = f.read()
        else:
            warn("%d: fetching from pubmed" % (self.pubmed_id))
            xml_doc = Entrez.efetch(db="pubmed", id=self.pubmed_id, retmode='xml').read()
            # cache the fetched record on disk for next time
            with open(self.path(), 'w') as f:
                f.write(xml_doc + "\n")

        return parseString(xml_doc)
示例#20
0
def _geolist2pmidlist(geo_ids):
    ''' converts a list of mixed pmids and geo_ids to all pmids by doing the lookups on the geo objects '''
    pmidlist = []
    for gid in geo_ids:
        # all-digit ids are already pmids; pass them through
        if re.match(r'^\d+$', gid):
            pmidlist.append(gid)
            continue
        try:
            geo = Factory().newGEO(gid)
            pmids = geo.pubmed_id   # might be single value or list
        except Exception as e:
            warn("caught %s" % (e))
            continue    # id not a geo id, or geo didn't have any pubmed_id

        # BUG FIX: the old `try: append / except: extend` was inverted —
        # list.append never raises, so list-valued pmids were appended as
        # whole lists; branch on the type instead
        if isinstance(pmids, list):
            pmidlist.extend(pmids)
        else:
            pmidlist.append(pmids)
    return pmidlist
示例#21
0
    def test_all_ids_with_pheno(self):
        """Each known phenotype yields a non-empty list of sample ids."""
        for pheno in ("normal",
                      "adenocarcinoma",
                      "squamous cell carcinoma",
                      "asthma",
                      "chronic obstructive pulmonary disease",
                      "large cell lung carcinoma"):
            ids = Sample.all_ids_with_pheno(pheno)
            self.assertIsInstance(ids, list)
            warn("got %d '%s' samples" % (len(ids), pheno))
            self.assertTrue(len(ids) > 0)

            # count how many of those ids have a readable data file
            readable = [gid for gid in ids
                        if os.access(Sample.data_path_of(geo_id=gid), os.R_OK)]
            warn("got %d '%s' samples with data" % (len(readable), pheno))
示例#22
0
    def fetch(self):
        ''' Return Document object for this pubmed id, obtained from NCBI if necessary '''
        if os.access(self.path(), os.R_OK):
            # BUG FIX: the old format string was '%d' % () — a %d with no
            # argument, which raises TypeError; supply the pubmed id
            warn("%d: already on disk" % (self.pubmed_id))
            with open(self.path(), 'r') as f:
                xml_doc = f.read()
        else:
            warn("%d: fetching from pubmed" % (self.pubmed_id))
            xml_doc = Entrez.efetch(db="pubmed",
                                    id=self.pubmed_id,
                                    retmode='xml').read()
            # cache the fetched record on disk for next time
            with open(self.path(), 'w') as f:
                f.write(xml_doc + "\n")

        return parseString(xml_doc)
示例#23
0
    def _ref_dc(self) -> int:
        """
        Get the position of the reference dataset from the results file as a 0-based index

        Returns
        -------
        ref_dc : int

        Raises
        ------
        KeyError
            If the netCDF metadata lacks the reference-dataset attribute;
            a warning is emitted before re-raising.
        """
        ref_dc = 0  # default; overwritten on successful parse

        try:
            val_ref = self.meta[globals._ref_ds_attr]
            # parse() extracts the leading index from the attribute value
            # using the short-name pattern — presumably "<idx>-<name>";
            # TODO confirm the attribute format
            ref_dc = parse(globals._ds_short_name_attr, val_ref)[0]
        except KeyError as e:
            warn("The netCDF file does not contain the attribute {}".format(globals._ref_ds_attr))
            raise e

        return ref_dc
示例#24
0
    def test_all_ids_with_pheno(self):
        """Each known phenotype yields a non-empty list of sample ids."""
        phenos = ("normal", "adenocarcinoma", "squamous cell carcinoma",
                  "asthma", "chronic obstructive pulmonary disease",
                  "large cell lung carcinoma")

        for pheno in phenos:
            ids = Sample.all_ids_with_pheno(pheno)
            self.assertIsInstance(ids, list)
            warn("got %d '%s' samples" % (len(ids), pheno))
            self.assertTrue(len(ids) > 0)

            # count how many of those ids have a readable data file
            readable = [gid for gid in ids
                        if os.access(Sample.data_path_of(geo_id=gid), os.R_OK)]
            warn("got %d '%s' samples with data" % (len(readable), pheno))
示例#25
0
def _geolist2pmidlist(geo_ids):
    ''' converts a list of mixed pmids and geo_ids to all pmids by doing the lookups on the geo objects '''
    pmidlist = []
    for gid in geo_ids:
        # all-digit ids are already pmids; pass them through
        if re.match(r'^\d+$', gid):
            pmidlist.append(gid)
            continue
        try:
            geo = Factory().newGEO(gid)
            pmids = geo.pubmed_id  # might be single value or list
        except Exception as e:
            warn("caught %s" % (e))
            continue  # id not a geo id, or geo didn't have any pubmed_id

        # BUG FIX: the old `try: append / except: extend` was inverted —
        # list.append never raises, so list-valued pmids were appended as
        # whole lists; branch on the type instead
        if isinstance(pmids, list):
            pmidlist.extend(pmids)
        else:
            pmidlist.append(pmids)
    return pmidlist
示例#26
0
    def test_store(self):
        """Store a known pubmed record, then verify the on-disk file and word counts."""
        warn("\n")
        pmid = 18297132
        pubmed = Pubmed(pmid)
        self.assertIsInstance(pubmed, Pubmed)
        self.assertEqual(pubmed.pubmed_id, pmid)

        # re-store from scratch and check the cached file exists
        pubmed.remove()
        pubmed.store()
        self.assertTrue(os.access(pubmed.path(), os.R_OK))
        cursor = Pubmed.mongo().find({'pubmed_id': pmid})
        self.assertEqual(cursor.count(), len(Pubmed.text_tags))

        expected_counts = {'MeshHeading': 22,
                           'AbstractText': 247,
                           'ArticleTitle': 15}
        for rec in cursor:
            self.assertEqual(len(rec['words']), expected_counts[rec['tag']])
示例#27
0
    def test_store(self):
        """Store a known pubmed record, then verify the on-disk file and word counts."""
        warn("\n")
        pmid = 18297132
        pubmed = Pubmed(pmid)
        self.assertIsInstance(pubmed, Pubmed)
        self.assertEqual(pubmed.pubmed_id, pmid)

        # re-store from scratch and check the cached file exists
        pubmed.remove()
        pubmed.store()
        self.assertTrue(os.access(pubmed.path(), os.R_OK))
        cursor = Pubmed.mongo().find({'pubmed_id': pmid})
        self.assertEqual(cursor.count(), len(Pubmed.text_tags))

        expected_counts = {'MeshHeading': 22,
                           'AbstractText': 247,
                           'ArticleTitle': 15}
        for rec in cursor:
            self.assertEqual(len(rec['words']), expected_counts[rec['tag']])
示例#28
0
    def expression_data(self, id_type="gene"):
        """
        Return (id_type, data), where data maps id -> float expression value.

        If id_type is None, pick whichever of the 'gene'/'probe' data files
        exists, raising if neither does.  Raises on an invalid id_type or a
        malformed data file, so be careful.
        """
        # determine id_type if necessary, checking for existence as well:
        if id_type is None:
            if os.access(self.data_path(id_type="gene"), os.R_OK):
                id_type = "gene"
            elif os.access(self.data_path(id_type="probe"), os.R_OK):
                id_type = "probe"
            else:
                raise Exception("No data for sample %s" % self.geo_id)
        elif id_type not in ("gene", "probe"):
            # NOTE: the old regex '^gene|probe$' matched '^gene' OR 'probe$',
            # so e.g. 'genetics' passed validation; exact membership instead
            raise Exception('id_type must be one of "gene" or "probe"')

        # read data and store to dict; `with` guarantees the file is closed
        data = {}
        with open(self.data_path(id_type=id_type), "r") as data_file:
            if id_type == "probe":
                # probe files carry a two-line header; skip it
                data_file.readline()
                data_file.readline()
            for line in data_file:
                fields = re.split(r"[,\s]+", line)
                if fields[0] in data:
                    # old message applied %f to the raw string field and
                    # raised TypeError; format the new value as %s instead
                    warn("Sample.expression_data: overwriting %s %f->%s" %
                         (fields[0], data[fields[0]], fields[1]))
                data[fields[0]] = float(fields[1])

        return (id_type, data)
示例#29
0
def main():
    """Insert words for every requested geo id; honors the fuse debug limit."""
    options = get_options()
    geo_ids = get_geo_ids(options)
    factory = Factory()
    warn("insert_geo_words starting: %s" % (datetime.datetime.now().__str__()))

    fuse = options.fuse
    for geo_id in geo_ids:
        geo = factory.newGEO(geo_id)
        warn("inserting %s" % (geo.geo_id))
        stats = insert_series(geo)
        warn("%s: %s" % (geo_id, stats))
        # fuse counts down; loop stops when it reaches exactly zero
        fuse -= 1
        if fuse == 0:
            break

    warn("insert_geo_words done: %s" % (datetime.datetime.now().__str__()))
    return 0
示例#30
0
    def insert_geo(self, geo):
        '''
        insert all words associated with a geo object into the word2geo db

        Re-inserts from scratch: any existing word2geo rows for this geo_id
        are removed first.  Each (geo_id, word, source) row carries a 'count'
        incremented once per occurrence.  Returns None.
        '''
        # clear any previous rows for this geo object
        self.mongo().remove({'geo_id': geo.geo_id})

        # words: k=tag, v=list of sanitized words (may have dups)
        words = self.get_field_words(geo)
        # fold in words from any linked pubmed article(s); pubmed_id may be
        # a single value or a list
        if hasattr(geo, 'pubmed_id'):
            if type(geo.pubmed_id) == type([]):
                for pmid in [int(x) for x in geo.pubmed_id]:
                    words.update(self.get_pubmed_words(pmid))
            else:
                words.update(self.get_pubmed_words(int(geo.pubmed_id)))

        # totals: k=source tag, v=number of words inserted for that source
        totals = dict()
        # NOTE(review): the loop variable below shadows the `words` dict being
        # iterated; it works (items() was taken from the dict object before
        # rebinding) but is confusing — consider renaming
        for source, words in words.items():
            for word in words:
                warn("%s: adding %s:%s" % (geo.geo_id, source, word))
                query = {'geo_id': geo.geo_id, 'word': word, 'source': source}
                record = self.mongo().find_one(query)
                if record:
                    # existing row: bump (or initialize) its occurrence count
                    if 'count' in record: record['count'] += 1
                    else: record['count'] = 1
                else:
                    # new row: start counting at 1
                    record = query
                    record['count'] = 1
                self.mongo().save(record)

                try:
                    totals[source] += 1
                except:
                    totals[source] = 1

        warn("%s: %s" % (geo.geo_id, totals))
        return
示例#31
0
    def get_session(self):
        """
        Return (creating and caching on first call) the sqlalchemy session.

        On first call: builds the sqlite engine for the configured db file,
        creates tables for the known model classes, and stores engine,
        metadata and session on self for reuse.
        """
        # NOTE(review): `debug` is fetched but never used in this method
        debug=self.conf_value('debug')
        try: 
            # already connected: reuse the cached session
            return self.session
        except AttributeError: 
            db_name=self.get_db_file()
            self.make_db_dir(db_name)
            engine=create_engine('sqlite:///%s' % db_name, echo=False)
            warn("connected to %s" % (db_name))
            metadata=MetaData()

            # have to import these explicitly because we're in a classmethod: (or something)
            from Rnaseq import Pipeline, Step, Readset, StepRun, PipelineRun, FileOutput 
            classes=[Pipeline,Readset,PipelineRun,StepRun,FileOutput] # omit step
            tables={}
            for cls in classes:
                tables[cls]=cls.create_table(metadata,engine)

            Session=sessionmaker(bind=engine)
            session=Session()
            # cache everything for subsequent calls
            self.engine=engine
            self.metadata=metadata
            self.session=session
            return session
示例#32
0
def main(options):
    """
    Fetch and store the Pubmed record for each id in options.idlist.

    NOTE(review): the bare `continue` in the loop below skips the store()
    call for every pmid and prevents the fuse from ever counting down —
    looks like leftover debugging; confirm before removing.
    """
    warn("getting pmid list..." % ())
    idlist = get_pmidlist(options)
    warn("processing %d ids: %s" % (len(idlist), idlist))
    if options.dry_run: exit(0)

    # fuse: debugging iteration limit; counts down to 0
    fuse = options.fuse
    for pmid in idlist:
        pubmed = Pubmed(pmid)
        warn("pmid is %s" % (pmid))
        continue
        try:
            pubmed.store()  # does the fetching automatically
        except Exception as e:
            warn("%d: caught %s" % (pmid, e))

        fuse -= 1
        if (fuse == 0): break

    exit(0)
示例#33
0
def main(options):
    """
    Fetch and store the Pubmed record for each id in options.idlist.

    NOTE(review): the bare `continue` in the loop below skips the store()
    call for every pmid and prevents the fuse from ever counting down —
    looks like leftover debugging; confirm before removing.
    """
    warn("getting pmid list..." % ())
    idlist=get_pmidlist(options)
    warn("processing %d ids: %s" % (len(idlist), idlist))
    if options.dry_run: exit(0)

    # fuse: debugging iteration limit; counts down to 0
    fuse=options.fuse
    for pmid in idlist:
        pubmed=Pubmed(pmid)
        warn("pmid is %s" % (pmid))
        continue
        try: 
            pubmed.store()      # does the fetching automatically
        except Exception as e:
            warn("%d: caught %s" % (pmid, e))

        fuse-=1
        if (fuse==0): break
                
    exit(0)
示例#34
0
    def test_composite(self):
        """
        Compose the readset and bowtie templates and check the resolved exe
        and the fully expanded usage string.

        NOTE(review): warn() is called with two positional args below, while
        every other call site in this file passes a single string — confirm
        warn's signature.  assertEquals is the deprecated alias of
        assertEqual (removed in python 3.12).
        """
        sy=superyaml(domain='/proj/hoodlab/share/vcassen/rna-seq/rnaseq/templates')
        readset=sy.load('readset.syml',{})

        # first pass: resolve bowtie template against the readset values
        bowtie=sy.load('bowtie.syml',readset)
        warn("bowtie 1",yaml.dump(bowtie))
        self.assertEquals(bowtie['exe'],'bowtie','got d[exe]=%s, expected "bowtie"' % bowtie['exe'])

        # second pass: merge and re-resolve to expand remaining placeholders
        bowtie.update(readset)
        bowtie=sy.load('bowtie.syml',bowtie)
        warn("bowtie 2",yaml.dump(bowtie))

        expected=("%(exe)s %(ewbt)s %(args)s" % hashslice(bowtie,'exe','ewbt','args')) + " ${input} ${output}"
        warn("expected is %s" % expected)
        self.assertEquals(bowtie['usage'], expected,
                          'got bowtie[usage]=%s, expected %s' % (bowtie['usage'], expected))
示例#35
0
 def __init__(self, *args):
     """Construct from exactly one id argument, coerced to int pubmed_id."""
     assert len(args) == 1
     (raw,) = args
     try:
         self.pubmed_id = int(raw)
     except Exception as e:
         warn("args[0]: %s; caught %s" % (raw, e))
示例#36
0
        f.close()
        parser.values.idlist.extend(idlist)

    # build the command-line interface
    # NOTE(review): the '-f' callback option declares no type=, so optparse
    # passes value=None to the callback, and no dest='idlist' is declared, so
    # the hasattr(options, 'idlist') check below is likely always False —
    # confirm intended behavior
    parser = OptionParser()
    parser.add_option('-f',
                      '--id_file',
                      action='callback',
                      callback=id_file_callback,
                      help='file containing list of ids')
    parser.add_option('-n',
                      '--dry-run',
                      dest='dry_run',
                      action='store_true',
                      default=False,
                      help='do not actually store/fetch any ids')
    parser.add_option('--fuse',
                      dest='fuse',
                      type='int',
                      default=-1,
                      help='debugging fuse (limits iterations in main loop)')

    (options, args) = parser.parse_args()
    warn("options are %s" % (options))
    warn("args are %s" % (args))
    # merge positional args into the idlist (creating it if absent)
    if hasattr(options, 'idlist'):
        options.idlist.extend(args)
    else:
        options.idlist = args

    main(options)
示例#37
0
 def setUp(self):
     # emit a newline so each test's log output starts on a fresh line
     warn("\n")
示例#38
0
+            else:
+                with file:
+                    import ConfigParser
+                    config = ConfigParser.ConfigParser()
+                    config.readfp(file)
+                    separator = config.get('parse_qs', envvar_name)
+                    _default_qs_separator = separator
+                config_source = _QS_SEPARATOR_CONFIG_FILENAME
+        if separator is None:
+            # The default is '&', but warn if not specified explicitly
+            if ';' in qs:
+                from warnings import warn
+                warn("The default separator of urlparse.parse_qsl and "
+                    + "parse_qs was changed to '&' to avoid a web cache "
+                    + "poisoning issue (CVE-2021-23336). "
+                    + "By default, semicolons no longer act as query field "
+                    + "separators. "
+                    + "See https://access.redhat.com/articles/5860431 for "
+                    + "more details.",
+                    _QueryStringSeparatorWarning, stacklevel=2)
+            separator = '&'
+        elif separator == 'legacy':
+            separator = _legacy
+        elif len(separator) != 1:
+            raise ValueError(
+                '{} (from {}) must contain '.format(envvar_name, config_source)
+                + '1 character, or "legacy". See '
+                + 'https://access.redhat.com/articles/5860431 for more details.'
+            )
+
     # If max_num_fields is defined then check that the number of fields
     # is less than max_num_fields. This prevents a memory exhaustion DOS
示例#39
0
 def setUp(self):
     # emit a newline so each test's log output starts on a fresh line
     warn("\n")
示例#40
0
 def test_n_too_big(self):
     """str_windows must return [] when the window size exceeds the word count."""
     sentence = 'this is a string with some stuff in it'
     n_words = len(sentence.split(' '))
     warn("n_words is %d" % (n_words))
     self.assertEqual(str_windows(sentence, n_words + 1), [])
示例#41
0
 def test_all_with_data_gene(self):
     """Expect more than 100 samples with gene-level data."""
     found = Sample.all_with_data(id_type='gene')
     warn("got %d 'gene' samples" % (len(found)))
     self.assertTrue(len(found) > 100)
示例#42
0
 def test_all_with_data_probe(self):
     """Expect more than 100 probe samples (ids only)."""
     found = Sample.all_with_data(id_type='probe', ids_only=True)
     warn("got %d 'probe' samples" % (len(found)))
     self.assertTrue(len(found) > 100)
示例#43
0
def insert_series(series):
    """
    Insert words for a series plus its datasets, subsets and samples;
    returns the accumulated per-source totals (None for non-Series input).

    Relies on module-level `seen_dataset` to avoid re-inserting dataset /
    subset words; NOTE(review): seen_dataset is declared global but never
    initialized here — it must be created elsewhere before the first call.
    """
    global seen_dataset;
    global seen_pubmed;
    # pubmed de-dup cache is reset for every series
    seen_pubmed={}
    debug='DEBUG' in os.environ

    # gather ALL the words!
    words=gather_words(series)

    totals=insert_words(series, words)
    if debug: warn("series %s: %s" % (series.geo_id, totals))
    if type(series) != Series: return

    # build up words from datasets and subsets, and insert words as we go:
    # (but only insert dataset/subset words once)
    datasets=series.datasets()
    warn("%s: %d datasets" % (series.geo_id, len(datasets)))
    for dataset in datasets:
        warn("  %s: inserting %s" % (series.geo_id, dataset.geo_id))
        ds_words=gather_words(dataset)
        add_words(words, ds_words)
        if dataset.geo_id not in seen_dataset: 
            ds_totals=insert_words(dataset, ds_words)
            # NOTE(review): this debug line reports `totals`, not ds_totals —
            # looks like a copy-paste slip; confirm
            if debug: warn("dataset %s: %s" % (dataset.geo_id, totals))
            add_totals(totals, ds_totals)

        try: warn("%s: %d subsets" % (dataset.geo_id, dataset.n_subsets))
        except AttributeError: warn("%s: subsets not defined???" % (dataset.geo_id))

        for subset in dataset.subsets():
            warn("  %s: inserting %s" % (series.geo_id, subset.geo_id))
            ss_words=gather_words(subset)
            add_words(words, ss_words)
            if dataset.geo_id not in seen_dataset:
                ss_totals=insert_words(subset, ss_words)
                # NOTE(review): reports `totals`, not ss_totals — confirm
                if debug: warn("subset %s: %s" % (subset.geo_id, totals))
                add_totals(totals,ss_totals)
        seen_dataset[dataset.geo_id]=True

    # add the sum of words from all objects to every sample in the series:
    samples=series.samples()
    warn("%d samples for %s" % (len(samples), series.geo_id))
    for sample in samples:
        warn("  %s: inserting %s" % (series.geo_id, sample.geo_id))
        s_totals=insert_words(sample, words)
        # NOTE(review): reports `totals`, not s_totals — confirm
        if debug: warn("sample %s: %s" % (sample.geo_id, totals))
        add_totals(totals, s_totals)

    return totals
示例#44
0
if __name__ == '__main__':
    def id_file_callback(option, opt, value, parser, *args, **kwargs):
        """
        optparse callback: read ids from the file named by `value` and extend
        parser.values.idlist.

        NOTE(review): the filter `type(x) == int` is always False because
        iterating a file yields strings, so idlist stays empty — confirm and
        fix the intended parsing.
        """
        warn("options is %s" % (option))
        warn("opt is %s" % (opt))
        warn("value is %s" % (value))
        warn("args are %s" % (args))
        warn("kwargs are %s" % (kwargs))

        f=open(value)
        idlist=[x for x in f if type(x) == int]
        f.close()
        parser.values.idlist.extend(idlist)



    # NOTE(review): the '-f' option declares no type=, so optparse passes
    # value=None to the callback; and no dest='idlist' is declared, so the
    # hasattr check below is likely always False — confirm intended behavior
    parser=OptionParser()
    parser.add_option('-f', '--id_file', action='callback', callback=id_file_callback, help='file containing list of ids')
    parser.add_option('-n', '--dry-run', dest='dry_run', action='store_true', default=False, help='do not actually store/fetch any ids')
    parser.add_option('--fuse', dest='fuse', type='int', default=-1, help='debugging fuse (limits iterations in main loop)')
    
    (options, args)=parser.parse_args()
    warn("options are %s" % (options))
    warn("args are %s" % (args))
    # merge positional args into the idlist (creating it if absent)
    if hasattr(options, 'idlist'):
        options.idlist.extend(args)
    else:
        options.idlist=args

    
    main(options)
示例#45
0
 def test_all_ids_with_data(self):
     """Expect well over a thousand probe sample ids with data."""
     probe_ids = Sample.all_ids_with_data(id_type='probe')
     warn("len(ids)=%d" % (len(probe_ids)))
     self.assertTrue(len(probe_ids) > 1000)
示例#46
0
 def test_all_with_data_gene(self):
     """Expect more than 100 samples with gene-level data."""
     found = Sample.all_with_data(id_type='gene')
     warn("got %d 'gene' samples" % (len(found)))
     self.assertTrue(len(found) > 100)
示例#47
0
 def test_all_with_data_probe(self):
     """Expect more than 100 probe samples (ids only)."""
     found = Sample.all_with_data(id_type='probe', ids_only=True)
     warn("got %d 'probe' samples" % (len(found)))
     self.assertTrue(len(found) > 100)
示例#48
0
 def test_with_phenos(self):
     """Every sample returned by with_pheno must carry that phenotype."""
     phenos = ("adenocarcinoma", "normal", "asthma",
               "squamous cell carcinoma",
               "chronic obstructive pulmonary disease",
               "large cell lung carcinoma")
     for pheno in phenos:
         matched = GEO.Sample.Sample.with_pheno(pheno)
         warn("%s: got %d samples" % (pheno, len(matched)))
         for s in matched:
             self.assertEqual(s.phenotype, pheno)
示例#49
0
def dump_words(words, msg):
    """Log the item count for each tag in the words dict, then a blank line."""
    for tag in words:
        warn("%s: %s: %d items" % (msg, tag, len(words[tag])))
    warn("\n")
示例#50
0
 def test_connection(self):
     """Verify a pymongo connection can be opened; keep it on self for later tests."""
     warn(sys._getframe().f_code.co_name)  # testing framework does this anyway...
     conn = pymongo.Connection()
     self.assertIsInstance(conn, pymongo.Connection)
     self.connection = conn
示例#51
0
from sandbox.markers import ArucoMarkers, MarkerDetection
from sandbox.sensor import Sensor
from sandbox import _test_data, _calibration_dir
im_folder = _test_data['test']  # directory holding the saved test frames
import numpy as np
import matplotlib.pyplot as plt
# frame1.npz holds two arrays: arr_0 (used below as depth) and arr_1 (color)
frame = np.load(im_folder + 'frame1.npz')
depth = frame['arr_0']
color = frame['arr_1']
# try to open the real sensor; tests fall back to sensor=None without it
try:
    sensor = Sensor(calibsensor=_calibration_dir + "sensorcalib.json",
                    name='kinect_v2')
except Exception:
    # BUG FIX: the old code did `import warnings as warn` and then called
    # warn(...) — calling the module object raises TypeError; call
    # warnings.warn properly.  Also narrowed the bare except so
    # KeyboardInterrupt/SystemExit are not swallowed.
    import warnings
    warnings.warn("Testing will be performed without the sensor")
    sensor = None


def test_plot_image():
    """Render the depth and color test frames (visual smoke test)."""
    depth_img = frame['arr_0']
    color_img = frame['arr_1']
    for img in (depth_img, color_img):
        plt.imshow(img)
        plt.show()


def test_aruco_detect():
    """Detect aruco markers in the color frame and print the raw results."""
    detector = ArucoMarkers()
    corners, ids, rejected = detector.aruco_detect(color)
    print(corners, ids, rejected)
示例#52
0
import sys, os
sys.path.append(os.path.join(os.environ['AUREA_HOME'], 'src'))
sys.path.append(os.path.join(os.environ['TRENDS_HOME'], 'pylib'))

import GEO
from GEO.word2geo import Word2Geo
from warn import *

# walk every record of each GEO class and insert its words into word2geo
for cls in [
        GEO.Series.Series, GEO.Dataset.Dataset, GEO.DatasetSubset.DatasetSubset
]:
    #for cls in [GEO.Series.Series, GEO.Dataset.Dataset]:
    cursor = cls.mongo().find()
    for record in cursor:
        geo = cls(record)
        warn("inserting %s" % (geo.geo_id))
        Word2Geo.insert_geo(geo)
        # NOTE(review): this break stops after the first record of each
        # class — looks like a debugging leftover; confirm before removing
        break