def _extract_metrics(self, page, status_code=200, id=None):
    """Count the ``entry`` elements of the feed contained in ``page``.

    ``page`` is the raw response body and ``status_code`` its HTTP status.
    ``id`` is accepted for signature compatibility and is not used here.

    Returns ``{'scienceseeker:blog_posts': n}`` when the first ``feed``
    element holds ``n > 0`` entries; returns an empty dict for a 404, an
    unparseable page, a page without a ``feed`` element, or zero entries.

    Raises the error built by ``self._get_error`` for any other non-200
    status, and ``ProviderContentMalformedError`` when the page contains
    neither of the expected marker strings.
    """
    if status_code == 404:
        return {}
    if status_code != 200:
        raise self._get_error(status_code)

    # At least one of the two marker strings must be present in the page.
    if not ("subjectseeker" in page or "Recent Posts" in page):
        raise ProviderContentMalformedError

    doc, _lookup = provider._get_doc_from_xml(page)
    if not doc:
        return {}

    try:
        feeds = doc.getElementsByTagName("feed")
        post_count = len(feeds[0].getElementsByTagName("entry"))
    except (KeyError, IndexError, TypeError):
        return {}

    return {'scienceseeker:blog_posts': post_count} if post_count else {}
def _extract_metrics(self, page, status_code=200, id=None):
    """Count the ``entry`` elements of the feed contained in ``page``.

    ``page`` is the raw response body and ``status_code`` its HTTP status.
    ``id`` is accepted for signature compatibility and is not used here.

    Returns ``{'scienceseeker:blog_posts': n}`` when the first ``feed``
    element holds ``n > 0`` entries; returns an empty dict for a 404, an
    unparseable page, a page without a ``feed`` element, or zero entries.

    Raises the error built by ``self._get_error`` for any other non-200
    status, and ``ProviderContentMalformedError`` when the page contains
    neither of the expected marker strings.
    """
    if status_code == 404:
        return {}
    if status_code != 200:
        raise self._get_error(status_code)

    # At least one of the two marker strings must be present in the page.
    if not ("subjectseeker" in page or "Recent Posts" in page):
        raise ProviderContentMalformedError

    doc, _lookup = provider._get_doc_from_xml(page)
    if not doc:
        return {}

    try:
        feeds = doc.getElementsByTagName("feed")
        post_count = len(feeds[0].getElementsByTagName("entry"))
    except (KeyError, IndexError, TypeError):
        return {}

    return {'scienceseeker:blog_posts': post_count} if post_count else {}
def _extract_aliases_from_pmid(self, page, pmid):
    """Build doi/pmc/url aliases from the XML record in ``page``.

    ``pmid`` is accepted for signature compatibility and is not used here.

    Returns a list of ``(namespace, value)`` tuples: a ``doi`` alias plus
    its dx.doi.org ``url`` when a DOI containing ``"10."`` is found, and a
    ``pmc`` alias plus its ncbi.nlm.nih.gov ``url`` when a PMC id is found.
    The list is empty when neither is present.
    """
    doc, _lookup = provider._get_doc_from_xml(page)

    doi = None
    pmc = None
    try:
        id_list = doc.getElementsByTagName("ArticleIdList")[0]
        for node in id_list.getElementsByTagName("ArticleId"):
            id_type = node.getAttribute("IdType")
            if id_type == u"doi":
                doi = node.firstChild.data
            elif id_type == u"pmc":
                pmc = node.firstChild.data

        if not doi:
            # Second chance: some records carry the DOI only as a valid
            # ELocationID under Article (the original comment cites the
            # efetch record for PubMed id 23682040 as an example).
            article = doc.getElementsByTagName("Article")[0]
            for eloc in article.getElementsByTagName("ELocationID"):
                if (eloc.getAttribute("EIdType") == u"doi"
                        and eloc.getAttribute("ValidYN") == u"Y"):
                    doi = eloc.firstChild.data
    except (IndexError, TypeError):
        # Best effort: keep whatever was collected before the failure.
        pass

    aliases = []
    # Sometimes there is no doi, or the doi field only holds a fragment;
    # only values containing "10." are accepted.
    if doi and "10." in doi:
        aliases.extend([("doi", doi),
                        ("url", "http://dx.doi.org/" + doi)])
    if pmc:
        aliases.extend([("pmc", pmc),
                        ("url", "http://www.ncbi.nlm.nih.gov/pmc/articles/" + pmc)])
    return aliases
def _extract_metrics(self, page, status_code=200, id=None): #logger.info(u"_extract_metrics with %s, %i,\n%s\n" % (id, status_code, page)) if status_code != 200: if (status_code == 404): return {} else: raise (self._get_error(status_code)) (doc, lookup_function) = provider._get_doc_from_xml(page) try: searchinfo = doc.getElementsByTagName('searchinfo') totalhits = int(searchinfo[0].attributes['totalhits'].value) except (TypeError, IndexError): raise ProviderContentMalformedError( "No searchinfo in response document") if totalhits: metrics_dict = {"wikipedia:mentions": totalhits} else: metrics_dict = {} #logger.info(u"_extract_metrics returns metrics_dict %s" % (str(metrics_dict))) return metrics_dict
def _extract_metrics(self, page, status_code=200, id=None): if status_code != 200: if status_code == 404: return {} else: raise (self._get_error(status_code)) if "<pmc-web-stat>" not in page: raise ProviderContentMalformedError (doc, lookup_function) = provider._get_doc_from_xml(page) if not doc: return {} try: articles = doc.getElementsByTagName("article") for article in articles: print article metrics_dict = {} meta_data = article.getElementsByTagName("meta-data")[0] pmid = meta_data.getAttribute("pubmed-id") if id == pmid: metrics = article.getElementsByTagName("usage")[0] pdf_downloads = int(metrics.getAttribute("pdf")) if pdf_downloads: metrics_dict.update( {'pmc:pdf_downloads': pdf_downloads}) abstract_views = int(metrics.getAttribute("abstract")) if abstract_views: metrics_dict.update( {'pmc:abstract_views': abstract_views}) fulltext_views = int(metrics.getAttribute("full-text")) if fulltext_views: metrics_dict.update( {'pmc:fulltext_views': fulltext_views}) unique_ip_views = int(metrics.getAttribute("unique-ip")) if unique_ip_views: metrics_dict.update( {'pmc:unique_ip_views': unique_ip_views}) figure_views = int(metrics.getAttribute("figure")) if figure_views: metrics_dict.update({'pmc:figure_views': figure_views}) suppdata_views = int(metrics.getAttribute("supp-data")) if suppdata_views: metrics_dict.update( {'pmc:suppdata_views': suppdata_views}) return metrics_dict except (KeyError, IndexError, TypeError): pass return {}
def _extract_citing_pmcids(self, page, id=None):
    """Return the text of every ``PMCID`` element found in ``page``.

    ``id`` is optional and only used to label the warning that is logged
    when the PMCID lookup fails. It was added because the warning
    previously formatted the builtin ``id`` function, as no such name
    existed in this scope. Existing single-argument calls keep working.

    Returns a list of PMCID strings, or an empty list when the lookup
    raises ``TypeError``.

    Raises ``ProviderContentMalformedError`` when the page does not
    contain the ``PubMedToPMCcitingformSET`` marker.
    """
    if "PubMedToPMCcitingformSET" not in page:
        raise ProviderContentMalformedError()

    doc, _lookup = provider._get_doc_from_xml(page)
    try:
        pmcid_doms = doc.getElementsByTagName("PMCID")
        pmcids = [pmcid_dom.firstChild.data for pmcid_dom in pmcid_doms]
    except TypeError:
        logger.warning(u"%20s no PMCID xml tags for %s" % (self.provider_name, id))
        pmcids = []
    return pmcids
def _extract_metrics(self, page, status_code=200, id=None): if status_code != 200: if status_code == 404: return {} else: raise(self._get_error(status_code)) if "<pmc-web-stat>" not in page: raise ProviderContentMalformedError (doc, lookup_function) = provider._get_doc_from_xml(page) if not doc: return {} try: articles = doc.getElementsByTagName("article") for article in articles: print article metrics_dict = {} meta_data = article.getElementsByTagName("meta-data")[0] pmid = meta_data.getAttribute("pubmed-id") if id == pmid: metrics = article.getElementsByTagName("usage")[0] pdf_downloads = int(metrics.getAttribute("pdf")) if pdf_downloads: metrics_dict.update({'pmc:pdf_downloads': pdf_downloads}) abstract_views = int(metrics.getAttribute("abstract")) if abstract_views: metrics_dict.update({'pmc:abstract_views': abstract_views}) fulltext_views = int(metrics.getAttribute("full-text")) if fulltext_views: metrics_dict.update({'pmc:fulltext_views': fulltext_views}) unique_ip_views = int(metrics.getAttribute("unique-ip")) if unique_ip_views: metrics_dict.update({'pmc:unique_ip_views': unique_ip_views}) figure_views = int(metrics.getAttribute("figure")) if figure_views: metrics_dict.update({'pmc:figure_views': figure_views}) suppdata_views = int(metrics.getAttribute("supp-data")) if suppdata_views: metrics_dict.update({'pmc:suppdata_views': suppdata_views}) return metrics_dict except (KeyError, IndexError, TypeError): pass return {}
def _filter(self, id, citing_pmcids, filter_ptype):
    """Fetch the ids among ``citing_pmcids`` that match ``filter_ptype``.

    Builds a ``<filter_ptype>[ptyp] AND (PMC… OR PMC…)`` query, substitutes
    it into ``self.metrics_pmc_filter_url_template``, fetches the page via
    ``self._get_eutils_page`` and collects the text of every ``Id``
    element in the response.

    ``id`` is passed through to the page fetch and used in the warning
    logged when the ``Id`` lookup raises ``TypeError``, in which case an
    empty list is returned.
    """
    prefixed_ids = ["PMC" + pmcid for pmcid in citing_pmcids]
    id_clause = " OR ".join(prefixed_ids)
    query_string = filter_ptype + "[ptyp] AND (" + id_clause + ")"

    filter_url = self.metrics_pmc_filter_url_template % query_string
    page = self._get_eutils_page(filter_url, id)
    doc, _lookup = provider._get_doc_from_xml(page)

    try:
        id_nodes = doc.getElementsByTagName("Id")
        return [node.firstChild.data for node in id_nodes]
    except TypeError:
        logger.warning(u"%20s no Id xml tags for %s" % (self.provider_name, id))
        return []
def _extract_aliases_from_pmid(self, page, pmid):
    """Build a doi alias from the XML record in ``page``.

    ``pmid`` is accepted for signature compatibility and is not used here.

    Looks at the ``ArticleId`` children of the first ``ArticleIdList`` and
    keeps the value whose ``IdType`` is ``doi``. Returns
    ``[("doi", value)]`` when that value contains ``"10."``, otherwise an
    empty list.
    """
    doc, _lookup = provider._get_doc_from_xml(page)

    doi = None
    try:
        id_list = doc.getElementsByTagName("ArticleIdList")[0]
        for node in id_list.getElementsByTagName("ArticleId"):
            if node.getAttribute("IdType") == u"doi":
                doi = node.firstChild.data
    except (IndexError, TypeError):
        # Best effort: a record without the expected structure has no doi.
        pass

    # Sometimes there is no doi, or the doi field only holds a fragment;
    # only values containing "10." are accepted.
    if doi and "10." in doi:
        return [("doi", doi)]
    return []
def _extract_metrics(self, page, status_code=200, id=None): #logger.info(u"_extract_metrics with %s, %i,\n%s\n" % (id, status_code, page)) if status_code != 200: if (status_code == 404): return {} else: raise(self._get_error(status_code)) (doc, lookup_function) = provider._get_doc_from_xml(page) try: searchinfo = doc.getElementsByTagName('searchinfo') totalhits = int(searchinfo[0].attributes['totalhits'].value) except (TypeError, IndexError): raise ProviderContentMalformedError("No searchinfo in response document") if totalhits: metrics_dict = {"wikipedia:mentions": totalhits} else: metrics_dict = {} #logger.info(u"_extract_metrics returns metrics_dict %s" % (str(metrics_dict))) return metrics_dict