def aliases(self, aliases, provider_url_template=None, cache_enabled=True):
    """Expand (namespace, nid) alias tuples with wordpress-specific aliases.

    For every "blog" alias this adds a plain ("url", nid) alias and, when
    the provider responds with an "ID" field, a ("wordpress_blog_id", ...)
    alias.  Only aliases not already present in the input list are returned.

    :param aliases: list of (namespace, nid) tuples
    :param provider_url_template: template for the aliases endpoint;
        falls back to self.aliases_url_template
    :param cache_enabled: passed through to self.http_get
    :returns: list of new (namespace, nid) tuples
    """
    new_aliases = []
    if not provider_url_template:
        provider_url_template = self.aliases_url_template
    for alias in aliases:
        (namespace, nid) = alias
        if namespace=="blog":
            # a blog's nid is its url, so it doubles as a "url" alias
            new_alias = ("url", nid)
            if new_alias not in aliases:
                new_aliases += [new_alias]
            url = self._get_templated_url(provider_url_template, nid, "aliases")
            # try to get a response from the data provider
            response = self.http_get(url, cache_enabled=cache_enabled)
            if (response.status_code == 200) and ("ID" in response.text):
                dict_of_keylists = { 'wordpress_blog_id' : ['ID'] }
                aliases_dict = provider._extract_from_json(response.text, dict_of_keylists)
                new_alias = ("wordpress_blog_id", str(aliases_dict["wordpress_blog_id"]))
                if new_alias not in aliases:
                    new_aliases += [new_alias]
    return new_aliases
def _extract_biblio(self, page, id=None):
    """Build a biblio dict for a Publons peer review from a JSON page."""
    keylists = {
        'title': ['title'],
        'authors': ['author', 'last_name'],
        'journal': ['source', 'provider'],
        'review_url': ['source', 'url'],
        'review_type': ['review_type'],
        'create_date': ['datetime_reviewed'],
        'free_fulltext_url': ['_id', 'url'],
        'source_provider': ['source', 'provider'],
        'source_url': ['source', 'url']
    }
    biblio_dict = provider._extract_from_json(page, keylists)

    biblio_dict["genre"] = "peer review"
    biblio_dict["title"] = "Review of " + biblio_dict["title"]

    # prefer the original source's name; fall back to Publons itself
    source_provider = biblio_dict.get("source_provider")
    biblio_dict["repository"] = source_provider if source_provider else "Publons"

    # the original source url beats the Publons one as a fulltext link
    source_url = biblio_dict.get("source_url")
    if source_url:
        biblio_dict["free_fulltext_url"] = source_url

    try:
        biblio_dict["year"] = biblio_dict["create_date"][0:4]
    except KeyError:
        pass
    return biblio_dict
def _extract_metrics_via_fetch(self, page, status_code=200, id=None):
    """Extract altmetric.com metrics from a fetched JSON page.

    Pulls tweet counts and demographics directly, then derives two extra
    metrics from the raw twitter posts: per-tweeter follower counts and a
    total "impressions" figure (sum of followers across tweeters).
    """
    dict_of_keylists = {
        'altmetric_com:tweets' : ['counts', 'twitter', 'posts_count'],
        'altmetric_com:unique_tweeters' : ['counts', 'twitter', 'unique_users_count'],
        # 'altmetric_com:news' : ['counts', 'news', 'posts_count'],
        # 'altmetric_com:unique_news' : ['counts', 'news', 'unique_users_count'],
        # 'altmetric_com:news_names' : ['counts', 'news', 'unique_users'],
        'altmetric_com:demographics' : ['demographics'],
        'altmetric_com:posts' : ['posts']
    }
    metrics_dict = provider._extract_from_json(page, dict_of_keylists)
    try:
        if metrics_dict['altmetric_com:posts'] and "twitter" in metrics_dict['altmetric_com:posts']:
            twitter_posts = metrics_dict['altmetric_com:posts']["twitter"]
            impressions = 0
            tweeter_followers = []
            for post in twitter_posts:
                # print post["author"]
                twitter_handle = post["author"]["id_on_source"]
                try:
                    followers = post["author"]["followers"]
                    tweeter_followers.append([twitter_handle, followers])
                except KeyError:
                    # this author has no follower count; skip it
                    pass
            impressions = sum([followers for (handle, followers) in tweeter_followers])
            # only record the derived metrics when there is something to report
            if tweeter_followers:
                metrics_dict['altmetric_com:tweeter_followers'] = tweeter_followers
            if impressions:
                metrics_dict['altmetric_com:impressions'] = impressions
    except KeyError:
        pass # no posts
    return metrics_dict
def _extract_metrics(self, page, status_code=200, id=None):
    """Extract wordpress.com metrics (subscribers and views).

    Subscriber count comes from the given page; the view count requires a
    second HTTP call to the stats endpoint keyed by the blog's api key.
    Returns {} on a 404; raises a provider error for other failures.
    """
    if status_code != 200:
        if status_code == 404:
            return {}
        else:
            raise(self._get_error(status_code))
    # a valid wordpress.com response always carries "is_private"
    if not "is_private" in page:
        raise ProviderContentMalformedError
    dict_of_keylists = {
        'wordpresscom:subscribers' : ['subscribers_count']
    }
    metrics_dict = provider._extract_from_json(page, dict_of_keylists)
    blog_url = self.url_from_nid(id)
    api_key = self.api_key_from_nid(id)
    url = self.metrics_url_template_views % (api_key, blog_url)
    response = self.http_get(url)
    try:
        data = json.loads(response.text)
        metrics_dict["wordpresscom:views"] = data["views"]
    except ValueError:
        # stats endpoint returned non-JSON; best-effort, skip views
        pass
    return metrics_dict
def _extract_biblio(self, page, id=None):
    """Extract Publons review metadata from a JSON page into a biblio dict."""
    fields = {
        'title' : ['title'],
        'authors' : ['author', 'last_name'],
        'journal' : ['source', 'provider'],
        'review_url' : ['source', 'url'],
        'review_type' : ['review_type'],
        'create_date' : ['datetime_reviewed'],
        'free_fulltext_url' : ['_id', 'url'],
        'source_provider' : ['source', 'provider'],
        'source_url' : ['source', 'url']
    }
    biblio = provider._extract_from_json(page, fields)
    biblio["genre"] = "peer review"
    biblio["title"] = "Review of " + biblio["title"]

    if biblio.get("source_provider"):
        biblio["repository"] = biblio["source_provider"]
    else:
        biblio["repository"] = "Publons"

    if biblio.get("source_url"):
        biblio["free_fulltext_url"] = biblio["source_url"]  # overwrite with original source

    if "create_date" in biblio:
        # first four characters of the review timestamp are the year
        biblio["year"] = biblio["create_date"][0:4]
    return biblio
def _extract_biblio(self, page, id=None):
    """Pull title and description out of a JSON page."""
    keylists = {
        'title': ['name'],
        'description': ['description']
    }
    return provider._extract_from_json(page, keylists)
def test_extract_json(self):
    """_extract_from_json should follow nested keylists into the fixture."""
    keylists = {
        'title' : ['repository', 'name'],
        'description' : ['repository', 'description']
    }
    extracted = provider._extract_from_json(self.TEST_JSON, keylists)
    expected = {'description': u'Git-based ToDo tool.', 'title': u'gtd'}
    assert_equals(extracted, expected)
def _extract_aliases(self, page, id=None):
    """Return (namespace, nid) alias tuples extracted from page (doi only)."""
    extracted = provider._extract_from_json(page, {"doi": ["doi"]})
    if not extracted:
        return []
    return list(extracted.iteritems())
def _extract_aliases(self, page, id=None):
    """Extract the altmetric.com id from page as a single alias tuple."""
    extracted = provider._extract_from_json(page, {"altmetric_com": ["altmetric_id"]})
    if not extracted:
        return []
    return [("altmetric_com", str(extracted["altmetric_com"]))]
def _extract_aliases(self, page, id=None):
    """Build a list of (namespace, nid) tuples for any doi found in page."""
    keylists = {"doi": ["doi"]}
    extracted = provider._extract_from_json(page, keylists)
    aliases_list = []
    if extracted:
        for namespace, nid in extracted.iteritems():
            aliases_list.append((namespace, nid))
    return aliases_list
def _extract_aliases(self, page, id=None):
    """Return a one-element alias list for the altmetric.com id, or []."""
    keylists = {"altmetric_com": ["altmetric_id"]}
    extracted = provider._extract_from_json(page, keylists)
    if extracted:
        return [("altmetric_com", str(extracted["altmetric_com"]))]
    return []
def _extract_biblio_issn(self, page, id=None):
    """Extract and normalize the ISSN from page; {} when nothing found."""
    biblio = provider._extract_from_json(page, {'issn': ['ISSN']})
    if not biblio:
        return {}
    if "issn" in biblio:
        # keep the first ISSN and drop the hyphen
        biblio["issn"] = biblio["issn"][0].replace("-", "")
    return biblio
def _extract_biblio(self, page, id=None):
    """Extract CrossRef-style biblio metadata from a JSON page.

    Returns a dict with title, year, repository, journal and a flattened
    authors string; returns {} when nothing could be extracted.

    Fix: the regex patterns "\\D" and u"\\s+" were written as plain string
    literals with invalid escape sequences; they are now raw strings.
    """
    dict_of_keylists = {
        'title': ['title'],
        'year': ['issued'],
        'repository': ['publisher'],
        'journal': ['container-title'],
        'authors_literal': ['author']
    }
    biblio_dict = provider._extract_from_json(page, dict_of_keylists)
    if not biblio_dict:
        return {}

    # prefer structured family names; fall back to the "literal" author strings
    try:
        surname_list = [
            author["family"] for author in biblio_dict["authors_literal"]
        ]
        if surname_list:
            biblio_dict["authors"] = u", ".join(surname_list)
            del biblio_dict["authors_literal"]
    except (IndexError, KeyError):
        try:
            literal_list = [
                author["literal"] for author in biblio_dict["authors_literal"]
            ]
            if literal_list:
                biblio_dict["authors_literal"] = u"; ".join(literal_list)
        except (IndexError, KeyError):
            pass

    # "issued" may carry the year as {"raw": ...} or {"date-parts": [[y, m, d]]}
    try:
        if "year" in biblio_dict:
            if "raw" in biblio_dict["year"]:
                biblio_dict["year"] = str(biblio_dict["year"]["raw"])
            elif "date-parts" in biblio_dict["year"]:
                biblio_dict["year"] = str(
                    biblio_dict["year"]["date-parts"][0][0])
            biblio_dict["year"] = re.sub(r"\D", "", biblio_dict["year"])  # digits only
            if not biblio_dict["year"]:
                del biblio_dict["year"]
    except IndexError:
        logger.info(
            u"/biblio_print could not parse year {biblio_dict}".format(
                biblio_dict=biblio_dict))
        del biblio_dict["year"]

    # replace many white spaces and \n with just one space
    try:
        biblio_dict["title"] = re.sub(r"\s+", u" ", biblio_dict["title"])
    except KeyError:
        pass
    return biblio_dict
def _extract_biblio(self, page, nid=None):
    """Extract wordpress.com blog biblio (title, description, url)."""
    if "is_private" not in page:
        raise ProviderContentMalformedError
    keylists = {
        'title': ['name'],
        'description': ['description']
    }
    biblio = provider._extract_from_json(page, keylists)
    biblio["url"] = self.url_from_nid(nid)
    return biblio
def _extract_biblio(self, page, id=None):
    """Map GitHub repo JSON onto a biblio dict."""
    fields = {
        'title': ['name'],
        'description': ['description'],
        'owner': ['owner', 'login'],
        'url': ['svn_url'],
        'last_push_date': ['pushed_at'],
        'create_date': ['created_at']
    }
    return provider._extract_from_json(page, fields)
def test_extract_json(self):
    """Nested repository fields should be flattened into title/description."""
    page = self.TEST_JSON
    result = provider._extract_from_json(page, {
        'title': ['repository', 'name'],
        'description': ['repository', 'description']
    })
    assert_equals(result,
                  {'description': u'Git-based ToDo tool.', 'title': u'gtd'})
def _extract_biblio_issn(self, page, id=None):
    """Return {"issn": <normalized issn>} extracted from page, or {}."""
    extracted = provider._extract_from_json(page, {'issn': ['ISSN']})
    if not extracted:
        return {}
    if "issn" in extracted:
        # first ISSN in the list, hyphen stripped
        extracted["issn"] = extracted["issn"][0].replace("-", "")
    return extracted
def _extract_metrics(self, page, status_code=200, id=None):
    """Extract the Publons view count; {} on 404, raise on other errors."""
    if status_code == 404:
        return {}
    if status_code != 200:
        raise self._get_error(status_code)
    if "views" not in page:
        raise ProviderContentMalformedError
    return provider._extract_from_json(page, {'publons:views': ['stats', 'views']})
def _extract_metrics(self, page, status_code=200, id=None):
    """Pull GitHub watcher and fork counts out of a repo JSON page."""
    if status_code == 404:
        return {}
    if status_code != 200:
        raise self._get_error(status_code)
    keylists = {
        'github:watchers': ['watchers'],
        'github:forks': ['forks']
    }
    return provider._extract_from_json(page, keylists)
def _extract_metrics(self, page, status_code=200, id=None):
    """Extract Topsy tweet counts (total and influential)."""
    if status_code == 404:
        return {}
    if status_code != 200:
        raise self._get_error(status_code)
    return provider._extract_from_json(page, {
        'topsy:tweets': ['response', 'all'],
        'topsy:influential_tweets': ['response', 'influential']
    })
def _extract_metrics(self, page, status_code=200, id=None):
    """Map a Topsy JSON page to tweet-count metrics."""
    if status_code != 200:
        if status_code == 404:
            return {}
        raise self._get_error(status_code)
    keylists = {
        'topsy:tweets' : ['response', 'all'],
        'topsy:influential_tweets' : ['response', 'influential']
    }
    return provider._extract_from_json(page, keylists)
def _extract_metrics_via_fetch(self, page, status_code=200, id=None):
    """Extract altmetric.com metrics from a fetched JSON page.

    Returns tweet counts, demographics and raw posts as extracted by
    provider._extract_from_json.

    Fix: removed the dead local `altmetric_posts` (it was assigned from
    metrics_dict['altmetric_com:posts']["twitter"] inside a try/except
    and never used, so the whole block had no observable effect).
    """
    dict_of_keylists = {
        'altmetric_com:tweets' : ['counts', 'twitter', 'posts_count'],
        'altmetric_com:unique_tweeters' : ['counts', 'twitter', 'unique_users_count'],
        'altmetric_com:demographics' : ['demographics'],
        'altmetric_com:posts' : ['posts']
    }
    metrics_dict = provider._extract_from_json(page, dict_of_keylists)
    return metrics_dict
def _extract_biblio(self, page, id=None):
    """Build a biblio dict from GitHub repo JSON, deriving year from create_date."""
    fields = {
        'title': ['name'],
        'description': ['description'],
        'owner': ['owner', 'login'],
        'url': ['svn_url'],
        'last_push_date': ['pushed_at'],
        'create_date': ['created_at']
    }
    biblio = provider._extract_from_json(page, fields)
    if "create_date" in biblio:
        # ISO timestamp starts with the four-digit year
        biblio["year"] = biblio["create_date"][0:4]
    return biblio
def _extract_metrics_post_comments(self, page, status_code=200, id=None):
    """Extract the comment count for a wordpress.com post."""
    if status_code == 404:
        return {}
    if status_code != 200:
        raise self._get_error(status_code)
    if "comment_count" not in page:
        raise ProviderContentMalformedError
    return provider._extract_from_json(page, {'wordpresscom:comments': ['comment_count']})
def _extract_biblio(self, page, id=None):
    """Extract GitHub repo biblio; year is the first four chars of create_date."""
    keylists = {
        'title': ['name'],
        'description': ['description'],
        'owner': ['owner', 'login'],
        'url': ['svn_url'],
        'last_push_date': ['pushed_at'],
        'create_date': ['created_at']
    }
    extracted = provider._extract_from_json(page, keylists)
    try:
        extracted["year"] = extracted["create_date"][:4]
    except KeyError:
        pass  # no create_date in the page
    return extracted
def _extract_biblio(self, page, id=None):
    """Extract CrossRef-style biblio metadata from a JSON page.

    Returns a dict with title, year, repository, journal and a flattened
    authors string; returns {} when nothing could be extracted.

    Fix: the regex patterns "\\D" and u"\\s+" were plain string literals
    containing invalid escape sequences; they are now raw strings.
    """
    dict_of_keylists = {
        'title' : ['title'],
        'year' : ['issued'],
        'repository' : ['publisher'],
        'journal' : ['container-title'],
        'authors_literal' : ['author']
    }
    biblio_dict = provider._extract_from_json(page, dict_of_keylists)
    if not biblio_dict:
        return {}

    # prefer structured family names; fall back to "literal" author strings
    try:
        surname_list = [author["family"] for author in biblio_dict["authors_literal"]]
        if surname_list:
            biblio_dict["authors"] = u", ".join(surname_list)
            del biblio_dict["authors_literal"]
    except (IndexError, KeyError):
        try:
            literal_list = [author["literal"] for author in biblio_dict["authors_literal"]]
            if literal_list:
                biblio_dict["authors_literal"] = u"; ".join(literal_list)
        except (IndexError, KeyError):
            pass

    # "issued" may carry the year as {"raw": ...} or {"date-parts": [[y, m, d]]}
    try:
        if "year" in biblio_dict:
            if "raw" in biblio_dict["year"]:
                biblio_dict["year"] = str(biblio_dict["year"]["raw"])
            elif "date-parts" in biblio_dict["year"]:
                biblio_dict["year"] = str(biblio_dict["year"]["date-parts"][0][0])
            biblio_dict["year"] = re.sub(r"\D", "", biblio_dict["year"])  # digits only
            if not biblio_dict["year"]:
                del biblio_dict["year"]
    except IndexError:
        logger.info(u"/biblio_print could not parse year {biblio_dict}".format(
            biblio_dict=biblio_dict))
        del biblio_dict["year"]

    # replace many white spaces and \n with just one space
    try:
        biblio_dict["title"] = re.sub(r"\s+", u" ", biblio_dict["title"])
    except KeyError:
        pass
    return biblio_dict
def _extract_metrics_from_open_source_report_card(self, page, status_code=200, id=None):
    """Extract active-repo and language stats from an Open Source Report Card page."""
    if status_code == 404:
        return {}
    if status_code != 200:
        raise self._get_error(status_code)
    if "repositories" not in page:
        raise ProviderContentMalformedError
    keylists = {
        'github_account:active_repos': ['repositories'],
        'github_account:languages': ["usage", 'languages']
    }
    return provider._extract_from_json(page, keylists)
def _extract_metrics(self, page, status_code=200, id=None):
    """Return {"publons:views": ...} from a Publons stats page."""
    if status_code != 200:
        if status_code == 404:
            return {}
        raise self._get_error(status_code)
    if "views" not in page:
        raise ProviderContentMalformedError
    keylists = {'publons:views' : ['stats', 'views']}
    return provider._extract_from_json(page, keylists)
def _extract_metrics(self, page, status_code=200, id=None):
    """Extract GitHub star and fork counts from repo JSON."""
    if status_code == 404:
        return {}
    if status_code != 200:
        raise self._get_error(status_code)
    if "forks_count" not in page:
        raise ProviderContentMalformedError
    keylists = {
        'github:stars': ['watchers'],
        'github:forks': ['forks']
    }
    return provider._extract_from_json(page, keylists)
def _extract_metrics(self, page, status_code=200, id=None):
    """Map GitHub repo JSON to star and fork counts."""
    if status_code != 200:
        if status_code == 404:
            return {}
        raise self._get_error(status_code)
    if "forks_count" not in page:
        raise ProviderContentMalformedError
    return provider._extract_from_json(page, {
        'github:stars' : ['watchers'],
        'github:forks' : ['forks']
    })
def _extract_metrics_from_api_users(self, page, status_code=200, id=None):
    """Extract account-level GitHub metrics from a /users API page."""
    if status_code == 404:
        return {}
    if status_code != 200:
        raise self._get_error(status_code)
    if "followers" not in page:
        raise ProviderContentMalformedError
    keylists = {
        'github_account:followers': ['followers'],
        'github_account:number_repos': ['public_repos'],
        'github_account:number_gists': ['public_gists'],
        'github_account:joined_date': ['created_at']
    }
    return provider._extract_from_json(page, keylists)
def _extract_metrics(self, page, status_code=200, id=None):
    """Extract Mendeley reader stats; the groups metric becomes a count."""
    if "identifiers" not in page:
        raise ProviderContentMalformedError()
    keylists = {
        "mendeley:readers": ["stats", "readers"],
        "mendeley:discipline": ["stats", "discipline"],
        "mendeley:career_stage": ["stats", "status"],
        "mendeley:country": ["stats", "country"],
        "mendeley:groups": ["groups"]
    }
    metrics = provider._extract_from_json(page, keylists)
    # replace the raw group list with its length; leave the entry alone
    # when it is missing or not a sized collection
    try:
        metrics["mendeley:groups"] = len(metrics["mendeley:groups"])
    except (TypeError, KeyError):
        # don't add null or zero metrics
        pass
    return metrics
def _extract_metrics_via_fetch(self, page, status_code=200, id=None):
    """Extract altmetric.com metrics, deriving follower counts and total
    impressions from the raw twitter posts when they are present."""
    keylists = {
        'altmetric_com:tweets': ['counts', 'twitter', 'posts_count'],
        'altmetric_com:unique_tweeters': ['counts', 'twitter', 'unique_users_count'],
        'altmetric_com:demographics': ['demographics'],
        'altmetric_com:posts': ['posts']
    }
    metrics_dict = provider._extract_from_json(page, keylists)
    try:
        all_posts = metrics_dict['altmetric_com:posts']
        if all_posts and "twitter" in all_posts:
            followers_by_handle = []
            for tweet in all_posts["twitter"]:
                author = tweet["author"]
                handle = author["id_on_source"]
                if "followers" in author:
                    followers_by_handle.append([handle, author["followers"]])
            total_impressions = sum(count for (handle, count) in followers_by_handle)
            if followers_by_handle:
                metrics_dict['altmetric_com:tweeter_followers'] = followers_by_handle
            if total_impressions:
                metrics_dict['altmetric_com:impressions'] = total_impressions
    except KeyError:
        pass  # no posts
    return metrics_dict
def _extract_biblio(self, page, id=None):
    """Extract CrossRef-style biblio metadata from a JSON page.

    Flattens structured author records into an "authors" string when
    family names are available, otherwise joins the "literal" strings.
    The "issued" field is unpacked into a year from either a "raw" value
    or "date-parts".  Returns {} when nothing could be extracted.
    """
    dict_of_keylists = {
        'title' : ['title'],
        'year' : ['issued'],
        'repository' : ['publisher'],
        'journal' : ['container-title'],
        'authors_literal' : ['author']
    }
    biblio_dict = provider._extract_from_json(page, dict_of_keylists)
    if not biblio_dict:
        return {}
    # prefer structured family names; fall back to "literal" author strings
    try:
        surname_list = [author["family"] for author in biblio_dict["authors_literal"]]
        if surname_list:
            biblio_dict["authors"] = ", ".join(surname_list)
            del biblio_dict["authors_literal"]
    except (IndexError, KeyError):
        try:
            literal_list = [author["literal"] for author in biblio_dict["authors_literal"]]
            if literal_list:
                biblio_dict["authors_literal"] = "; ".join(literal_list)
        except (IndexError, KeyError):
            pass
    # "issued" may carry the year as {"raw": ...} or {"date-parts": [[y, m, d]]}
    try:
        if "year" in biblio_dict:
            if "raw" in biblio_dict["year"]:
                biblio_dict["year"] = biblio_dict["year"]["raw"]
            elif "date-parts" in biblio_dict["year"]:
                biblio_dict["year"] = biblio_dict["year"]["date-parts"][0][0]
    except IndexError:
        # empty date-parts; drop the unparseable year entirely
        logger.info("could not parse year {biblio_dict}".format(
            biblio_dict=biblio_dict))
        del biblio_dict["year"]
        pass
    return biblio_dict
def _extract_metrics(self, page, status_code=200, id=None):
    """Pull Mendeley readership metrics from page; groups is reported as a count."""
    if "identifiers" not in page:
        raise ProviderContentMalformedError()
    wanted = {
        "mendeley:readers": ["stats", "readers"],
        "mendeley:discipline": ["stats", "discipline"],
        "mendeley:career_stage": ["stats", "status"],
        "mendeley:country": ["stats", "country"],
        "mendeley:groups": ["groups"]
    }
    extracted = provider._extract_from_json(page, wanted)
    # get count of groups; skip silently when missing or unsized
    try:
        extracted["mendeley:groups"] = len(extracted["mendeley:groups"])
    except (TypeError, KeyError):
        pass
    return extracted
def _extract_metrics(self, page, status_code=200, id=None):
    """Extract Topsy tweet counts; search-style pages sum per-post hits."""
    if status_code == 404:
        return {}
    if status_code != 200:
        raise self._get_error(status_code)
    if "hits" in page:
        # search response: one "hits" figure per post, totalled here
        data = provider._load_json(page)
        hit_counts = [entry["hits"] for entry in data["response"]["list"]]
        if not hit_counts:
            return {}
        return {"topsy:tweets": sum(hit_counts)}
    # summary response: counts are already aggregated
    keylists = {
        'topsy:tweets': ['response', 'all'],
        'topsy:influential_tweets': ['response', 'influential']
    }
    return provider._extract_from_json(page, keylists)
def _extract_metrics(self, page, status_code=200, id=None):
    """Map a Topsy JSON page to tweet metrics, summing "hits" pages per post."""
    if status_code != 200:
        if status_code == 404:
            return {}
        raise self._get_error(status_code)
    result = {}
    if "hits" in page:
        # per-post hit counts need to be summed into a single total
        parsed = provider._load_json(page)
        per_post = [item["hits"] for item in parsed["response"]["list"]]
        if per_post:
            result["topsy:tweets"] = sum(per_post)
    else:
        result = provider._extract_from_json(page, {
            'topsy:tweets' : ['response', 'all'],
            'topsy:influential_tweets' : ['response', 'influential']
        })
    return result
def test_extract_json(self):
    """Keylists drill into nested JSON and come back flattened."""
    keylists = {
        "title": ["repository", "name"],
        "description": ["repository", "description"]
    }
    actual = provider._extract_from_json(self.TEST_JSON, keylists)
    assert_equals(actual,
                  {"description": u"Git-based ToDo tool.", "title": u"gtd"})