def prepare(self):
    """Authenticate against Mendeley and mark the crawler as initialized.

    Starts the client-credentials flow on ``self._mendeley``, stores the
    authenticated session in ``self._session`` and sets
    ``self._initialized`` to ``True``.

    Any exception raised along the way is logged at critical level and
    swallowed; in that case ``self._initialized`` is left untouched.
    """
    try:
        credentials_flow = self._mendeley.start_client_credentials_flow()
        self._session = credentials_flow.authenticate()
        self._initialized = True
    except Exception as error:
        # Best effort: report the failure instead of propagating it.
        log.critical(error)
def get_profile_by_id(self, profile_id: str) -> Profile:
    """Fetch a single profile through the SDK session and convert it.

    Args:
        profile_id: Identifier handed to ``self._session.profiles.get``.

    Returns:
        The result of ``get_profile_from_json`` applied to the ``json``
        attribute of the SDK profile. When the crawler has not been
        initialized, a critical message is logged and an empty list is
        returned instead.
    """
    if self._initialized:
        sdk_profile = self._session.profiles.get(profile_id)
        return get_profile_from_json(sdk_profile.json)

    # NOTE(review): this path returns an empty list although the
    # annotation says Profile; kept unchanged for existing callers.
    log.critical("get_profile_by_id has been fired but the SDKCrawler was not initialized")
    return []
def get_profile_by_id(self, profile_id: str) -> Profile:
    """Fetch a single profile through the SDK session and convert it.

    Args:
        profile_id: Identifier handed to ``self._session.profiles.get``.

    Returns:
        The result of ``get_profile_from_json`` applied to the ``json``
        attribute of the SDK profile. When the crawler has not been
        initialized, a critical message is logged and an empty list is
        returned instead.
    """
    if self._initialized:
        sdk_profile = self._session.profiles.get(profile_id)
        return get_profile_from_json(sdk_profile.json)

    # NOTE(review): this path returns an empty list although the
    # annotation says Profile; kept unchanged for existing callers.
    log.critical(
        "get_profile_by_id has been fired but the SDKCrawler was not initialized"
    )
    return []
def get_documents_by_group_id(self, group_id: str) -> [Document]:
    """Collect every document of a group, converted from the SDK objects.

    Args:
        group_id: Identifier handed to ``self._session.group_documents``.

    Returns:
        A list with ``get_document_from_json`` applied to the ``json``
        attribute of each document yielded by the SDK (requested with
        ``view='all'``). When the crawler has not been initialized, a
        critical message is logged and an empty list is returned.
    """
    if not self._initialized:
        log.critical("get_documents_by_group_id has been fired but the SDKCrawler was not initialized")
        return []

    sdk_documents = self._session.group_documents(group_id).iter(view='all')
    return [get_document_from_json(item.json) for item in sdk_documents]
def get_group_members(self, group_id: str) -> [Member]:
    """Collect the members of a group, leaving out followers.

    Args:
        group_id: Identifier handed to ``self._session.group_members``.

    Returns:
        A list with ``get_member_from_json`` applied to the
        ``member_json`` attribute of each SDK member, keeping only those
        whose ``role`` is not ``'follower'``. When the crawler has not
        been initialized, a critical message is logged and an empty list
        is returned.
    """
    if not self._initialized:
        log.critical("get_group_members has been fired but the SDKCrawler was not initialized")
        return []

    sdk_members = self._session.group_members(group_id).iter()
    # Convert lazily so each member is converted and then filtered in turn.
    converted = (get_member_from_json(entry.member_json) for entry in sdk_members)
    return [member for member in converted if member.role != 'follower']
def get_documents_by_group_id(self, group_id: str) -> [Document]:
    """Collect every document of a group, converted from the SDK objects.

    Args:
        group_id: Identifier handed to ``self._session.group_documents``.

    Returns:
        A list with ``get_document_from_json`` applied to the ``json``
        attribute of each document yielded by the SDK (requested with
        ``view='all'``). When the crawler has not been initialized, a
        critical message is logged and an empty list is returned.
    """
    if not self._initialized:
        log.critical(
            "get_documents_by_group_id has been fired but the SDKCrawler was not initialized"
        )
        return []

    sdk_documents = self._session.group_documents(group_id).iter(view='all')
    return [get_document_from_json(item.json) for item in sdk_documents]
def get_group_members(self, group_id: str) -> [Member]:
    """Collect the members of a group, leaving out followers.

    Args:
        group_id: Identifier handed to ``self._session.group_members``.

    Returns:
        A list with ``get_member_from_json`` applied to the
        ``member_json`` attribute of each SDK member, keeping only those
        whose ``role`` is not ``'follower'``. When the crawler has not
        been initialized, a critical message is logged and an empty list
        is returned.
    """
    if not self._initialized:
        log.critical(
            "get_group_members has been fired but the SDKCrawler was not initialized"
        )
        return []

    sdk_members = self._session.group_members(group_id).iter()
    # Convert lazily so each member is converted and then filtered in turn.
    converted = (get_member_from_json(entry.member_json) for entry in sdk_members)
    return [member for member in converted if member.role != 'follower']
def get_documents_by_profile_id(self, profile_id: str) -> [Document]:
    """Collect the documents authored by a profile, converted from the SDK.

    Args:
        profile_id: Profile identifier forwarded to the document query.

    Returns:
        A list with ``get_document_from_json`` applied to the ``json``
        attribute of each document yielded by ``ExtendedDocuments``
        (queried with ``view='all'`` and ``authored='true'``). When the
        crawler has not been initialized, a critical message is logged
        and an empty list is returned.
    """
    if not self._initialized:
        log.critical("get_documents_by_profile_id has been fired but the SDKCrawler was not initialized")
        return []

    # Unfortunately the official Mendeley SDK has no support for document
    # queries by non-logged-in profile ids. Therefore the session object is
    # reused here to authenticate a custom call.
    # Critical SDK class:
    # https://github.com/Mendeley/mendeley-python-sdk/blob/master/mendeley/resources/documents.py
    query = ExtendedDocuments(self._session)
    sdk_documents = query.iter(view='all', profile_id=profile_id, authored='true')
    return [get_document_from_json(item.json) for item in sdk_documents]
def get_documents_by_profile_id(self, profile_id: str) -> [Document]:
    """Collect the documents authored by a profile, converted from the SDK.

    Args:
        profile_id: Profile identifier forwarded to the document query.

    Returns:
        A list with ``get_document_from_json`` applied to the ``json``
        attribute of each document yielded by ``ExtendedDocuments``
        (queried with ``view='all'`` and ``authored='true'``). When the
        crawler has not been initialized, a critical message is logged
        and an empty list is returned.
    """
    if not self._initialized:
        log.critical(
            "get_documents_by_profile_id has been fired but the SDKCrawler was not initialized"
        )
        return []

    # Unfortunately the official Mendeley SDK has no support for document
    # queries by non-logged-in profile ids. Therefore the session object is
    # reused here to authenticate a custom call.
    # Critical SDK class:
    # https://github.com/Mendeley/mendeley-python-sdk/blob/master/mendeley/resources/documents.py
    query = ExtendedDocuments(self._session)
    sdk_documents = query.iter(
        view='all',
        profile_id=profile_id,
        authored='true',
    )
    return [get_document_from_json(item.json) for item in sdk_documents]
configuration.load() log.info("Configuration has been loaded") # Create data controller and assert schema # That will remove the race conditions of the gunicorn worker if it's done on every startup data_controller = DataController(configuration.database) data_controller.assert_schema() # Pipeline runner elif command == "pipeline": config = ServiceConfiguration() config.load() data_controller = DataController(config.database) if not data_controller.is_initialized(): log.critical("Database is not initialized") exit() crawler = None if not config.uses_mendeley: log.info("Pipeline uses FileCrawler") crawler = FileCrawler() else: from mendeleycache.crawler.sdk_crawler import SDKCrawler log.info("Pipeline uses SDKCrawler".format( app_id=config.crawler.app_id, app_secret=config.crawler.app_secret)) crawler = SDKCrawler(app_id=config.crawler.app_id, app_secret=config.crawler.app_secret) crawl_controller = CrawlController(crawler,
configuration.load() log.info("Configuration has been loaded") # Create data controller and assert schema # That will remove the race conditions of the gunicorn worker if it's done on every startup data_controller = DataController(configuration.database) data_controller.assert_schema() # Pipeline runner elif command == "pipeline": config = ServiceConfiguration() config.load() data_controller = DataController(config.database) if not data_controller.is_initialized(): log.critical("Database is not initialized") exit() crawler = None if not config.uses_mendeley: log.info("Pipeline uses FileCrawler") crawler = FileCrawler() else: from mendeleycache.crawler.sdk_crawler import SDKCrawler log.info("Pipeline uses SDKCrawler".format( app_id=config.crawler.app_id, app_secret=config.crawler.app_secret )) crawler = SDKCrawler( app_id=config.crawler.app_id, app_secret=config.crawler.app_secret
def prepare(self):
    """Authenticate against Mendeley and mark the crawler as initialized.

    Starts the client-credentials flow on ``self._mendeley``, stores the
    authenticated session in ``self._session`` and sets
    ``self._initialized`` to ``True``.

    Any exception raised along the way is logged at critical level and
    swallowed; in that case ``self._initialized`` is left untouched.
    """
    try:
        credentials_flow = self._mendeley.start_client_credentials_flow()
        self._session = credentials_flow.authenticate()
        self._initialized = True
    except Exception as error:
        # Best effort: report the failure instead of propagating it.
        log.critical(error)