def __init__(self, name=None, **kwargs):
    """Configure the spider's URLs, storage backends, RPC client and parser.

    Args:
        name: Forwarded unchanged to the parent class initializer.
        **kwargs: Must contain ``host``, ``port``, ``user``, ``passwd``,
            ``db`` and ``collection_name`` (passed positionally, in that
            order, to ``DatabaseFactory``) plus ``wd_host``, ``wd_user`` and
            ``wd_passwd`` (passed to ``WDXmlRPCUtils``). The full mapping is
            also forwarded to the parent class initializer.

    Raises:
        KeyError: If any of the keys listed above is absent from ``kwargs``.
    """
    self.allowed_domains = [websites_allowed_domains.get(self.url_from)]

    # ``is_pagination`` is a bare name, not an attribute of ``self``; it is
    # resolved from the enclosing/module scope, which is not visible here.
    self.start_urls = (
        [WebsiteTypes.get_pagination_url(self.url_from)]
        if is_pagination
        else self.details_urls
    )

    # Function-scope import, kept exactly where the original had it
    # (presumably to dodge an import cycle -- TODO confirm).
    from cwpoliticl.database_factory import DatabaseFactory, CollectionTypes

    factory_arg_keys = (
        'host', 'port', 'user', 'passwd', 'db', 'collection_name',
    )
    factory = DatabaseFactory(*[kwargs[key] for key in factory_arg_keys])
    self._cache_db = factory.get_database(CollectionTypes.cache)
    self._history_db = factory.get_database(CollectionTypes.history)

    from cwpoliticl.extensions.rpc.wordpress_xml_rpc_utils import WDXmlRPCUtils

    self.wd_rpc = WDXmlRPCUtils(
        kwargs['wd_host'],
        kwargs['wd_user'],
        kwargs['wd_passwd'],
    )

    self._parser = websites_parses.get(self.url_from)

    # The parent initializer runs last, after every attribute above is set.
    super(TheIndianEconomistDebugSpider, self).__init__(name, **kwargs)
def __init__(self, name=None, **kwargs):
    """Configure the spider's URLs, storage backends, RPC client and parser.

    Args:
        name: Forwarded unchanged to the parent class initializer.
        **kwargs: Must contain ``host``, ``port``, ``user``, ``passwd``,
            ``db`` and ``collection_name`` (passed positionally, in that
            order, to ``DatabaseFactory``) plus ``wd_host``, ``wd_user`` and
            ``wd_passwd`` (passed to ``WDXmlRPCUtils``). The full mapping is
            also forwarded to the parent class initializer.

    Raises:
        KeyError: If any of the keys listed above is absent from ``kwargs``.
    """
    self.allowed_domains = [websites_allowed_domains.get(self.url_from)]

    # ``is_pagination`` is a bare name, not an attribute of ``self``; it is
    # resolved from the enclosing/module scope, which is not visible here.
    self.start_urls = (
        [WebsiteTypes.get_pagination_url(self.url_from)]
        if is_pagination
        else self.details_urls
    )

    # Function-scope import, kept exactly where the original had it
    # (presumably to dodge an import cycle -- TODO confirm).
    from cwpoliticl.database_factory import DatabaseFactory, CollectionTypes

    factory_arg_keys = (
        'host', 'port', 'user', 'passwd', 'db', 'collection_name',
    )
    factory = DatabaseFactory(*[kwargs[key] for key in factory_arg_keys])
    self._cache_db = factory.get_database(CollectionTypes.cache)
    self._history_db = factory.get_database(CollectionTypes.history)

    from cwpoliticl.extensions.rpc.wordpress_xml_rpc_utils import WDXmlRPCUtils

    self.wd_rpc = WDXmlRPCUtils(
        kwargs['wd_host'],
        kwargs['wd_user'],
        kwargs['wd_passwd'],
    )

    self._parser = websites_parses.get(self.url_from)

    # The parent initializer runs last, after every attribute above is set.
    super(News18DebugSpider, self).__init__(name, **kwargs)