def setUp(self):
    self.spider = Spider(
        log=Logging(log_file_name="test.log"),
        database=MongoDB(
            collection_name='test_collection_urls',
            database_name='test_database',
        ))
    self.url = "http://www.google.com"
def dummy_db(data):
    database = MongoDB(
        log=Logging(log_file_name="test.log"),
        database_name="test_database",
        collection_name="test_collection_content",
        unique_indexes=['text'],
    )
    # use doc_id rather than shadowing the builtin `id`
    doc_id = database.insert(data)
    return database, doc_id
def __init__(self, *args, **kwargs):
    self.log = kwargs.get('log',
                          Logging(verbose=kwargs.get('verbose', False)))
    self.retry_failed = kwargs.get('retry_failed', 0)
    self.file_path = kwargs.get('file_path', None)
    self.test_exists = kwargs.get('test_exists', False)
    self.database = kwargs.get(
        'database', MongoDB(
            collection_name="crawler_urls",
            unique_indexes=['url'],
        ))
def dummy_search_index():
    search_index = SearchIndex(
        index_path="test",
        log=Logging(log_file_name="test.log"),
    )
    search_index.add(
        content_id=u'1',
        content=u'Testing ABC',
    )
    search_index.add(
        content_id=u'2',
        content=u'Testing DEF',
    )
    return search_index
def __init__(self, collection_name, *args, **kwargs):
    self.log = kwargs.get('log', Logging())
    self.host = kwargs.get('host', MONGO_HOST)
    self.port = kwargs.get('port', MONGO_PORT)
    self.database_name = kwargs.get('database_name', MONGO_NAME)
    self.connection = kwargs.get(
        'connection', Connection(host=self.host, port=self.port))
    self.database = self.connection[self.database_name]
    self.collection = self.database[collection_name]
    self.unique_indexes = kwargs.get("unique_indexes", [])
    # create all required unique indexes
    for index in self.unique_indexes:
        if self.collection.ensure_index(index, unique=True):
            self.log.info("MongoDB", "__init__",
                          "New db index created: %s" % index)
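# Usage sketch (illustrative, not from the original source): construct the
# wrapper above with a unique index on 'url' so duplicate documents are
# rejected at the collection level, then insert one record. Only the insert()
# call already used by dummy_db() is assumed; the collection and database
# names here are hypothetical.
database = MongoDB(
    collection_name='example_urls',
    database_name='example_database',
    unique_indexes=['url'],
)
doc_id = database.insert({'url': 'http://www.example.com'})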
def __init__(self, *args, **kwargs):
    """
    Instantiate the whoosh schema and writer and create/open the index.
    """
    self.schema = kwargs.get(
        'schema', Schema(
            content_id=ID(stored=True, unique=True),
            content=TEXT(),
        ))
    self.log = kwargs.get('log', Logging())
    # get the absolute path and create the dir if required
    self.index_path = kwargs.get('index_path', INDEX_PATH)
    if self.create(self.index_path):
        self.log.info("SearchIndex", "__init__", "New index created.")
    # create an index obj and buffered writer
    self.index_obj = open_dir(self.index_path)
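# Usage sketch (illustrative, not from the original source): open an index at
# a hypothetical path and add a document, following the add() calling
# convention shown in dummy_search_index() above. No other SearchIndex
# methods are assumed to exist.
search_index = SearchIndex(index_path="example_index")
search_index.add(
    content_id=u'1',
    content=u'Example text to index',
)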
def setUp(self):
    self.file_path = "/tmp/test.txt"
    # create a dummy file to parse through
    file_buffer = open(self.file_path, "w+")
    file_buffer.writelines(
        ['http://www.google.com', '\r\n', 'http://www.example.com'])
    file_buffer.close()
    # instantiate the syncdb class
    self.syncdb = SyncDB(
        log=Logging(log_file_name="test.log"),
        retry_failed=0,
        file_path=self.file_path,
        database=MongoDB(
            collection_name='test_collection_urls',
            database_name='test_database',
        ),
        test_exists=True,
    )
def setUp(self):
    self.log_file_name = "test.log"
    self.logging = Logging(
        log_file_name=self.log_file_name,
        debug=True,
    )
def __init__(self, url, *args, **kwargs):
    self.log = kwargs.get('log', Logging())
    self.url = url
def __init__(self, *args, **kwargs):
    self.log = kwargs.get('log',
                          Logging(verbose=kwargs.get('verbose', False)))
    self.database = kwargs.get('database',
                               MongoDB(collection_name="crawler_urls"))
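# Usage sketch (illustrative): assuming this is the Spider constructor
# exercised in the setUp() at the top of this section, both keyword arguments
# are optional and fall back to the defaults shown above.
spider = Spider(verbose=True)  # logs verbosely, stores to crawler_urls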