Exemplo n.º 1
0
def testPluginSubClass():
    """Check that the mod_dataprep plugin can be created and configured.

    Builds a mock application instance for a fixed run date, instantiates the
    plugin, verifies its class name, and then applies the application
    configuration followed by the additional configuration step, which is
    given an in-memory session history object.
    """
    global app_inst
    global pluginClassInst
    app_root, _source_dir, _testdata_dir = getAppFolders()
    run_date = '2021-06-10'
    conf_file = os.path.join(app_root, 'conf', 'newslookout.conf')
    app_inst = getMockAppInstance(app_root, run_date, conf_file)
    # Application modules are imported here, after the mock application
    # instance has been created.
    from plugins.mod_dataprep import mod_dataprep
    import data_structs
    import session_hist
    # Create the plugin under test and confirm it is the expected class.
    pluginClassInst = mod_dataprep()
    print(f'Instantiated plugin name: {pluginClassInst.pluginName}')
    plugin_class_name = type(pluginClassInst).__name__
    assert plugin_class_name == "mod_dataprep", \
        "mod_dataprep Plugin was not initialising correctly"
    pluginClassInst.config(app_inst.app_config)
    # The session history lives in an in-memory database for this test.
    history_db = session_hist.SessionHistory(":memory:", threading.Semaphore())
    pluginClassInst.additionalConfig(history_db)
Exemplo n.º 2
0
def test_worker_init():
    """Test PluginWorker initialisation, its setters and URL-list gathering.

    Steps:
      1. Build a mock application instance and configure its queue manager.
      2. Construct a PluginWorker for the URL-list task and check its
         queue-fill wait time and run date.
      3. Replace the plugin's getURLsListForDate with a stub returning three
         fixed URLs, run runURLListGatherTasks() and drain the plugin queue.
      4. Set the domain-to-plugin map and the plugin objects on the worker,
         then call aggregator_url2domain_map(); its result is only printed,
         not asserted.
    """
    (parentFolder, sourceFolder, testdataFolder) = getAppFolders()
    global app_inst
    app_inst = getMockAppInstance(parentFolder,
                                  '2021-06-10',
                                  os.path.join(parentFolder, 'conf', 'newslookout.conf'))
    app_inst.app_queue_manager.config(app_inst.app_config)
    # Application modules are imported after the mock application exists.
    from plugins.mod_en_in_inexp_business import mod_en_in_inexp_business
    import data_structs
    import session_hist
    pluginInst = mod_en_in_inexp_business()
    dbAccessSem = threading.Semaphore()
    # Session history is kept in an in-memory database for this test.
    sessionHistoryDB = session_hist.SessionHistory(':memory:', dbAccessSem)
    from worker import PluginWorker, ProgressWatcher, DataProcessor
    # Qualify Types through the locally imported data_structs module so the
    # test does not depend on a module-level `Types` name being in scope.
    workerInst = PluginWorker(pluginInst,
                              data_structs.Types.TASK_GET_URL_LIST,
                              sessionHistoryDB,
                              app_inst.app_queue_manager)
    assert type(workerInst) == PluginWorker, 'Worker object is not initialising correctly'
    print(f'Queue Fill wait time = {workerInst.queueFillwaitTime}')
    assert workerInst.queueFillwaitTime == 120, 'Worker object is not initialising queue wait time correctly'
    workerInst.setRunDate(app_inst.app_config.rundate)
    assert workerInst.runDate == app_inst.app_config.rundate, 'Worker object is unable to set rundate correctly'

    # test runURLListGatherTasks()
    def patch_fun(paramA, paramB):
        # Stub for getURLsListForDate(): ignores both arguments and returns
        # a fresh list of three fixed URLs on every call.
        return ['https://www.newindianexpress.com/news1',
                'https://www.newindianexpress.com/news2',
                'https://www.newindianexpress.com/news3']

    # patch mock function for getURLsListForDate():
    pluginInst.getURLsListForDate = patch_fun
    workerInst.runURLListGatherTasks()
    assert pluginInst.getQueueSize() > 0, 'runURLListGatherTasks()  is not retrieving URLs correctly.'
    # Drain the plugin queue, printing each item as it is removed.
    while pluginInst.getQueueSize() > 0:
        print(f'Queue size: {pluginInst.getQueueSize()}')
        print(f'Next item in queue: {pluginInst.getNextItemFromFetchQueue()}')

    # test news aggregator URL sourcing logic:
    test_dom_plugin_map = {'www.newindianexpress.com': 'plugin1', 'www.thehindu.com': 'plugin2'}
    # Placeholder byte strings stand in for real plugin objects here.
    allPluginObjs = {'plugin2': b'objectbytes', 'plugin1': b'objectotherbytes'}
    workerInst.setDomainMapAndPlugins(test_dom_plugin_map, allPluginObjs)
    assert workerInst.domainToPluginMap == test_dom_plugin_map, \
        'Worker object is unable to set domainToPluginMap correctly'
    assert workerInst.pluginNameToObjMap == allPluginObjs, \
        'Worker object is unable to set pluginNameToObjMap correctly'
    urlList = ['https://www.newindianexpress.com/news1',
               'https://www.newindianexpress.com/news2',
               'https://www.newindianexpress.com/news3',
               'https://www.thehindu.com/news4',
               'https://www.thehindu.com/news5']
    plugin_to_url_list_map = workerInst.aggregator_url2domain_map(urlList, allPluginObjs, test_dom_plugin_map)
    print(f'plugin_to_url_list_map = {plugin_to_url_list_map}, length = {len(plugin_to_url_list_map)}')
def testPluginSubClass():
    """Test instantiation and configuration of the mod_en_in_forbes plugin.

    Side effects: sets the NLTK_DATA environment variable to the
    'nltk_data' folder under the test data folder, and sets the root
    logger level to DEBUG.

    Checks, in order, that the plugin has the expected class name, that
    config() populates its base directory, config reader, URL patterns and
    date patterns, that its status string is the URL-list state, that
    initNetworkHelper() and setURLQueue() set the expected object types, and
    that a separately constructed NetworkFetcher exposes fetch and connect
    timeouts of 60 and 10.
    """
    (parentFolder, sourceFolder, testdataFolder) = getAppFolders()
    nltk_path = os.path.join(testdataFolder, 'nltk_data')
    print(f'Path for NLTK data is: {nltk_path}')
    os.environ["NLTK_DATA"] = nltk_path
    runDateString = '2021-06-10'
    global app_inst
    global pluginClassInst
    app_inst = getMockAppInstance(parentFolder,
                                  runDateString,
                                  os.path.join(parentFolder, 'conf', 'newslookout.conf'))
    # import application specific modules:
    from plugins.mod_en_in_forbes import mod_en_in_forbes
    import data_structs
    import session_hist
    logging.getLogger().setLevel(logging.DEBUG)
    pluginClassInst = mod_en_in_forbes()
    print(f'Instantiated plugins name: {pluginClassInst.pluginName}')
    assert type(pluginClassInst).__name__ == "mod_en_in_forbes", \
        "mod_en_in_forbes Plugin was not initialising correctly"
    pluginClassInst.config(app_inst.app_config)
    print(f'Base data directory configured as {pluginClassInst.baseDirName}')
    assert len(pluginClassInst.baseDirName) > 0, "mod_en_in_forbes Plugin not configured: baseDirName!"
    assert pluginClassInst.configReader is not None, "mod_en_in_forbes Plugin not configured: configReader!"
    assert len(pluginClassInst.urlMatchPatterns) > 0, "mod_en_in_forbes Plugin not configured: urlMatchPatterns!"
    # NOTE: authorMatchPatterns is deliberately not checked for this plugin
    # (the corresponding assertion was disabled in the original test).
    assert len(pluginClassInst.dateMatchPatterns) > 0, "mod_en_in_forbes Plugin not configured: dateMatchPatterns!"
    print(f'mod_en_in_forbes plugin {pluginClassInst.getStatusString()}')
    assert pluginClassInst.getStatusString() == 'State = STATE_GET_URL_LIST', \
        "mod_en_in_forbes Plugin status not set correctly!"
    pluginClassInst.initNetworkHelper()
    print(f'Network fetcher object instantiated with type = {type(pluginClassInst.networkHelper)}')
    assert type(pluginClassInst.networkHelper) == network.NetworkFetcher, "mod_en_in_forbes network fetcher not init!"
    pluginClassInst.setURLQueue(queue.Queue())
    assert type(pluginClassInst.urlQueue) == queue.Queue, "mod_en_in_forbes queue not set!"
    dbAccessSemaphore = threading.Semaphore()
    # Initialize object that reads and writes session history of completed URLs into a database
    sessionHistoryDB = session_hist.SessionHistory(
        ":memory:",
        dbAccessSemaphore)
    # The values are not used further; the unpacking only requires that
    # printDBStats() returns a pair.
    (urlCount, SQLiteVersion) = sessionHistoryDB.printDBStats()
    app_inst.app_queue_manager.config(app_inst.app_config)
    instNetwork = network.NetworkFetcher(app_inst.app_config, ['www.forbesindia.com'])
    print(f'Network object attribute - customHeader: {instNetwork.customHeader}')
    print(f'Network object attribute - fetch_timeout: {instNetwork.fetch_timeout}')
    assert instNetwork.fetch_timeout == 60, 'NetworkFetcher() not initialising fetch timeout from config file.'
    print(f'Network object attribute - connect_timeout: {instNetwork.connect_timeout}')
    # Failure message names the connect timeout (it previously repeated the
    # fetch-timeout message, which would have misreported a failure).
    assert instNetwork.connect_timeout == 10, 'NetworkFetcher() not initialising connect timeout from config file.'
Exemplo n.º 4
0
def testPluginSubClass():
    """Verify that the mod_en_in_ecotimes plugin initialises and configures.

    Creates a mock application instance, instantiates the plugin, applies
    the application configuration and checks the configured attributes, the
    initial status string, the network helper type and the URL queue type.
    Finally it creates an in-memory session history object and reads its
    database statistics.
    """
    global app_inst
    global pluginClassInst
    app_root, _source_dir, _testdata_dir = getAppFolders()
    run_date = '2021-06-10'
    conf_file = os.path.join(app_root, 'conf', 'newslookout.conf')
    app_inst = getMockAppInstance(app_root, run_date, conf_file)
    # Application modules are imported here, after the mock application
    # instance has been created.
    from plugins.mod_en_in_ecotimes import mod_en_in_ecotimes
    import data_structs
    import session_hist

    pluginClassInst = mod_en_in_ecotimes()
    print(f'Instantiated plugins name: {pluginClassInst.pluginName}')
    assert type(pluginClassInst).__name__ == "mod_en_in_ecotimes", \
        "mod_en_in_ecotimes Plugin was not initialising correctly"

    # Apply the application configuration and check what it populated.
    pluginClassInst.config(app_inst.app_config)
    print(f'Base data directory configured as {pluginClassInst.baseDirName}')
    assert len(pluginClassInst.baseDirName) > 0, \
        "mod_en_in_ecotimes Plugin not configured: baseDirName!"
    assert pluginClassInst.configReader is not None, \
        "mod_en_in_ecotimes Plugin not configured: configReader!"
    assert len(pluginClassInst.urlMatchPatterns) > 0, \
        "mod_en_in_ecotimes Plugin not configured: urlMatchPatterns!"
    assert len(pluginClassInst.authorMatchPatterns) > 0, \
        "mod_en_in_ecotimes not configured: authorMatchPatterns!"
    assert len(pluginClassInst.dateMatchPatterns) > 0, \
        "mod_en_in_ecotimes Plugin not configured: dateMatchPatterns!"

    # A freshly configured plugin is expected to report the URL-list state.
    print(f'mod_en_in_ecotimes plugin {pluginClassInst.getStatusString()}')
    assert pluginClassInst.getStatusString() == 'State = STATE_GET_URL_LIST', \
        "mod_en_in_ecotimes Plugin status not set correctly!"

    pluginClassInst.initNetworkHelper()
    assert type(pluginClassInst.networkHelper) == network.NetworkFetcher, \
        "mod_en_in_ecotimes network fetcher not init!"
    pluginClassInst.setURLQueue(queue.Queue())
    assert type(pluginClassInst.urlQueue) == queue.Queue, \
        "mod_en_in_ecotimes queue not set!"

    # Session history of completed URLs, backed by an in-memory database.
    db_semaphore = threading.Semaphore()
    history_db = session_hist.SessionHistory(":memory:", db_semaphore)
    _completed_count, _sqlite_version = history_db.printDBStats()
Exemplo n.º 5
0
def test_ProgressWatcher_init():
    """Check that a ProgressWatcher object can be constructed.

    Only construction and the resulting object type are verified.
    """
    # TODO: implement the remainder of this test
    global app_inst
    app_root, _source_dir, _testdata_dir = getAppFolders()
    conf_file = os.path.join(app_root, 'conf', 'newslookout.conf')
    app_inst = getMockAppInstance(app_root, '2021-06-10', conf_file)
    app_inst.app_queue_manager.config(app_inst.app_config)
    # Application modules are imported here, after the mock application
    # instance has been created and its queue manager configured.
    from plugins.mod_en_in_inexp_business import mod_en_in_inexp_business
    import data_structs
    import session_hist
    import queue_manager
    # The plugin is instantiated but not passed to the watcher below.
    pluginInst = mod_en_in_inexp_business()
    # Placeholder byte strings stand in for real plugin objects.
    plugin_objs_by_name = {'plugin2': b'objectbytes', 'plugin1': b'objectotherbytes'}
    history_db = session_hist.SessionHistory(':memory:', threading.Semaphore())
    status_tracker = queue_manager.QueueStatus(app_inst.app_queue_manager)
    from worker import PluginWorker, ProgressWatcher, DataProcessor
    watcher = ProgressWatcher(
        plugin_objs_by_name,
        history_db,
        app_inst.app_queue_manager,
        status_tracker,
        55,
    )
    assert type(watcher) == ProgressWatcher, 'ProgressWatcher object is not initialising correctly'
def testPluginSubClass():
    """Test the mod_en_in_ecotimes plugin: configuration and URL queue use.

    The test proceeds in three parts:
      1. Instantiate and configure the plugin against a mock application
         instance, checking its configured attributes, initial status
         string, network helper type and URL queue type.
      2. Add two URLs to the plugin queue via an in-memory session history
         object, then retrieve them, followed by the queue end marker, and
         check the resulting plugin state.
      3. Check that identifyDataPathForRunDate() joins the base directory
         and the run date string.
    """
    (parentFolder, sourceFolder, testdataFolder) = getAppFolders()
    runDateString = '2021-06-10'
    global app_inst
    global pluginClassInst
    app_inst = getMockAppInstance(
        parentFolder, runDateString,
        os.path.join(parentFolder, 'conf', 'newslookout.conf'))
    # import application specific modules:
    from plugins.mod_en_in_ecotimes import mod_en_in_ecotimes
    import data_structs
    import session_hist

    pluginClassInst = mod_en_in_ecotimes()
    print(f'Instantiated plugins name: {pluginClassInst.pluginName}')
    assert type(pluginClassInst).__name__ == "mod_en_in_ecotimes", \
        "mod_en_in_ecotimes Plugin was not initialising correctly"
    pluginClassInst.config(app_inst.app_config)
    print(f'Base data directory configured as {pluginClassInst.baseDirName}')
    assert len(pluginClassInst.baseDirName) > 0, \
        "mod_en_in_ecotimes Plugin not configured: baseDirName!"
    assert pluginClassInst.configReader is not None, \
        "mod_en_in_ecotimes Plugin not configured: configReader!"
    assert len(pluginClassInst.urlMatchPatterns) > 0, \
        "mod_en_in_ecotimes Plugin not configured: urlMatchPatterns!"
    assert len(pluginClassInst.authorMatchPatterns) > 0, \
        "mod_en_in_ecotimes not configured: authorMatchPatterns!"
    assert len(pluginClassInst.dateMatchPatterns) > 0, \
        "mod_en_in_ecotimes Plugin not configured: dateMatchPatterns!"
    print(f'mod_en_in_ecotimes plugin {pluginClassInst.getStatusString()}')
    assert pluginClassInst.getStatusString() == 'State = STATE_GET_URL_LIST', \
        "mod_en_in_ecotimes Plugin status not set correctly!"
    pluginClassInst.initNetworkHelper()
    assert type(pluginClassInst.networkHelper) == network.NetworkFetcher, \
        "mod_en_in_ecotimes network fetcher not init!"
    pluginClassInst.setURLQueue(queue.Queue())
    assert type(pluginClassInst.urlQueue) == queue.Queue, \
        "mod_en_in_ecotimes queue not set!"
    dbAccessSemaphore = threading.Semaphore()
    # Initialize object that reads and writes session history of completed URLs into a database
    sessionHistoryDB = session_hist.SessionHistory(":memory:",
                                                   dbAccessSemaphore)
    (urlCount, SQLiteVersion) = sessionHistoryDB.printDBStats()
    print(
        f'Completed URL count = {urlCount}, SQlite version = {SQLiteVersion}')
    urlList = [
        'https://economictimes.indiatimes.com/blogs/et-editorials/systemic-remedies-beyond-yes-bank/fakeurl',
        'https://economictimes.indiatimes.com/blogs/et-editorials/how-to-really-get-banks-to-lend-more/anotherfake'
    ]
    pluginClassInst.addURLsListToQueue(urlList, sessionHistoryDB)
    # check session history db has required structure:
    # NOTE(review): this connection is not closed in the test; confirm
    # whether openConnFromfile() shares it with sessionHistoryDB before
    # adding a close() here.
    sqlCon = sessionHistoryDB.openConnFromfile(":memory:")
    cur = sqlCon.cursor()
    cur.execute('SELECT count(*) from pending_urls')
    data = cur.fetchone()
    print(f'Count of records in table pending_urls = {data[0]}')
    # check session history db has urls in pending queue:
    todoURLs = sessionHistoryDB.retrieveTodoURLList(pluginClassInst.pluginName)
    print(f'Pending URL listing from session history database = {todoURLs}')
    print(f'Plugin queue size = {pluginClassInst.getQueueSize()}')
    # NOTE(review): urlQueue.qsize() is expected to be 2 while
    # getQueueSize() is expected to be 1; the reason for the difference is
    # not visible from this test - confirm against the plugin implementation.
    assert pluginClassInst.urlQueue.qsize() == 2, \
        "mod_en_in_ecotimes - Cannot add to queue!"
    assert pluginClassInst.getQueueSize() == 1, \
        "mod_en_in_ecotimes - Cannot get proper queue size!"
    retrievedItem = pluginClassInst.getNextItemFromFetchQueue()
    assert retrievedItem == urlList[0], \
        "mod_en_in_ecotimes - Cannot retrieve from queue!"
    assert pluginClassInst.urlQueue.qsize() == 1, \
        "mod_en_in_ecotimes - Cannot get queue size!"
    pluginClassInst.putQueueEndMarker()
    assert pluginClassInst.getNextItemFromFetchQueue() == urlList[1], \
        "mod_en_in_ecotimes - Cannot retrieve item 2!"
    # The item fetched after the end marker was queued must be None itself,
    # so test identity rather than equality.
    assert pluginClassInst.getNextItemFromFetchQueue() is None, \
        "mod_en_in_ecotimes - Cannot retrieve queue sentinel!"
    assert pluginClassInst.pluginState == data_structs.Types.STATE_FETCH_CONTENT, \
        "mod_en_in_ecotimes - Queue sentinel marker did not set the correct state"
    assert pluginClassInst.isQueueEmpty() is True, \
        "mod_en_in_ecotimes - Queue is not empty!"
    datePath = pluginClassInst.identifyDataPathForRunDate(
        pluginClassInst.baseDirName, runDateString)
    print(f'Path for date {runDateString} calculated as: {datePath}')
    assert datePath == os.path.join(pluginClassInst.baseDirName, runDateString), \
        "mod_en_in_ecotimes - path for date not computed correctly!"