def main():
    """
    Drain ``data_in`` from the front, pushing every item onto the
    'analysis_objects0' list, then log how many items were pushed
    against how many were present at the start.
    """
    total = len(data_in)
    imported = 0
    while len(data_in) > 0:
        # Items are removed from the head, so data_in is empty afterwards.
        push_object(data_in.pop(0), 'analysis_objects0')
        imported += 1
    # NOTE(review): emitted once as a summary after the drain completes;
    # confirm it was not intended as a per-item progress message.
    logger.info('Imported %d out of %d', imported, total)
def filter_words_list():
    """
    Apply filter_words to every object queued on DbList.analysis_objects0
    and push each result onto DbList.filtered_objects.

    Filtering is done post tagging so that the tagged objects have the
    correct lexical context.
    """
    while True:
        # Stop as soon as the source list reports no remaining entries.
        if list_len(DbList.analysis_objects0) == 0:
            break
        tagged = pop_object(AnalysisObject, DbList.analysis_objects0)
        filtered = filter_words(tagged)
        push_object(filtered, DbList.filtered_objects)
def test_push_analysis_object(self):
    """
    Test that pushing one AnalysisObject after flush_redis() leaves the
    'analysis_objects0' redis list with a length of 1
    """
    flush_redis()
    analysis_obj = AnalysisObject(title='test', info=['foo', 'bar', 'baz'])
    push_object(analysis_obj, 'analysis_objects0')
    with RedisContext() as redis_conn:
        list_length = redis_conn.llen('analysis_objects0')
        self.assertEqual(list_length, 1)
def test_push_query_object(self):
    """
    Test that pushing one QueryObject after flush_redis() leaves the
    'query_objects' redis list with a length of 1
    """
    flush_redis()
    query_obj = QueryObject(title='test', url_query='https://www.example.com')
    push_object(query_obj, 'query_objects')
    with RedisContext() as redis_conn:
        list_length = redis_conn.llen('query_objects')
        self.assertEqual(list_length, 1)
def test_pop_analysis_object(self):
    """
    Test that an entry pushed onto 'analysis_objects0' can be popped back
    as an AnalysisObject instance carrying the same info
    """
    # NOTE(review): the list is not cleared before pushing, so this assumes
    # the popped entry is the one pushed here - confirm queue ordering.
    pushed = AnalysisObject(title='test', info=['foo', 'bar', 'baz'])
    push_object(pushed, 'analysis_objects0')

    popped = pop_object(AnalysisObject, 'analysis_objects0')

    self.assertIsInstance(popped, AnalysisObject)
    self.assertEqual(popped.info, ['foo', 'bar', 'baz'])
def test_pop_query_object(self):
    """
    Test that an entry pushed onto 'query_objects' can be popped back as
    a QueryObject instance carrying the same url_query
    """
    # NOTE(review): the list is not cleared before pushing, so this assumes
    # the popped entry is the one pushed here - confirm queue ordering.
    pushed = QueryObject(title='test', url_query='https://www.example.com')
    push_object(pushed, 'query_objects')

    popped = pop_object(QueryObject, 'query_objects')

    self.assertIsInstance(popped, QueryObject)
    self.assertEqual(popped.url_query, 'https://www.example.com')
def create_analysis_object(qo):
    """
    Build an AnalysisObject for each QueryObject in ``qo`` and push it
    onto the 'analysis_objects0' list.

    Each object's info is whatever get_info() returns for the query's
    url_query attribute. An AttributeError raised anywhere inside the loop
    is logged and stops processing of the remaining queries.

    :param qo: A list of QueryObjects
    :type qo: list[QueryObject]
    """
    try:
        for query in qo:
            logger.info('pid: %d - processing: %s', os.getpid(), query.title)
            title = query.title
            page_info = get_info(query.url_query)
            analysis_obj = AnalysisObject(title=title, info=page_info)
            push_object(analysis_obj, 'analysis_objects0')
    except AttributeError as err:
        # The try wraps the whole loop, so the first failure ends the batch.
        logger.error(err)