Exemplo n.º 1
0
    def test_write(self):
        """Check what ``MongoRangeTarget.write`` leaves in the collection.

        Every case is a 5-tuple:
        ``(field, ids, docs, find_args, expected)`` where ``field`` and
        ``ids`` configure the target, ``docs`` is the argument handed to
        ``write()``, ``find_args`` are the positional arguments of the
        ``collection.find`` call used to read the data back, and
        ``expected`` is the list of documents that query must return.
        """
        cases = [
            # One targeted id, scalar value.
            (
                'age',
                ['person_1'],
                {'person_1': 31},
                ({'_id': {'$in': ['person_1']}}, {'age': True}),
                [{'_id': 'person_1', 'age': 31}],
            ),
            # Two targeted ids, both present in the written mapping.
            (
                'experience',
                ['person_1', 'person_3'],
                {'person_1': 31, 'person_3': 32},
                ({'_id': {'$in': ['person_1', 'person_3']}}, {'experience': True}),
                [{'_id': 'person_1', 'experience': 31}, {'_id': 'person_3', 'experience': 32}],
            ),
            # No targeted ids: the expectation is that person_3 gets no
            # 'experience' value (person_1's 10 presumably comes from the
            # fixture data -- confirm against setUp()).
            (
                'experience',
                [],
                {'person_3': 18},
                ({'_id': {'$in': ['person_1', 'person_3']}}, {'experience': True}),
                [{'_id': 'person_1', 'experience': 10}, {'_id': 'person_3'}],
            ),
            # List value.
            (
                'age',
                ['person_1'],
                {'person_1': ['young', 'old']},
                ({'_id': 'person_1'}, {'age': True}),
                [{'_id': 'person_1', 'age': ['young', 'old']}],
            ),
            # Nested document value.
            (
                'age',
                ['person_1'],
                {'person_1': {'feeling_like': 60}},
                ({'_id': 'person_1'}, {'age': True}),
                [{'_id': 'person_1', 'age': {'feeling_like': 60}}],
            ),
            # List mixing a nested document and a scalar.
            (
                'age',
                ['person_1'],
                {'person_1': [{'feeling_like': 60}, 24]},
                ({'_id': 'person_1'}, {'age': True}),
                [{'_id': 'person_1', 'age': [{'feeling_like': 60}, 24]}],
            ),
        ]

        for field_name, doc_ids, payload, find_args, expected in cases:
            # The fixture hooks are invoked by hand around each case so that
            # one case's write cannot leak into the next one.
            self.setUp()
            range_target = MongoRangeTarget(
                self.mongo_client, INDEX, COLLECTION, doc_ids, field_name
            )
            range_target.write(payload)
            stored = list(self.collection.find(*find_args))
            self.assertEqual(expected, stored)
            self.tearDown()
Exemplo n.º 2
0
    def test_read(self):
        """Check the mapping returned by ``MongoRangeTarget.read``.

        Every case is ``(field, ids, expected)``: the target is built for
        ``field`` over ``ids`` and ``read()`` must return ``expected``.
        """
        cases = [
            ('age', [], {}),
            ('age', ['unknown_person'], {}),
            ('age', ['person_1', 'person_3'], {'person_1': 11, 'person_3': 13}),
            ('age', ['person_1', 'person_3', 'person_5'], {'person_1': 11, 'person_3': 13}),
            ('experience', ['person_1', 'person_3'], {'person_1': 10}),
            ('experience', ['person_1', 'person_3', 'person_5'], {'person_1': 10}),
        ]

        for field_name, doc_ids, expected in cases:
            range_target = MongoRangeTarget(
                self.mongo_client, INDEX, COLLECTION, doc_ids, field_name
            )
            fetched = range_target.read()
            self.assertEqual(expected, fetched)
Exemplo n.º 3
0
    def test_exists(self):
        """Check the boolean returned by ``MongoRangeTarget.exists``.

        Every case is ``(field, ids, expected)``: the target is built for
        ``field`` over ``ids`` and ``exists()`` must return ``expected``.
        """
        cases = [
            ('age', [], True),
            ('age', ['person_1', 'person_2', 'person_3'], True),
            ('experience', ['person_1', 'person_2', 'person_3', 'person_4'], False),
            ('experience', ['person_1', 'person_2'], True),
            ('unknow_field', ['person_1', 'person_2'], False),
            ('experience', ['unknow_person'], False),
            ('experience', ['person_1', 'unknown_person'], False),
            ('experience', ['person_3', 'unknown_person'], False),
        ]

        for field_name, doc_ids, expected in cases:
            range_target = MongoRangeTarget(
                self.mongo_client, INDEX, COLLECTION, doc_ids, field_name
            )
            found = range_target.exists()
            self.assertEqual(expected, found)
    def run(self):
        """Load translated text from the input target and store it in MongoDB.

        Each input line is parsed as JSON and its key/value pairs are merged
        into one mapping (a key seen again on a later line overwrites the
        earlier value). The merged mapping is written through a single
        ``MongoRangeTarget`` built over all collected keys, after which a
        marker string is written to the task's output target.
        """
        print("Beginning StoreTranslation() task ...")
        print("Storing translated text in MongoDB ...")
        mongo = initialize_mongo()

        translations = {}
        with self.input().open("r") as source:
            for line in source:
                record = json.loads(line)
                translations.update(record.items())

        ids = list(translations)
        range_target = MongoRangeTarget(
            mongo, self.db, self.collection, ids, self.field
        )
        range_target.write(translations)

        stored_count = len(ids)
        print("StoreTranslation() task complete")
        print("{} image text translations stored in MongoDB".format(stored_count))

        # Write a dummy output file so that StoreLabel's dependency is fulfilled
        with self.output().open("w") as marker:
            marker.write("done")
 def run(self):
     """Store keyword-filter labels in MongoDB, then delete intermediate files.

     Each input line is parsed as JSON and its key/value pairs are merged
     into one mapping of document id to label. The mapping is written
     through a single ``MongoRangeTarget`` built over all collected ids,
     summary counts are printed, and the pipeline's intermediate text files
     are removed from the current working directory.

     A false ``target.exists()`` after the write is reported on stdout, and
     intermediate files that are already absent are skipped so that the
     remaining ones are still deleted.
     """
     print("Beginning StoreLabel() task ...")
     print("Storing keyword filter labels in MongoDB ...")
     client = initialize_mongo()

     dump = {}
     with self.input().open("r") as in_file:
         for filtered_text in in_file:
             # Later lines overwrite earlier ones for a repeated doc id.
             dump.update(json.loads(filtered_text).items())

     doc_ids = list(dump)
     target = MongoRangeTarget(client, self.db, self.collection, doc_ids, self.field)
     target.write(dump)
     # Check the target after writing so an incomplete store shows up in the
     # task output instead of passing silently.
     # NOTE(review): assumes exists() reports whether the field is set for
     # every id in doc_ids -- confirm against MongoRangeTarget.
     if not target.exists():
         print("Warning: StoreLabel() target does not exist after write")

     print("StoreLabel() task complete")
     print("{} new labels stored in MongoDB".format(len(doc_ids)))
     print(
         "{} posts contain keywords".format(
             sum(value == 1 for value in dump.values())
         )
     )

     print("Clearing all out_files ...")
     out_files = (
         "urls.txt",
         "extracted_text.txt",
         "translated_text.txt",
         "filtered_text.txt",
         "dummy_extraction.txt",
         "dummy_translation.txt",
     )
     for out_file in out_files:
         try:
             os.remove(out_file)
         except FileNotFoundError:
             # Already gone: the goal (file absent) is met, and the labels
             # are stored, so keep deleting the rest rather than failing.
             continue
     print("Out_files cleared")