示例#1
0
    def test_mongo_data(self):
        """Run ``parent`` three times over all spiders and verify both collections.

        Pass 1 sends the regular fixtures, pass 2 re-sends the same fixtures
        and pass 3 sends the ``*_del`` fixtures.  After each pass the tuple
        returned by ``parent`` and the document counts of ``records`` and
        ``data_active`` are checked against values derived from
        ``len(self.data_all)``.  For every bid in ``self.test_bid`` the
        ``time`` field must stay unchanged across passes while
        ``time_update`` must become newer.

        In this variant ``records`` is expected to hold
        ``len(self.data_all) - 6 + 1`` documents after the first pass and to
        stay at that size after the second one.
        """
        # NOTE(review): ``Cursor.count()`` is deprecated since PyMongo 3.7 and
        # removed in 4.0.  If ``records``/``data_active`` are PyMongo
        # collections, ``count_documents({})`` is the forward-compatible call;
        # confirm the pinned driver version before switching.
        spiders = (
            (berlox.data_api_berlox, self.fetch_data_test_berlox),
            (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA),
            (parse_minfin.data_api_minfin, self.get_triple_data_test),
        )

        # first insert (a fresh list is handed to ``parent`` on every call)
        result = parent(list(spiders), 'records')
        print(result)
        print('self.data_all= ', len(self.data_all))
        first_insert_time = datetime.now(tz=local_tz)

        total = len(self.data_all)
        expected_records = total - 6 + 1

        self.assertEqual(result, (expected_records, 0),
                         '6 - docs with cookies from minfin in "record"; '
                         'in "data_active" present only 1 doc with cookies')
        # TODO: could be a problem. In records inserted not all cookies, in data_active inserted all cookies
        records_count = records.find({}).count()
        active_count = data_active.find({}).count()
        self.assertEqual(result[0], records_count, 'should present 1 doc with cookies')
        self.assertEqual(records_count, expected_records)
        self.assertEqual(active_count, total, 'should be {} in "data_active"'.format(total))
        self.assertEqual(active_count, records_count + 6 - 1)

        test_record_times = {}
        test_record_first_update_time = {}
        for spider, bid in self.test_bid.items():
            doc = data_active.find_one({'bid': bid})
            # Remember the stored values before ``date_depends_time`` gets a
            # chance to adjust the document.
            test_record_times[spider] = doc['time']
            test_record_first_update_time[spider] = doc['time_update']
            # fix date if time is greater than current
            # TODO: need to check whether the production code needs the same change
            doc = date_depends_time(doc)
            self.assertLess(doc['time'], doc['time_update'])
        # make sure the next pass produces a strictly newer ``time_update``
        sleep(1)

        # second insert of same data
        result = parent(list(spiders), 'records')
        self.assertEqual(result, (0, 0))
        for spider, bid in self.test_bid.items():
            doc = data_active.find_one({'bid': bid})
            self.assertEqual(doc['time'], test_record_times[spider],
                             'after UPDATE "time" should be same')
            self.assertGreater(doc['time_update'], test_record_first_update_time[spider],
                               'time_update should be changed in UPDATE ')

        self.assertEqual(data_active.find({}).count(), total,
                         'should be {} in "data_active"'.format(total))
        self.assertEqual(records.find({}).count(), expected_records)
        self.assertLess(first_insert_time, data_active.find_one()['time_update'],
                        'time_update is not newer')

        # third insert: data with deleted records
        spiders_del = [
            (berlox.data_api_berlox, self.fetch_data_test_berlox_del),
            (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA_del),
            (parse_minfin.data_api_minfin, self.get_triple_data_test_del),
        ]
        result = parent(spiders_del, 'records')
        print(result)
        self.assertEqual(result, (0, 8))
        self.assertNotEqual(data_active.find({}).count(), records.find({}).count(),
                            'after insert "records" and "data_active" '
                            'should not be equal')
        for spider, bid in self.test_bid.items():
            doc = data_active.find_one({'bid': bid})
            self.assertEqual(doc['time'], test_record_times[spider],
                             'after UPDATE "time" should be same')
            self.assertGreater(doc['time_update'], test_record_first_update_time[spider],
                               'time_update should be changed in UPDATE')
示例#2
0
def get_selection() -> (set, set, set, set):
    """Collect the distinct filter values present in ``data_active``.

    Every document is read with a projection limited to ``location``,
    ``operation``, ``currency`` and ``source``.  A document lacking one of
    these fields contributes the string ``'None'`` for it.

    Returns:
        A 4-tuple of sets: ``(locations, operations, currencies, sources)``.
    """
    field_names = ('location', 'operation', 'currency', 'source')

    # Projection: include the four fields, drop Mongo's ``_id``.
    projection = {name: 1 for name in field_names}
    projection['_id'] = 0

    distinct = {name: set() for name in field_names}
    for document in data_active.find({}, projection):
        for name in field_names:
            distinct[name].add(document.get(name, 'None'))

    return tuple(distinct[name] for name in field_names)
    def test_insert(self):
        """Run ``parent`` three times with the minfin spider and check the DB.

        The expected numbers are hard-coded for the fixture: ``parent`` must
        return ``(308, 0)`` for the first two passes and ``(302, 6)`` for the
        pass that uses ``self.get_triple_data_test_del``.  ``records`` is
        expected to grow by 308 documents per pass, ``data_active`` to hold
        308 and then 302 documents.  For bid ``'32166583'`` the ``time`` field
        must stay unchanged while ``time_update`` must become newer.
        """
        # NOTE(review): ``Cursor.count()`` is removed in PyMongo 4; consider
        # ``count_documents`` once the driver version is confirmed.
        bid = '32166583'

        # first insert
        result = parent([(data_api_minfin, self.get_triple_data_test), ], 'records')
        print(result)
        first_insert_time = datetime.now(tz=local_tz)
        assert result == (308, 0), 'wrong parent return, etalon is 308 records'
        records_count = records.find({}).count()
        active_count = data_active.find({}).count()
        assert result[0] == records_count, 'function insert report wrong'
        assert records_count == 308, 'should be 308 in "records"'
        assert active_count == 308, 'should be 308 in "data_active"'
        assert active_count == records_count, \
            'after first insert "records" and "data_active" should be equal'
        assert records.find({'session': True}).count() == 6, 'should be 6 cookies in "records"'
        assert data_active.find({'session': True}).count() == 6, 'should be 6 cookies in "data_active"'

        # one round-trip is enough to read both timestamps of the probe bid
        doc = data_active.find_one({'bid': bid})
        test_record_time = doc['time']
        test_record_first_update_time = doc['time_update']
        assert test_record_time < test_record_first_update_time

        # second insert of same data
        result = parent([(data_api_minfin, self.get_triple_data_test), ], 'records')
        assert result == (308, 0), 'wrong parent return, etalon is 308 records'
        doc = data_active.find_one({'bid': bid})
        assert doc['time'] == test_record_time, 'after UPDATE "time" should be same'
        assert doc['time_update'] > test_record_first_update_time, \
            'time_update should be changed in UPDATE'
        assert data_active.find({}).count() == 308, 'should be 308 in "data_active"'
        assert records.find({}).count() == 308 * 2, 'should be 616 in "records"'
        assert first_insert_time < data_active.find_one()['time_update'], 'time_update is not newer'

        # third insert: data with deleted records
        result = parent([(data_api_minfin, self.get_triple_data_test_del), ], 'records')
        print(result)
        assert result == (302, 6), 'wrong parent return, etalon is 302 records'
        active_count = data_active.find({}).count()
        assert result[0] == active_count, 'function insert report wrong'
        assert active_count == 302, 'should be 302 in "data_active"'
        assert active_count != records.find({}).count(), \
            'after insert "records" and "data_active" should not be equal'
        assert data_active.find({'session': True}).count() == 6, 'should be 6 cookies in "data_active"'
        doc = data_active.find_one({'bid': bid})
        assert doc['time'] == test_record_time, 'after UPDATE "time" should be same'
        assert doc['time_update'] > test_record_first_update_time, \
            'time_update should be changed in UPDATE'
    def test_insert(self):
        """Run ``parent`` three times with the finance_ua spider and check the DB.

        Expected values are derived from ``len(self.data)``: ``parent`` must
        return ``(len(self.data), 0)`` for the first two passes and
        ``(len(self.data) - 1, 1)`` for the pass that uses
        ``self.fetch_data_test_del``.  ``records`` is expected to grow by
        ``len(self.data)`` documents per pass.  For bid ``960072`` the
        ``time`` field must stay unchanged while ``time_update`` must become
        newer after the second pass.
        """
        # NOTE(review): ``Cursor.count()`` is removed in PyMongo 4; consider
        # ``count_documents`` once the driver version is confirmed.
        test_bid = 960072

        # first insert
        result = parent([(data_api_finance_ua, self.fetch_data_test), ], 'records')
        print(result)
        first_insert_time = datetime.now(tz=local_tz)
        total = len(self.data)
        assert result == (total, 0), 'wrong parent return, etalon is {} records'.format(total)
        records_count = records.find({}).count()
        active_count = data_active.find({}).count()
        assert result[0] == records_count, 'function insert report wrong'
        assert records_count == total, 'should be {} in "records"'.format(total)
        assert active_count == total, 'should be {} in "data_active"'.format(total)
        assert active_count == records_count, \
            'after first insert "records" and "data_active" should be equal'

        # Snapshot both timestamps of the probe bid with a single query.
        # The ``time < time_update`` check was already disabled (commented
        # out) for this spider, so it is intentionally not asserted here.
        doc = data_active.find_one({'bid': test_bid})
        test_record_time = doc['time']
        test_record_first_update_time = doc['time_update']

        # second insert of same data
        result = parent([(data_api_finance_ua, self.fetch_data_test), ], 'records')
        assert result == (total, 0), 'wrong parent return, etalon is {} records'.format(total)
        doc = data_active.find_one({'bid': test_bid})
        assert doc['time'] == test_record_time, 'after UPDATE "time" should be same'
        assert doc['time_update'] > test_record_first_update_time, \
            'time_update should be changed in UPDATE'
        assert data_active.find({}).count() == total, 'should be {} in "data_active"'.format(total)
        assert records.find({}).count() == total * 2, 'should be {} in "records"'.format(total * 2)
        assert first_insert_time < data_active.find_one()['time_update'], 'time_update is not newer'

        # third insert: data with deleted records
        result = parent([(data_api_finance_ua, self.fetch_data_test_del), ], 'records')
        print(result)
        remaining = total - 1
        assert result == (remaining, 1), 'wrong parent return, etalon is {} records'.format(remaining)
        active_count = data_active.find({}).count()
        assert result[0] == active_count, 'function insert report wrong'
        assert active_count == remaining, 'should be {} in "data_active"'.format(remaining)
        assert active_count != records.find({}).count(), \
            'after insert "records" and "data_active" should not be equal'
        doc = data_active.find_one({'bid': test_bid})
        assert doc['time'] == test_record_time, 'after UPDATE "time" should be same'
        assert doc['time_update'] > test_record_first_update_time, \
            'time_update should be changed in UPDATE'
示例#5
0
    def test_mongo_data(self):
        """Run ``parent`` three times over all spiders and verify both collections.

        Pass 1 sends the regular fixtures, pass 2 re-sends the same fixtures
        and pass 3 sends the ``*_del`` fixtures.  ``parent`` must return
        ``(len(self.data_all), 0)`` for the first two passes and
        ``(len(self.data_all) - 8, 8)`` for the third.  ``records`` is
        expected to grow by ``len(self.data_all)`` per pass while
        ``data_active`` holds ``len(self.data_all)`` and then 8 fewer
        documents.  For every bid in ``self.test_bid`` the ``time`` field
        must stay unchanged while ``time_update`` must become newer.
        """
        # NOTE(review): ``Cursor.count()`` is deprecated since PyMongo 3.7 and
        # removed in 4.0; consider ``count_documents`` once the driver version
        # is confirmed.
        spiders = (
            (berlox.data_api_berlox, self.fetch_data_test_berlox),
            (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA),
            (parse_minfin.data_api_minfin, self.get_triple_data_test),
        )

        # first insert (a fresh list is handed to ``parent`` on every call)
        result = parent(list(spiders), 'records')
        print(result)
        first_insert_time = datetime.now(tz=local_tz)
        total = len(self.data_all)
        self.assertEqual(result, (total, 0),
                         'wrong parent return, etalon is {} records'.format(total))
        records_count = records.find({}).count()
        active_count = data_active.find({}).count()
        self.assertEqual(result[0], records_count,
                         'function reported = {}, '
                         'docs in DB= {}'.format(result[0], records_count))
        self.assertEqual(records_count, total, 'should be {} in "records"'.format(total))
        self.assertEqual(active_count, total, 'should be {} in "data_active"'.format(total))
        self.assertEqual(active_count, records_count,
                         'after first insert "records" and '
                         '"data_active" should be equal')

        test_record_times = {}
        test_record_first_update_time = {}
        for spider, bid in self.test_bid.items():
            # one query per bid is enough to read both timestamps
            doc = data_active.find_one({'bid': bid})
            self.assertLess(doc['time'], doc['time_update'],
                            'bid {bid} with time {time}, '
                            'but in DB update {time_update}'.format(
                                bid=bid, time=doc['time'],
                                time_update=doc['time_update']))
            test_record_times[spider] = doc['time']
            test_record_first_update_time[spider] = doc['time_update']
        # make sure the next pass produces a strictly newer ``time_update``
        sleep(1)

        # second insert of same data
        result = parent(list(spiders), 'records')
        self.assertEqual(result, (total, 0),
                         'wrong parent return ={}, '
                         'etalon is {} records'.format(result, total))
        for spider, bid in self.test_bid.items():
            doc = data_active.find_one({'bid': bid})
            self.assertEqual(doc['time'], test_record_times[spider],
                             'after UPDATE "time" should be same')
            self.assertGreater(doc['time_update'], test_record_first_update_time[spider],
                               'time_update should be changed in UPDATE ')

        self.assertEqual(data_active.find({}).count(), total,
                         'should be {} in "data_active"'.format(total))
        self.assertEqual(records.find({}).count(), total * 2,
                         'should be {} in "records"'.format(total * 2))
        self.assertLess(first_insert_time, data_active.find_one()['time_update'],
                        'time_update is not newer')

        # third insert: data with deleted records
        spiders_del = [
            (berlox.data_api_berlox, self.fetch_data_test_berlox_del),
            (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA_del),
            (parse_minfin.data_api_minfin, self.get_triple_data_test_del),
        ]
        result = parent(spiders_del, 'records')
        print(result)
        remaining = total - 8
        self.assertEqual(result, (remaining, 8),
                         'wrong parent return, '
                         'etalon is {} records'.format(remaining))
        active_count = data_active.find({}).count()
        self.assertEqual(result[0], active_count, 'function insert report wrong')
        self.assertEqual(active_count, remaining,
                         'should be {} in "data_active"'.format(remaining))
        self.assertNotEqual(active_count, records.find({}).count(),
                            'after insert "records" and "data_active" '
                            'should not be equal')
        for spider, bid in self.test_bid.items():
            doc = data_active.find_one({'bid': bid})
            self.assertEqual(doc['time'], test_record_times[spider],
                             'after UPDATE "time" should be same')
            self.assertGreater(doc['time_update'], test_record_first_update_time[spider],
                               'time_update should be changed in UPDATE')
示例#6
0
    def test_mongo_data(self):
        """Run ``parent`` three times over all spiders and verify both collections.

        Pass 1 sends the regular fixtures, pass 2 re-sends the same fixtures
        and pass 3 sends the ``*_del`` fixtures.  ``parent`` must return
        ``(len(self.data_all), 0)`` for the first two passes and
        ``(len(self.data_all) - 8, 8)`` for the third.  ``records`` is
        expected to grow by ``len(self.data_all)`` per pass while
        ``data_active`` holds ``len(self.data_all)`` and then 8 fewer
        documents.  For every bid in ``self.test_bid`` the ``time`` field
        must stay unchanged while ``time_update`` must become newer.
        """
        # NOTE(review): ``Cursor.count()`` is deprecated since PyMongo 3.7 and
        # removed in 4.0; consider ``count_documents`` once the driver version
        # is confirmed.
        all_spiders = (
            (berlox.data_api_berlox, self.fetch_data_test_berlox),
            (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA),
            (parse_minfin.data_api_minfin, self.get_triple_data_test),
        )

        # first insert (a fresh list is handed to ``parent`` on every call)
        result = parent(list(all_spiders), 'records')
        print(result)
        first_insert_time = datetime.now(tz=local_tz)
        expected = len(self.data_all)
        self.assertEqual(result, (expected, 0),
                         'wrong parent return, etalon is {} records'.format(expected))
        in_records = records.find({}).count()
        in_active = data_active.find({}).count()
        self.assertEqual(result[0], in_records,
                         'function reported = {}, '
                         'docs in DB= {}'.format(result[0], in_records))
        self.assertEqual(in_records, expected, 'should be {} in "records"'.format(expected))
        self.assertEqual(in_active, expected, 'should be {} in "data_active"'.format(expected))
        self.assertEqual(in_active, in_records,
                         'after first insert "records" and '
                         '"data_active" should be equal')

        test_record_times = {}
        test_record_first_update_time = {}
        for spider, bid in self.test_bid.items():
            # one query per bid is enough to read both timestamps
            doc = data_active.find_one({'bid': bid})
            self.assertLess(doc['time'], doc['time_update'],
                            'bid {bid} with time {time}, '
                            'but in DB update {time_update}'.format(
                                bid=bid, time=doc['time'],
                                time_update=doc['time_update']))
            test_record_times[spider] = doc['time']
            test_record_first_update_time[spider] = doc['time_update']
        # make sure the next pass produces a strictly newer ``time_update``
        sleep(1)

        # second insert of same data
        result = parent(list(all_spiders), 'records')
        self.assertEqual(result, (expected, 0),
                         'wrong parent return ={}, '
                         'etalon is {} records'.format(result, expected))
        for spider, bid in self.test_bid.items():
            doc = data_active.find_one({'bid': bid})
            self.assertEqual(doc['time'], test_record_times[spider],
                             'after UPDATE "time" should be same')
            self.assertGreater(doc['time_update'], test_record_first_update_time[spider],
                               'time_update should be changed in UPDATE ')

        self.assertEqual(data_active.find({}).count(), expected,
                         'should be {} in "data_active"'.format(expected))
        self.assertEqual(records.find({}).count(), expected * 2,
                         'should be {} in "records"'.format(expected * 2))
        self.assertLess(first_insert_time, data_active.find_one()['time_update'],
                        'time_update is not newer')

        # third insert: data with deleted records
        result = parent([(berlox.data_api_berlox, self.fetch_data_test_berlox_del),
                         (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA_del),
                         (parse_minfin.data_api_minfin, self.get_triple_data_test_del)], 'records')
        print(result)
        after_delete = expected - 8
        self.assertEqual(result, (after_delete, 8),
                         'wrong parent return, '
                         'etalon is {} records'.format(after_delete))
        in_active = data_active.find({}).count()
        self.assertEqual(result[0], in_active, 'function insert report wrong')
        self.assertEqual(in_active, after_delete,
                         'should be {} in "data_active"'.format(after_delete))
        self.assertNotEqual(in_active, records.find({}).count(),
                            'after insert "records" and "data_active" '
                            'should not be equal')
        for spider, bid in self.test_bid.items():
            doc = data_active.find_one({'bid': bid})
            self.assertEqual(doc['time'], test_record_times[spider],
                             'after UPDATE "time" should be same')
            self.assertGreater(doc['time_update'], test_record_first_update_time[spider],
                               'time_update should be changed in UPDATE')
示例#7
0
    def test_mongo_data(self):
        """Run ``parent`` three times over all spiders and verify both collections.

        Pass 1 sends the regular fixtures, pass 2 re-sends the same fixtures
        and pass 3 sends the ``*_del`` fixtures.  ``parent`` must return
        ``(len(self.data_all) - 6 + 1, 0)``, then ``(0, 0)``, then ``(0, 8)``.
        ``records`` is expected to hold ``len(self.data_all) - 6 + 1``
        documents after the first pass and to keep that size after the
        second; ``data_active`` is expected to hold ``len(self.data_all)``.
        For every bid in ``self.test_bid`` the ``time`` field must stay
        unchanged while ``time_update`` must become newer.
        """
        # NOTE(review): ``Cursor.count()`` is deprecated since PyMongo 3.7 and
        # removed in 4.0; consider ``count_documents`` once the driver version
        # is confirmed.
        regular_spiders = (
            (berlox.data_api_berlox, self.fetch_data_test_berlox),
            (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA),
            (parse_minfin.data_api_minfin, self.get_triple_data_test),
        )

        # first insert (a fresh list is handed to ``parent`` on every call)
        result = parent(list(regular_spiders), 'records')
        print(result)
        print('self.data_all= ', len(self.data_all))
        first_insert_time = datetime.now(tz=local_tz)

        fixture_size = len(self.data_all)
        records_expected = fixture_size - 6 + 1

        self.assertEqual(
            result, (records_expected, 0),
            '6 - docs with cookies from minfin in "record"; '
            'in "data_active" present only 1 doc with cookies')
        # TODO: could be a problem. In records inserted not all cookies, in data_active inserted all cookies
        in_records = records.find({}).count()
        in_active = data_active.find({}).count()
        self.assertEqual(result[0], in_records,
                         'should present 1 doc with cookies')
        self.assertEqual(in_records, records_expected)
        self.assertEqual(
            in_active, fixture_size,
            'should be {} in "data_active"'.format(fixture_size))
        self.assertEqual(in_active, in_records + 6 - 1)

        test_record_times = {}
        test_record_first_update_time = {}
        for spider, bid in self.test_bid.items():
            doc = data_active.find_one({'bid': bid})
            # Remember the stored values before ``date_depends_time`` gets a
            # chance to adjust the document.
            test_record_times[spider] = doc['time']
            test_record_first_update_time[spider] = doc['time_update']
            # fix date if time is greater than current
            # TODO: need to check whether the production code needs the same change
            doc = date_depends_time(doc)
            self.assertLess(doc['time'], doc['time_update'])
        # make sure the next pass produces a strictly newer ``time_update``
        sleep(1)

        # second insert of same data
        result = parent(list(regular_spiders), 'records')
        self.assertEqual(result, (0, 0))
        for spider, bid in self.test_bid.items():
            doc = data_active.find_one({'bid': bid})
            self.assertEqual(doc['time'], test_record_times[spider],
                             'after UPDATE "time" should be same')
            self.assertGreater(doc['time_update'], test_record_first_update_time[spider],
                               'time_update should be changed in UPDATE ')

        self.assertEqual(data_active.find({}).count(), fixture_size,
                         'should be {} in "data_active"'.format(fixture_size))
        self.assertEqual(records.find({}).count(), records_expected)
        self.assertLess(first_insert_time, data_active.find_one()['time_update'],
                        'time_update is not newer')

        # third insert: data with deleted records
        result = parent(
            [(berlox.data_api_berlox, self.fetch_data_test_berlox_del),
             (finance_ua.data_api_finance_ua, self.fetch_data_test_finUA_del),
             (parse_minfin.data_api_minfin, self.get_triple_data_test_del)],
            'records')
        print(result)
        self.assertEqual(result, (0, 8))
        self.assertNotEqual(data_active.find({}).count(), records.find({}).count(),
                            'after insert "records" and "data_active" '
                            'should not be equal')
        for spider, bid in self.test_bid.items():
            doc = data_active.find_one({'bid': bid})
            self.assertEqual(doc['time'], test_record_times[spider],
                             'after UPDATE "time" should be same')
            self.assertGreater(doc['time_update'], test_record_first_update_time[spider],
                               'time_update should be changed in UPDATE')
示例#8
0
    # NOTE(review): the enclosing ``def`` line is not part of this snippet;
    # the body matches a function that returns four sets of distinct values.
    # Query every document, projecting only the four filter fields and
    # excluding '_id'.
    selections_result = data_active.find({}, {
        'location': 1,
        'operation': 1,
        'currency': 1,
        'source': 1,
        '_id': 0
    })
    # One set per projected field collects the distinct values seen.
    locations = set()
    operations = set()
    currencies = set()
    sources = set()
    for i in selections_result:
        # A document lacking a field contributes the string 'None' (not the
        # None object) to the corresponding set.
        locations.add(i.get('location', 'None'))
        operations.add(i.get('operation', 'None'))
        currencies.add(i.get('currency', 'None'))
        sources.add(i.get('source', 'None'))
    return locations, operations, currencies, sources


if __name__ == '__main__':
    # Manual check: print every ``data_active`` document that matches the
    # given location/currency/operation and the ``$text`` search expression.
    # NOTE(review): ``location``, ``currency``, ``operation`` and
    # ``filter_or`` are not defined in the visible part of the file.
    run_query = data_active.find
    criteria = {
        'location': location,
        'currency': currency,
        'operation': operation,
        '$text': {'$search': filter_or},
    }
    for document in run_query(criteria):
        print(document)
示例#9
0
    def test_insert(self):
        """Run ``parent`` three times with the finance_ua spider and check the DB.

        Expected values are derived from ``len(self.data)``: ``parent`` must
        return ``(len(self.data), 0)`` for the first two passes and
        ``(len(self.data) - 1, 1)`` for the pass that uses
        ``self.fetch_data_test_del``.  ``records`` is expected to grow by
        ``len(self.data)`` documents per pass.  For bid ``960072`` the
        ``time`` field must stay unchanged while ``time_update`` must become
        newer after the second pass.
        """
        # NOTE(review): ``Cursor.count()`` is removed in PyMongo 4; consider
        # ``count_documents`` once the driver version is confirmed.
        test_bid = 960072

        # first insert
        result = parent([
            (data_api_finance_ua, self.fetch_data_test),
        ], 'records')
        print(result)
        first_insert_time = datetime.now(tz=local_tz)
        fixture_size = len(self.data)
        assert result == (fixture_size, 0), \
            'wrong parent return, etalon is {} records'.format(fixture_size)
        in_records = records.find({}).count()
        in_active = data_active.find({}).count()
        assert result[0] == in_records, 'function insert report wrong'
        assert in_records == fixture_size, \
            'should be {} in "records"'.format(fixture_size)
        assert in_active == fixture_size, \
            'should be {} in "data_active"'.format(fixture_size)
        assert in_active == in_records, \
            'after first insert "records" and "data_active" should be equal'

        # Snapshot both timestamps of the probe bid with a single query.
        # The ``time < time_update`` check was already disabled (commented
        # out) for this spider, so it is intentionally not asserted here.
        probe = data_active.find_one({'bid': test_bid})
        test_record_time = probe['time']
        test_record_first_update_time = probe['time_update']

        # second insert of same data
        result = parent([
            (data_api_finance_ua, self.fetch_data_test),
        ], 'records')
        assert result == (fixture_size, 0), \
            'wrong parent return, etalon is {} records'.format(fixture_size)
        probe = data_active.find_one({'bid': test_bid})
        assert probe['time'] == test_record_time, 'after UPDATE "time" should be same'
        assert probe['time_update'] > test_record_first_update_time, \
            'time_update should be changed in UPDATE'
        assert data_active.find({}).count() == fixture_size, \
            'should be {} in "data_active"'.format(fixture_size)
        assert records.find({}).count() == fixture_size * 2, \
            'should be {} in "records"'.format(fixture_size * 2)
        assert first_insert_time < data_active.find_one()['time_update'], \
            'time_update is not newer'

        # third insert: data with deleted records
        result = parent([
            (data_api_finance_ua, self.fetch_data_test_del),
        ], 'records')
        print(result)
        after_delete = fixture_size - 1
        assert result == (after_delete, 1), \
            'wrong parent return, etalon is {} records'.format(after_delete)
        in_active = data_active.find({}).count()
        assert result[0] == in_active, 'function insert report wrong'
        assert in_active == after_delete, \
            'should be {} in "data_active"'.format(after_delete)
        assert in_active != records.find({}).count(), \
            'after insert "records" and "data_active" should not be equal'
        probe = data_active.find_one({'bid': test_bid})
        assert probe['time'] == test_record_time, 'after UPDATE "time" should be same'
        assert probe['time_update'] > test_record_first_update_time, \
            'time_update should be changed in UPDATE'