Example #1
    def test_bson_ts_to_long(self):
        """Test bson_ts_to_long and long_to_bson_ts
        """

        tstamp = timestamp.Timestamp(0x12345678, 0x90abcdef)

        self.assertEqual(0x1234567890abcdef, bson_ts_to_long(tstamp))
        self.assertEqual(long_to_bson_ts(0x1234567890abcdef), tstamp)

    def test_Timestamps(self):
        """Tests mongo operations with Timestamps"""
        conn = yield txmongo.MongoConnection(mongo_host, mongo_port)
        test = conn.foo.test_ts

        yield test.drop()

        # insert with specific timestamp
        doc1 = {'_id': objectid.ObjectId(),
                'ts': timestamp.Timestamp(1, 2)}
        yield test.insert(doc1, safe=True)

        result = yield test.find_one(doc1)
        self.assertEqual(result.get('ts').time, 1)
        self.assertEqual(result.get('ts').inc, 2)

        # insert with specific timestamp
        doc2 = {'_id': objectid.ObjectId(),
                'ts': timestamp.Timestamp(2, 1)}
        yield test.insert(doc2, safe=True)

        # the objects come back sorted by ts correctly.
        # (test that we stored inc/time in the right fields)
        result = yield test.find(filter=qf.sort(qf.ASCENDING('ts')))
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0]['_id'], doc1['_id'])
        self.assertEqual(result[1]['_id'], doc2['_id'])

        # insert with null timestamp
        doc3 = {'_id': objectid.ObjectId(),
                'ts': timestamp.Timestamp(0, 0)}
        yield test.insert(doc3, safe=True)

        # a null (0, 0) timestamp is replaced by the server with the current time
        result = yield test.find_one(doc3['_id'])
        now = time.time()
        self.assertTrue(now - 2 <= result['ts'].time <= now)

        # delete
        yield test.remove(doc1["_id"], safe=True)
        yield test.remove(doc2["_id"], safe=True)
        yield test.remove(doc3["_id"], safe=True)

        # disconnect
        yield conn.disconnect()
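The round-trip asserted above only holds if bson_ts_to_long packs the Timestamp's time into the high 32 bits and its inc into the low 32 bits (0x12345678 and 0x90abcdef recombine into 0x1234567890abcdef). A minimal sketch of two helpers consistent with that assertion, assuming this packing is what the tested project implements:

from bson import timestamp

def bson_ts_to_long(ts):
    # pack: time in the high 32 bits, inc in the low 32 bits
    return (ts.time << 32) + ts.inc

def long_to_bson_ts(val):
    # unpack: inverse of bson_ts_to_long
    return timestamp.Timestamp(val >> 32, val & 0xFFFFFFFF)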
Example #3
def load_request_attr(question, request):
    question_attrs = [
        'description', 'answer', 'dismissed', 'category', 'date'
    ]
    for attr_name in question_attrs:
        attr = request.json.get(attr_name)
        if attr_name == 'date' and attr is not None:
            # the date arrives as milliseconds; Timestamp takes whole seconds
            attr = timestamp.Timestamp(int(attr) // 1000, 1)
        question[attr_name] = attr
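The date field arrives as JavaScript-style milliseconds since the epoch, so the helper divides by 1000 to get the whole seconds a BSON Timestamp expects, pinning inc to 1. A quick illustration with a hypothetical request value:

from bson import timestamp

millis = 1609459200000  # hypothetical request.json['date'] value
ts = timestamp.Timestamp(millis // 1000, 1)
assert ts.time == 1609459200 and ts.inc == 1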
Example #4
    def test_bson_ts_to_long(self):
        """Test bson_ts_to_long and long_to_bson_ts
        """

        ts = timestamp.Timestamp(0x12345678, 0x90abcdef)

        self.assertEqual(0x1234567890abcdef, bson_ts_to_long(ts))
        self.assertEqual(long_to_bson_ts(0x1234567890abcdef), ts)
        print("PASSED BSON TS TO LONG")
Example #5
def oplog_has_aged_out(client, state, tap_stream_id):
    stream_state = state.get('bookmarks', {}).get(tap_stream_id)
    if not stream_state or not stream_state.get('oplog_ts_time'):
        return False

    # the oldest entry still present in the oplog, in $natural (insertion) order
    earliest_ts_row = client.local.oplog.rs.find_one(
        sort=[('$natural', pymongo.ASCENDING)])
    earliest_ts = earliest_ts_row.get('ts')

    bookmarked_ts = timestamp.Timestamp(stream_state['oplog_ts_time'],
                                        stream_state['oplog_ts_inc'])

    # aged out if the bookmark is older than everything left in the oplog
    return bookmarked_ts < earliest_ts
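The final comparison relies on bson.timestamp.Timestamp supporting rich comparison: instances order by time first, then by inc. For example:

from bson import timestamp

assert timestamp.Timestamp(1, 2) < timestamp.Timestamp(2, 1)  # time decides
assert timestamp.Timestamp(1, 1) < timestamp.Timestamp(1, 2)  # inc breaks ties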
Example #6
def sync_collection(client, stream, state, stream_projection):
    tap_stream_id = stream['tap_stream_id']
    LOGGER.info('Starting oplog sync for %s', tap_stream_id)

    md_map = metadata.to_map(stream['metadata'])
    database_name = metadata.get(md_map, (), 'database-name')
    collection_name = stream.get('table_name')
    stream_state = state.get('bookmarks', {}).get(tap_stream_id)

    oplog_ts = timestamp.Timestamp(stream_state['oplog_ts_time'],
                                   stream_state['oplog_ts_inc'])

    # Write activate version message
    version = common.get_stream_version(tap_stream_id, state)
    activate_version_message = singer.ActivateVersionMessage(
        stream=common.calculate_destination_stream_name(stream),
        version=version)
    singer.write_message(activate_version_message)

    time_extracted = utils.now()
    rows_saved = 0
    start_time = time.time()

    oplog_query = {'ts': {'$gte': oplog_ts}}

    projection = transform_projection(stream_projection)

    oplog_replay = stream_projection is None

    LOGGER.info(
        'Querying %s with:\n\tFind Parameters: %s\n\tProjection: %s\n\toplog_replay: %s',
        tap_stream_id, oplog_query, projection, oplog_replay)

    update_buffer = set()

    # consider adding oplog_replay, but this would require removing the projection
    # default behavior is a non-tailable cursor, but we might want a tailable one
    # regardless of whether it's long-lived or not.
    with client.local.oplog.rs.find(oplog_query,
                                    projection,
                                    sort=[('$natural', pymongo.ASCENDING)],
                                    oplog_replay=oplog_replay) as cursor:
        for row in cursor:
            # assert that Mongo is respecting the ts query and sort order
            if row.get('ts') and row.get('ts') < oplog_ts:
                raise common.MongoAssertionException(
                    "Mongo is not honoring the query param")
            if row.get('ts') and row.get('ts') < timestamp.Timestamp(
                    stream_state['oplog_ts_time'],
                    stream_state['oplog_ts_inc']):
                raise common.MongoAssertionException(
                    "Mongo is not honoring the sort ascending param")

            if row.get('ns') != '{}.{}'.format(database_name, collection_name):
                if row.get('ts'):
                    state = update_bookmarks(state, tap_stream_id, row['ts'])
                continue

            row_op = row['op']
            if row_op == 'i':

                record_message = common.row_to_singer_record(
                    stream, row['o'], version, time_extracted)
                singer.write_message(record_message)

                rows_saved += 1

            elif row_op == 'u':
                update_buffer.add(row['o2']['_id'])

            elif row_op == 'd':

                # remove update from buffer if that document has been deleted
                if row['o']['_id'] in update_buffer:
                    update_buffer.remove(row['o']['_id'])

                # Delete ops only contain the _id of the row deleted
                row['o'][SDC_DELETED_AT] = row['ts']

                record_message = common.row_to_singer_record(
                    stream, row['o'], version, time_extracted)
                singer.write_message(record_message)

                rows_saved += 1

            state = update_bookmarks(state, tap_stream_id, row['ts'])

            # flush buffer if it has filled up
            if len(update_buffer) >= MAX_UPDATE_BUFFER_LENGTH:
                for buffered_row in flush_buffer(client, update_buffer,
                                                 stream_projection,
                                                 database_name,
                                                 collection_name):
                    record_message = common.row_to_singer_record(
                        stream, buffered_row, version, time_extracted)
                    singer.write_message(record_message)

                    rows_saved += 1
                update_buffer = set()

            # write state every UPDATE_BOOKMARK_PERIOD messages
            if rows_saved % common.UPDATE_BOOKMARK_PERIOD == 0:
                # flush buffer before writing state
                for buffered_row in flush_buffer(client, update_buffer,
                                                 stream_projection,
                                                 database_name,
                                                 collection_name):
                    record_message = common.row_to_singer_record(
                        stream, buffered_row, version, time_extracted)
                    singer.write_message(record_message)

                    rows_saved += 1
                update_buffer = set()

                # write state
                singer.write_message(
                    singer.StateMessage(value=copy.deepcopy(state)))

        # flush buffer if finished with oplog
        for buffered_row in flush_buffer(client, update_buffer,
                                         stream_projection, database_name,
                                         collection_name):
            record_message = common.row_to_singer_record(
                stream, buffered_row, version, time_extracted)

            singer.write_message(record_message)
            rows_saved += 1

    common.COUNTS[tap_stream_id] += rows_saved
    common.TIMES[tap_stream_id] += time.time() - start_time
    LOGGER.info('Synced %s records for %s', rows_saved, tap_stream_id)
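sync_collection reads its bookmark back through the oplog_ts_time and oplog_ts_inc keys, so update_bookmarks presumably splits a Timestamp into those two integers. A plausible sketch under that assumption (not the tap's confirmed implementation):

def update_bookmarks(state, tap_stream_id, ts):
    # store the BSON Timestamp as the two integer keys that
    # sync_collection reconstructs into a Timestamp at startup
    state = singer.write_bookmark(state, tap_stream_id, 'oplog_ts_time', ts.time)
    state = singer.write_bookmark(state, tap_stream_id, 'oplog_ts_inc', ts.inc)
    return state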
Example #7
def update_wrong_questions_file():
    import os
    import uuid
    from werkzeug.utils import secure_filename  # only used in the commented-out calls below
    from .utils import get_upload_path

    uid = current_user.get_id()
    db = get_db()
    resp = {}

    f = request.files['file']
    hashname = str(uuid.uuid4()) + os.path.splitext(f.filename)[-1]
    question = {'uid': uid}
    question['description'] = request.form.get('description')
    question['date'] = timestamp.Timestamp(
        int(request.form.get('date')) // 1000, 1)
    question['fname'] = f.filename
    question['hashname'] = hashname
    question['dismissed'] = boolean(request.form.get('dismissed')) is True
    question['category'] = request.form.get('category')
    question['answer'] = request.form.get('answer')
    question['url'] = 'https://netwx.c-leon.top/api/uploads/' + hashname

    if request.method == 'PUT':
        _id = request.form.get('_id')
        condition = {'uid': uid, '_id': ObjectId(_id)}
        ori_question = db.question.find_one(condition)
        if not ori_question:
            resp['success'] = False
            resp['message'] = '_id not found'
            return jsonify(resp)
        else:
            try:
                os.remove(
                    os.path.join(
                        get_upload_path(),
                        str(ori_question['hashname'])))  # secure_filename(name)
            except FileNotFoundError:
                resp['success'] = False
                resp['message'] = 'original file not found.'
                return jsonify(resp)
            ori_question.update(question)
            update_result = db.question.update_one(condition,
                                                   {'$set': ori_question})
            upload_path = os.path.join(
                get_upload_path(),
                str(question['hashname']))  # secure_filename(name)
            f.save(upload_path)
            # print(result.raw_result)
            resp['success'] = True
            resp['matched_count'] = update_result.matched_count
            resp['modified_count'] = update_result.modified_count
            return jsonify(resp)

    upload_path = os.path.join(get_upload_path(), str(
        question['hashname']))  # secure_filename(name)
    f.save(upload_path)
    result = db.question.insert_one(question)
    resp['_id'] = str(result.inserted_id)
    resp['success'] = True
    resp['message'] = 'upload success'
    return jsonify(resp)
Example #8
def sync_oplog_stream(client, streams, state):

    streams_map = generate_streams_map(streams)

    #for tap_stream_id in streams_map.keys():
    #    common.whitelist_bookmark_keys(BOOKMARK_KEYS, tap_stream_id, state)

    for tap_stream_id, bookmark in state.get('bookmarks', {}).items():
        oplog_ts = min([timestamp.Timestamp(v['oplog_ts_time'], v['oplog_ts_inc'])
                        for k, v in state.get('bookmarks', {}).items()
                        if streams_map.get(k)])

        LOGGER.info("Starting oplog replication with ts=%s", oplog_ts)

        time_extracted = utils.now()

        rows_saved = 0
        ops_skipped = 0

        with client.local.oplog.rs.find({'ts': {'$gt': oplog_ts}},
                                        oplog_replay=True) as cursor:

            while cursor.alive:
                try:
                    row = next(cursor)

                    if row['op'] == 'n':
                        LOGGER.debug('Skipping noop op')
                    elif not streams_map.get(generate_tap_stream_id_for_row(row)):
                        ops_skipped += 1

                        if ops_skipped % UPDATE_BOOKMARK_PERIOD == 0:
                            LOGGER.info("Skipped %s ops so far as they were not for selected tables; %s rows extracted",
                                        ops_skipped,
                                        rows_saved)
                    else:
                        rows_saved += 1
                        row_op = row['op']
                        if row_op == 'i':
                            tap_stream_id = generate_tap_stream_id_for_row(row)
                            stream_map_entry = streams_map[tap_stream_id]
                            whitelisted_row = {k: v for k, v in row['o'].items() if k not in stream_map_entry['blacklist']}
                            record_message = common.row_to_singer_record(stream_map_entry['stream'],
                                                                        whitelisted_row,
                                                                        common.get_stream_version(tap_stream_id, state),
                                                                        time_extracted)

                            singer.write_message(record_message)
                        elif row_op == 'u':
                            tap_stream_id = generate_tap_stream_id_for_row(row)
                            stream_map_entry = streams_map[tap_stream_id]

                            # if '$set' in row['o'].keys():
                            #     obj = dict(row['o2'], **row['o']['$set'])
                            # else:
                            #     obj = row['o']

                            whitelisted_row = {k: v for k, v in row['o'].items() if k not in stream_map_entry['blacklist']}
                            record_message = common.row_to_singer_record(stream_map_entry['stream'],
                                                                        whitelisted_row,
                                                                        common.get_stream_version(tap_stream_id, state),
                                                                        time_extracted)

                            singer.write_message(record_message)
                        elif row_op == 'd':
                            tap_stream_id = generate_tap_stream_id_for_row(row)
                            stream_map_entry = streams_map[tap_stream_id]

                            # Delete ops only contain the _id of the row deleted
                            whitelisted_row = {}
                            whitelisted_row['_id'] = row['o']['_id']
                            whitelisted_row[SDC_DELETED_AT] = row['ts']

                            record_message = common.row_to_singer_record(stream_map_entry['stream'],
                                                                        whitelisted_row,
                                                                        common.get_stream_version(tap_stream_id, state),
                                                                        time_extracted)
                            singer.write_message(record_message)
                        else:
                            LOGGER.info("Skipping op for table %s as it is not an INSERT, UPDATE, or DELETE", row['ns'])

                    state = update_bookmarks(state,
                                             streams_map,
                                             row['ts'])
                except InvalidBSON as e:
                    LOGGER.info(e)
                    continue

                if rows_saved % 1000 == 0:
                    singer.write_state(state)

            # Send state message at the end
            singer.write_state(state)
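Both this example and the next look up streams via generate_tap_stream_id_for_row. An oplog entry's ns field is '<database>.<collection>', so a plausible sketch is the following (the dash-joined id format is an assumption, not the tap's confirmed convention):

def generate_tap_stream_id_for_row(row):
    # 'ns' in an oplog entry is '<database>.<collection>' (hypothetical
    # id format: database and collection joined with a dash)
    database, _, collection = row['ns'].partition('.')
    return '{}-{}'.format(database, collection)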
Example #9
def sync_oplog_stream(client, streams, state):
    streams_map = generate_streams_map(streams)

    for tap_stream_id in streams_map.keys():
        common.whitelist_bookmark_keys(BOOKMARK_KEYS, tap_stream_id, state)

    for tap_stream_id, bookmark in state.get('bookmarks', {}).items():
        columns = streams_map.get(tap_stream_id)

        if not columns:
            continue

        oplog_ts = min([timestamp.Timestamp(v['oplog_ts_time'], v['oplog_ts_inc'])
                        for k, v in state.get('bookmarks', {}).items()
                        if streams_map.get(k)])

        LOGGER.info("Starting oplog replication with ts=%s", oplog_ts)

        time_extracted = utils.now()

        rows_saved = 0
        ops_skipped = 0

        with client.local.oplog.rs.find({'ts': {'$gt': oplog_ts}},
                                        oplog_replay=True) as cursor:
            for row in cursor:
                if row['op'] == 'n':
                    LOGGER.info('Skipping noop op')
                elif not streams_map.get(generate_tap_stream_id_for_row(row)):
                    ops_skipped += 1

                    if ops_skipped % UPDATE_BOOKMARK_PERIOD == 0:
                        LOGGER.info("Skipped %s ops so far as they were not for selected tables; %s rows extracted",
                                    ops_skipped,
                                    rows_saved)
                else:
                    row_op = row['op']
                    if row_op in ['i', 'u']:
                        tap_stream_id = generate_tap_stream_id_for_row(row)
                        stream_map_entry = streams_map[tap_stream_id]
                        whitelisted_row = {k: v for k, v in row['o'].items() if k in stream_map_entry['columns']}
                        record_message = common.row_to_singer_record(stream_map_entry['stream'],
                                                                     whitelisted_row,
                                                                     common.get_stream_version(tap_stream_id, state),
                                                                     time_extracted)

                        singer.write_message(record_message)

                    elif row_op == 'd':
                        tap_stream_id = generate_tap_stream_id_for_row(row)
                        stream_map_entry = streams_map[tap_stream_id]

                        # Delete ops only contain the _id of the row deleted
                        whitelisted_row = {column_name: None for column_name in stream_map_entry['columns']}

                        whitelisted_row['_id'] = row['o']['_id']
                        whitelisted_row[SDC_DELETED_AT] = row['ts']

                        record_message = common.row_to_singer_record(stream_map_entry['stream'],
                                                                     whitelisted_row,
                                                                     common.get_stream_version(tap_stream_id, state),
                                                                     time_extracted)
                        singer.write_message(record_message)
                    else:
                        LOGGER.info("Skipping op for table %s as it is not an INSERT, UPDATE, or DELETE", row['ns'])

                state = update_bookmarks(state,
                                         streams_map,
                                         row['ts'])