Example #1
    def sync_data(self, parent_ids):
        table = self.TABLE

        self.write_schema()

        start_date = get_last_record_value_for_table(
            self.state, table, 'start_date')

        if start_date is None:
            start_date = get_config_start_date(self.config)
        else:
            start_date = parse(start_date)

        technician_id = get_last_record_value_for_table(
            self.state, table, 'technician_id')

        if technician_id is None:
            technician_id = 0

        for start_date, end_date in generate_date_windows(start_date):
            for index, parent_id in enumerate(parent_ids):
                if parent_id < technician_id:
                    continue

                LOGGER.info(
                    'Fetching %s for technician %s (%s/%s) from %s to %s',
                    table, parent_id, index + 1, len(parent_ids),
                    start_date, end_date)

                parsed_response = self.execute_request(parent_id, start_date, end_date)

                with singer.metrics.record_counter(endpoint=table) as ctr:
                    singer.write_records(table, parsed_response['rows'])

                    ctr.increment(amount=len(parsed_response['rows']))

                # technician_id acts as a substream bookmark so that we
                # can pick up where we left off within a single stream.
                self.state = incorporate(
                    self.state, table, 'technician_id', parent_id)
                # There's no need to save `start_date` here: even on the
                # first run the config start_date won't change, so we're
                # safe updating start_date only after we've finished the
                # sync for this whole window.
                save_state(self.state)

            technician_id = 0

            self.state = incorporate(
                self.state, table, 'start_date', end_date)
            # Because we go through all the technicians every time we sync,
            # we need to start over by resetting the technician_id
            # sub-bookmark to 0.
            self.state = incorporate(
                self.state, table, 'technician_id', 0, force=True)
            save_state(self.state)
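
Example #1 leans on a `generate_date_windows` helper that isn't shown anywhere in these examples. A minimal sketch of what it might look like, assuming fixed seven-day windows that stop once the window start passes the current time (the width and stop condition are assumptions):

    import datetime

    import pytz

    def generate_date_windows(start_date, window=datetime.timedelta(days=7)):
        # Yield (window_start, window_end) pairs from start_date forward;
        # the final window may extend past "now", which is fine because the
        # API simply returns fewer records for it.
        now = datetime.datetime.now(pytz.UTC)
        while start_date < now:
            yield start_date, start_date + window
            start_date += window
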
Example #2
    def sync_data(self):
        table = self.TABLE
        api_method = self.API_METHOD
        done = False

        # Attempt to get the bookmark date from the state file (if one exists and is supplied).
        LOGGER.info('Attempting to get the most recent bookmark_date for entity {}.'.format(self.ENTITY))
        bookmark_date = get_last_record_value_for_table(self.state, table, 'bookmark_date')

        # If there is no bookmark date, fall back to using the start date from the config file.
        if bookmark_date is None:
            LOGGER.info('Could not locate bookmark_date from STATE file. Falling back to start_date from config.json instead.')
            bookmark_date = get_config_start_date(self.config)
        else:
            bookmark_date = parse(bookmark_date)

        # Convert bookmarked start date to POSIX.
        bookmark_date_posix = int(bookmark_date.timestamp())
        
        # Create params for filtering
        if self.ENTITY == 'event':
            params = {"occurred_at[after]": bookmark_date_posix}
            bookmark_key = 'occurred_at'
        elif self.ENTITY in ['promotional_credit', 'comment']:
            params = {"created_at[after]": bookmark_date_posix}
            bookmark_key = 'created_at'
        else:
            params = {"updated_at[after]": bookmark_date_posix}
            bookmark_key = 'updated_at'

        # Add sort_by[asc] to prevent data overwrite by oldest deleted records
        if self.SORT_BY is not None:
            params['sort_by[asc]'] = self.SORT_BY

        LOGGER.info("Querying {} starting at {}".format(table, bookmark_date))

        while not done:
            max_date = bookmark_date

            response = self.client.make_request(
                url=self.get_url(),
                method=api_method,
                params=params)

            if 'api_error_code' in response:
                if response['api_error_code'] == 'configuration_incompatible':
                    LOGGER.error('{} is not configured'.format(response['error_code']))
                    break

            records = response.get('list')
            
            to_write = self.get_stream_data(records)
            
            if self.config.get('include_deleted') not in ['false','False', False]:
                if self.ENTITY == 'event':
                    for event in to_write:
                        if event["event_type"] == 'plan_deleted':
                            Util.plans.append(event['content']['plan'])
                        elif event['event_type'] == 'addon_deleted':
                            Util.addons.append(event['content']['addon'])
                        elif event['event_type'] == 'coupon_deleted':
                            Util.coupons.append(event['content']['coupon'])
                if self.ENTITY == 'plan':
                    for plan in Util.plans:
                        to_write.append(plan)
                if self.ENTITY == 'addon':
                    for addon in Util.addons:
                        to_write.append(addon)
                if self.ENTITY == 'coupon':
                    for coupon in Util.coupons:
                        to_write.append(coupon) 

            
            with singer.metrics.record_counter(endpoint=table) as ctr:
                singer.write_records(table, to_write)

                ctr.increment(amount=len(to_write))
                
                for item in to_write:
                    # Guard against records that lack the bookmark key;
                    # parse(None) would raise a TypeError.
                    if item.get(bookmark_key) is not None:
                        max_date = max(
                            max_date,
                            parse(item.get(bookmark_key))
                        )

            self.state = incorporate(
                self.state, table, 'bookmark_date', max_date)

            if not response.get('next_offset'):
                LOGGER.info("Final offset reached. Ending sync.")
                done = True
            else:
                LOGGER.info("Advancing by one offset.")
                params['offset'] = response.get('next_offset')
                bookmark_date = max_date

        save_state(self.state)
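
Examples #2, #7 and #9 stash deleted plans, addons and coupons on a shared `Util` object that isn't defined in these examples. A plausible minimal shape, inferred purely from how it is used (class-level lists shared across stream instances within a single run):

    class Util:
        # The event stream appends the payloads of *_deleted events here;
        # the plan/addon/coupon streams drain these lists later in the same
        # run so deleted records still reach the destination.
        plans = []
        addons = []
        coupons = []
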
Example #3
    def sync_data(self):
        table = self.TABLE
        done = False

        filters = self.get_filters()
        start_date = get_last_record_value_for_table(self.state, table)

        if start_date is None:
            start_date = get_config_start_date(self.config)
        else:
            start_date = start_date.replace(tzinfo=pytz.UTC)

        td = self.get_interval()

        end_date = start_date + td

        while not done:
            max_date = start_date

            LOGGER.info("Querying {} starting at {}".format(table, start_date))

            body = {
                "startMSeconds": int(start_date.timestamp() * 1000),
                "endMSeconds": int(end_date.timestamp() * 1000),
            }

            if filters is not None:
                body["filters"] = filters

            LOGGER.info(body)

            try:
                response = self.client.make_request(self.get_url(),
                                                    "POST",
                                                    body=body)
            except RuntimeError as e:
                if "502" in str(e) or "504" in str(e):
                    # Retry transient gateway errors (502/504) once.
                    response = self.client.make_request(self.get_url(),
                                                        "POST",
                                                        body=body)

                else:
                    raise

            to_write = self.get_stream_data(response)

            with singer.metrics.record_counter(endpoint=table) as ctr:
                singer.write_records(table, to_write)

                ctr.increment(amount=len(to_write))

                for item in to_write:
                    max_date = max(max_date, self.get_time_for_state(item))

            self.state = incorporate(self.state, table, "start_date",
                                     start_date)

            if max_date > datetime.datetime.now(pytz.UTC):
                done = True

            if len(to_write) == 0:
                LOGGER.info("Advancing one full interval.")

                if end_date > datetime.datetime.now(pytz.UTC):
                    done = True
                else:
                    start_date = end_date

            elif start_date == max_date:
                LOGGER.info("Advancing one millisecond.")
                start_date = start_date + datetime.timedelta(milliseconds=1)
            else:
                LOGGER.info("Advancing by one page.")
                start_date = max_date

            end_date = start_date + td

            save_state(self.state)
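
The three advancement rules at the bottom of the loop in examples #3 and #4 (skip a full interval when a window is empty, nudge by one tick when a page is saturated at a single timestamp, otherwise resume from the newest record) can be pulled out into a pure function for testing. A sketch with names of my own choosing; the caller still owns the "end_date is past now, so stop" check:

    import datetime

    def advance_window(start_date, end_date, max_date, record_count,
                       tick=datetime.timedelta(milliseconds=1)):
        # Mirror of the branching above: returns the next window start.
        if record_count == 0:
            # Empty window: jump a full interval.
            return end_date
        if start_date == max_date:
            # Every record on the page shares start_date's timestamp;
            # nudge forward so we don't re-request the same window forever.
            return start_date + tick
        # Otherwise resume from the newest record seen.
        return max_date
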
Example #4
    def sync_data(self):
        table = self.TABLE
        done = False

        start_date = get_last_record_value_for_table(self.state, table)

        if start_date is None:
            start_date = get_config_start_date(self.config)
        else:
            start_date = start_date.replace(tzinfo=pytz.UTC)

        td = datetime.timedelta(hours=1)

        end_date = start_date + td

        while not done:
            max_date = start_date

            LOGGER.info("Querying {} starting at {}".format(table, start_date))

            body = {
                "filters": {
                    "environment": self.config.get("environment"),
                    "lastUpdated": {
                        "gte": int(start_date.timestamp() * 1000),
                        "lte": int(end_date.timestamp() * 1000),
                    },
                }
            }

            LOGGER.info(body)

            response = self.client.make_request(self.get_url(),
                                                "POST",
                                                body=body)

            to_write = self.get_stream_data(response)

            with singer.metrics.record_counter(endpoint=table) as ctr:
                singer.write_records(table, to_write)

                ctr.increment(amount=len(to_write))

                for item in to_write:
                    max_date = max(max_date, self.get_time_for_state(item))

            self.state = incorporate(self.state, table, "start_date",
                                     start_date)

            if max_date > datetime.datetime.now(pytz.UTC):
                done = True

            if len(to_write) == 0:
                LOGGER.info("Advancing one full interval.")

                if end_date > datetime.datetime.now(pytz.UTC):
                    done = True
                else:
                    start_date = end_date

            elif start_date == max_date:
                LOGGER.info("Advancing one second.")
                start_date = start_date + datetime.timedelta(seconds=1)

            else:
                LOGGER.info("Advancing by one page.")
                start_date = max_date

            end_date = start_date + td

            save_state(self.state)
Example #5
    def sync_data(self):
        table = self.TABLE
        api_method = self.API_METHOD

        # Attempt to get the bookmark date from the state file (if one exists and is supplied).
        LOGGER.info(
            'Attempting to get the most recent bookmark_date for entity {}.'.
            format(self.TABLE))
        bookmark_date = get_last_record_value_for_table(
            self.state, table, 'bookmark_date')

        # If there is no bookmark date, fall back to using the start date from the config file.
        if bookmark_date is None:
            LOGGER.info(
                'Could not locate bookmark_date from STATE file. Falling back to start_date from config.json instead.'
            )
            bookmark_date = get_config_start_date(self.config)
        else:
            bookmark_date = parse(bookmark_date)

        current_date = utils.strftime(utils.now())
        LOGGER.info("Querying {} starting at {}".format(table, bookmark_date))
        urls, spaces, mult_endpoints = self.get_url(bookmark_date,
                                                    current_date)

        temp_max = []
        updated = False

        for i, url in enumerate(urls):

            done = False
            page = 1
            params = self.get_params(bookmark_date, current_date)
            while not done:
                max_date = bookmark_date
                response = self.client.make_request(url=url,
                                                    method=api_method,
                                                    params=params)

                to_write = self.get_stream_data(response.get('results'))
                for item in to_write:

                    if self.TABLE in ('space_counts', 'space_events'):
                        item['space_id'] = spaces[i]

                    if self.TABLE == 'space_counts':
                        # Creates a surrogate primary key for this endpoint,
                        # since one doesn't exist at time of writing. Build a
                        # fresh digest per record so the id is deterministic.
                        key_hash = hashlib.sha256(
                            (item['space_id'] +
                             item['timestamp']).encode('utf-8'))
                        item['id'] = key_hash.hexdigest()

                    if item.get('updated_at') is not None:
                        max_date = max(max_date, parse(item.get('updated_at')))
                        updated = True

                    elif item.get('created_at') is not None:
                        max_date = max(max_date, parse(item.get('created_at')))
                        updated = True

                    elif item.get('timestamp') is not None and mult_endpoints:
                        max_date = max(max_date, parse(item.get('timestamp')))
                        updated = True

                with singer.metrics.record_counter(endpoint=table) as ctr:
                    singer.write_records(table, to_write)
                    ctr.increment(amount=len(to_write))

                self.state = incorporate(self.state, table, 'bookmark_date',
                                         max_date)

                if not response.get('next'):
                    LOGGER.info("Final page reached. Ending sync.")
                    done = True
                else:
                    page += 1
                    LOGGER.info("Advancing by one page.")
                    params['page'] = page

            # Track each endpoint's newest date so the shared bookmark can
            # only advance as far as the slowest endpoint.
            temp_max.append(max_date)

        if mult_endpoints and updated:
            max_date = min(temp_max)

        self.state = incorporate(self.state, table, 'bookmark_date', max_date)

        save_state(self.state)
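
The surrogate key in example #5 is only a stable primary key if the digest is computed per record: calling `update()` on one shared `hashlib.sha256()` object makes each id depend on every record hashed before it, so the same row would get a different id on the next run. A standalone illustration (the function name is mine):

    import hashlib

    def surrogate_id(space_id, timestamp):
        # Same inputs always yield the same id, regardless of how many
        # records were hashed before this one.
        return hashlib.sha256(
            (space_id + timestamp).encode('utf-8')).hexdigest()
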
Example #6
    def sync_data(self):
        table = self.TABLE
        api_method = self.API_METHOD
        done = False

        # Attempt to get the bookmark date from the state file (if one exists and is supplied).
        LOGGER.info(
            'Attempting to get the most recent bookmark_date for entity {}.'.
            format(self.ENTITY))
        bookmark_date = get_last_record_value_for_table(
            self.state, table, 'bookmark_date')

        # If there is no bookmark date, fall back to using the start date from the config file.
        if bookmark_date is None:
            LOGGER.info(
                'Could not locate bookmark_date from STATE file. Falling back to start_date from config.json instead.'
            )
            bookmark_date = get_config_start_date(self.config)
        else:
            bookmark_date = parse(bookmark_date)

        # Convert bookmarked start date to POSIX.
        bookmark_date_posix = int(bookmark_date.timestamp())

        # Create params for filtering
        if self.ENTITY == 'event':
            params = {"occurred_at[after]": bookmark_date_posix}
            bookmark_key = 'occurred_at'
        else:
            params = {"updated_at[after]": bookmark_date_posix}
            bookmark_key = 'updated_at'

        LOGGER.info("Querying {} starting at {}".format(table, bookmark_date))

        while not done:
            max_date = bookmark_date

            response = self.client.make_request(url=self.get_url(),
                                                method=api_method,
                                                params=params)

            to_write = self.get_stream_data(response.get('list'))

            with singer.metrics.record_counter(endpoint=table) as ctr:
                singer.write_records(table, to_write)

                ctr.increment(amount=len(to_write))

                for item in to_write:
                    max_date = max(max_date, parse(item.get(bookmark_key)))

            self.state = incorporate(self.state, table, 'bookmark_date',
                                     max_date)

            if not response.get('next_offset'):
                LOGGER.info("Final offset reached. Ending sync.")
                done = True
            else:
                LOGGER.info("Advancing by one offset.")
                params['offset'] = response.get('next_offset')
                bookmark_date = max_date

        save_state(self.state)
Example #7
    def sync_data(self):
        table = self.TABLE
        api_method = self.API_METHOD
        done = False
        sync_interval_in_mins = 2

        # Attempt to get the bookmark date from the state file (if one exists and is supplied).
        LOGGER.info(
            'Attempting to get the most recent bookmark_date for entity {}.'.
            format(self.ENTITY))
        bookmark_date = get_last_record_value_for_table(
            self.state, table, 'bookmark_date')

        # If there is no bookmark date, fall back to using the start date from the config file.
        if bookmark_date is None:
            LOGGER.info(
                'Could not locate bookmark_date from STATE file. Falling back to start_date from config.json instead.'
            )
            bookmark_date = get_config_start_date(self.config)
        else:
            bookmark_date = parse(bookmark_date)

        # Convert bookmarked start date to POSIX.
        bookmark_date_posix = int(bookmark_date.timestamp())
        to_date = datetime.now(
            pytz.utc) - timedelta(minutes=sync_interval_in_mins)
        to_date_posix = int(to_date.timestamp())
        sync_window = str([bookmark_date_posix, to_date_posix])
        LOGGER.info("Sync Window {} for schema {}".format(sync_window, table))

        # Create params for filtering
        if self.ENTITY == 'event':
            params = {"occurred_at[between]": sync_window}
            bookmark_key = 'occurred_at'
        elif self.ENTITY in ['promotional_credit', 'comment']:
            params = {"created_at[between]": sync_window}
            bookmark_key = 'created_at'
        else:
            params = {"updated_at[between]": sync_window}
            bookmark_key = 'updated_at'

        # Add sort_by[asc] to prevent data overwrite by oldest deleted records
        if self.SORT_BY is not None:
            params['sort_by[asc]'] = self.SORT_BY

        LOGGER.info("Querying {} starting at {}".format(table, bookmark_date))

        while not done:
            max_date = to_date

            response = self.client.make_request(url=self.get_url(),
                                                method=api_method,
                                                params=params)

            if 'api_error_code' in response:
                if response['api_error_code'] == 'configuration_incompatible':
                    LOGGER.error('{} is not configured'.format(
                        response['error_code']))
                    break

            records = response.get('list')

            # List of deleted "plans, addons and coupons" from the /events endpoint
            deleted_records = []

            if self.config.get('include_deleted') not in [
                    'false', 'False', False
            ]:
                if self.ENTITY == 'event':
                    # Parse "event_type" from events records and collect deleted plan/addon/coupon from events
                    for record in records:
                        event = record.get(self.ENTITY)
                        if event["event_type"] == 'plan_deleted':
                            Util.plans.append(event['content']['plan'])
                        elif event['event_type'] == 'addon_deleted':
                            Util.addons.append(event['content']['addon'])
                        elif event['event_type'] == 'coupon_deleted':
                            Util.coupons.append(event['content']['coupon'])
                # Deleted records need their own transform here, since "to_write" already contains transformed data.
                if self.ENTITY == 'plan':
                    for plan in Util.plans:
                        deleted_records.append(self.transform_record(plan))
                if self.ENTITY == 'addon':
                    for addon in Util.addons:
                        deleted_records.append(self.transform_record(addon))
                if self.ENTITY == 'coupon':
                    for coupon in Util.coupons:
                        deleted_records.append(self.transform_record(coupon))

            # Get records from API response and transform
            to_write = self.get_stream_data(records)

            with singer.metrics.record_counter(endpoint=table) as ctr:
                # Combine the transformed records with the deleted plans, addons and coupons collected from the events endpoint.
                to_write = to_write + deleted_records
                singer.write_records(table, to_write)

                ctr.increment(amount=len(to_write))

            # Cap the bookmark at to_date (now - 2 minutes): keeping the
            # bookmark slightly behind real time ensures no data is missed
            # due to API latency, because anything newer than to_date is
            # re-queried on the next run.
            max_date = min(max_date, to_date)
            self.state = incorporate(self.state, table, 'bookmark_date',
                                     max_date)

            if not response.get('next_offset'):
                LOGGER.info("Final offset reached. Ending sync.")
                done = True
            else:
                LOGGER.info("Advancing by one offset.")
                params['offset'] = response.get('next_offset')
                bookmark_date = max_date

        save_state(self.state)
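
The `(now - 2 minutes)` cap in example #7 is a lookback guard: the upper bound of the query window, and therefore the bookmark, deliberately trails real time so records the API hasn't finished indexing yet are picked up on the next run instead of being skipped. The window computation in isolation (a sketch; the two-minute lag mirrors `sync_interval_in_mins` above):

    from datetime import datetime, timedelta

    import pytz

    def posix_sync_window(bookmark_date, lag_minutes=2):
        # Returns ([from_ts, to_ts], to_date): POSIX-second bounds for the
        # "[between]" filter, with the upper bound capped at now minus lag.
        to_date = datetime.now(pytz.utc) - timedelta(minutes=lag_minutes)
        return ([int(bookmark_date.timestamp()),
                 int(to_date.timestamp())], to_date)
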
Example #8
    def get_headers(self, technician_id):
        start_date = get_config_start_date(self.config)
        end_date = start_date + timedelta(days=7)
        parsed_response = self.execute_request(technician_id, start_date, end_date)
        return parsed_response['headers']
Example #9
    def sync_data(self):
        table = self.TABLE
        api_method = self.API_METHOD
        done = False

        # Attempt to get the bookmark date from the state file (if one exists and is supplied).
        LOGGER.info(
            "Attempting to get the most recent bookmark_date for entity {}.".
            format(self.ENTITY))
        bookmark_date = get_last_record_value_for_table(
            self.state, table, "bookmark_date")

        # If there is no bookmark date, fall back to using the start date from the config file.
        if bookmark_date is None:
            LOGGER.info(
                "Could not locate bookmark_date from STATE file. Falling back to start_date from config.json instead."
            )
            bookmark_date = get_config_start_date(self.config)
        else:
            bookmark_date = parse(bookmark_date)

        # Convert bookmarked start date to POSIX.
        bookmark_date_posix = int(bookmark_date.timestamp())

        # Create params for filtering
        if self.ENTITY == "event":
            params = {"occurred_at[after]": bookmark_date_posix}
            bookmark_key = "occurred_at"
        elif self.ENTITY == "promotional_credit":
            params = {"created_at[after]": bookmark_date_posix}
            bookmark_key = "created_at"
        else:
            params = {"updated_at[after]": bookmark_date_posix}
            bookmark_key = "updated_at"

        LOGGER.info("Querying {} starting at {}".format(table, bookmark_date))

        while not done:
            max_date = bookmark_date

            response = self.client.make_request(url=self.get_url(),
                                                method=api_method,
                                                params=params)

            if "api_error_code" in response.keys():
                if response["api_error_code"] == "configuration_incompatible":
                    LOGGER.error("{} is not configured".format(
                        response["error_code"]))
                    break

            records = response.get("list")

            to_write = self.get_stream_data(records)

            if self.ENTITY == "event":
                for event in to_write:
                    if event["event_type"] == "plan_deleted":
                        Util.plans.append(event["content"]["plan"])
                    elif event["event_type"] == "addon_deleted":
                        Util.addons.append(event["content"]["addon"])
                    elif event["event_type"] == "coupon_deleted":
                        Util.coupons.append(event["content"]["coupon"])
            if self.ENTITY == "plan":
                for plan in Util.plans:
                    to_write.append(plan)
            if self.ENTITY == "addon":
                for addon in Util.addons:
                    to_write.append(addon)
            if self.ENTITY == "coupon":
                for coupon in Util.coupons:
                    to_write.append(coupon)

            with singer.metrics.record_counter(endpoint=table) as ctr:
                singer.write_records(table, to_write)

                ctr.increment(amount=len(to_write))

                for item in to_write:
                    # Use a separate variable: reassigning bookmark_key
                    # itself would clobber the key name after the first
                    # record and break every later lookup.
                    bookmark_value = item.get(bookmark_key)

                    if bookmark_value is not None:
                        record_date = parse(bookmark_value)

                        LOGGER.info(f"BOOKMARK_KEY value: {record_date}")

                        max_date = max(
                            ensure_tz_aware_dt(max_date),
                            ensure_tz_aware_dt(record_date),
                        )

            self.state = incorporate(self.state, table, "bookmark_date",
                                     max_date)

            if not response.get("next_offset"):
                LOGGER.info("Final offset reached. Ending sync.")
                done = True
            else:
                LOGGER.info("Advancing by one offset.")
                params["offset"] = response.get("next_offset")
                bookmark_date = max_date

        save_state(self.state)
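
Example #9 also calls an `ensure_tz_aware_dt` helper that isn't shown. Comparing a timezone-aware datetime with a naive one raises TypeError, so a minimal sketch (assuming UTC as the default zone) would be:

    import pytz

    def ensure_tz_aware_dt(dt):
        # Attach UTC to naive datetimes so max()/min() comparisons between
        # config dates and parsed API dates can't raise TypeError.
        return dt if dt.tzinfo is not None else dt.replace(tzinfo=pytz.UTC)
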
Example #10
    def sync_data(self, parent_ids, return_first_response=False):
        table = self.TABLE

        if not return_first_response:
            self.write_schema()

        start_date = get_last_record_value_for_table(self.state, table,
                                                     'start_date')
        technician_id = get_last_record_value_for_table(
            self.state, table, 'technician_id')

        if start_date is None:
            start_date = get_config_start_date(self.config)
        else:
            start_date = parse(start_date)

        if technician_id is None:
            technician_id = 0

        end_date = start_date + datetime.timedelta(days=7)

        self.client.make_request(
            'https://secure.logmeinrescue.com/API/setReportArea_v8.aspx',
            'POST',
            params={'area': self.REPORT_AREA})

        while start_date < datetime.datetime.now(tz=pytz.UTC):
            for index, parent_id in enumerate(parent_ids):
                if parent_id < technician_id:
                    continue

                LOGGER.info(
                    ('Fetching session report for technician {} ({}/{}) '
                     'from {} to {}').format(parent_id, index + 1,
                                             len(parent_ids), start_date,
                                             end_date))

                self.client.make_request(
                    'https://secure.logmeinrescue.com/API/setReportDate_v2.aspx',  # noqa
                    'POST',
                    params={
                        # Note: '%-m'-style codes (no zero padding) are a
                        # glibc extension; they won't work on Windows.
                        'bdate': start_date.strftime('%-m/%-d/%Y %-H:%M:%S'),
                        'edate': end_date.strftime('%-m/%-d/%Y %-H:%M:%S')
                    })

                response = self.client.make_request(self.get_url(),
                                                    'GET',
                                                    params={
                                                        'node': parent_id,
                                                        'nodetype': 'NODE',
                                                    })

                if return_first_response:
                    return response

                to_write = self.get_stream_data(response)

                if not return_first_response:
                    with singer.metrics.record_counter(endpoint=table) as ctr:
                        singer.write_records(table, to_write)

                        ctr.increment(amount=len(to_write))

                    self.state = incorporate(self.state, table,
                                             'technician_id', parent_id)
                    self.state = incorporate(self.state, table, 'start_date',
                                             start_date)
                    self.state = save_state(self.state)

                elif len(to_write) > 0:
                    return to_write[0]

            start_date = end_date
            end_date = start_date + datetime.timedelta(days=7)
            technician_id = 0

            if not return_first_response:
                self.state = incorporate(self.state, table, 'start_date',
                                         start_date)
                self.state = incorporate(self.state,
                                         table,
                                         'technician_id',
                                         technician_id,
                                         force=True)
                self.state = save_state(self.state)