Example #1
    def parse_config(self, config_file_path):
        """Parses the configuration file

        Args:
            config_file_path (string): path to the configuration file
        """
        # Read main logfeeder configuration file
        staticconf.YamlConfiguration(config_file_path)
        self.aws_config_filepath = staticconf.read(
            'logfeeder.aws_config_filepath')
        self.domain = staticconf.read('logfeeder.domain')
        app_file = staticconf.read('{0}.file'.format(self.APP_NAME))

        # Read app specific configuration file
        contents = staticconf.YamlConfiguration(app_file)
        self.api_creds_filepath = staticconf.read('api_creds_filepath')
        if 'rate_limiter_num_calls_per_timeunit' in contents:
            self.rate_limiter = RateLimiter(
                calls_per_timeunit=staticconf.read_int(
                    'rate_limiter_num_calls_per_timeunit'),
                seconds_per_timeunit=staticconf.read_int(
                    'rate_limiter_num_seconds_per_timeunit'),
            )
        self.sub_apis = {}
        for key in contents:
            if key.startswith('enable_'):
                name_of_subapi = key.split('enable_', 1)[1]
                self.sub_apis[name_of_subapi] = staticconf.read_bool(key)
        # If an API doesn't have any sub_apis, set self.sub_apis to its APP_NAME for code compatibility
        if not self.sub_apis:
            self.sub_apis = {self.APP_NAME: True}
Example #2
    def __init__(self, db_obj, sqs_scanner_queue, sqs_worker_queue, emailer):
        """
        :param db_obj: dynamodb table of scheduled jobs
        :type db_obj: boto.dynamodb2.table.Table

        :param sqs_scanner_queue: scanner queue to which feedback messages are sent
        :type sqs_scanner_queue: boto.sqs.queue.Queue

        :param sqs_worker_queue: worker queue from which jobs are received
        :type sqs_worker_queue: boto.sqs.queue.Queue

        """
        self.db = db_obj
        self.scanner_queue = sqs_scanner_queue
        self.worker_queue = sqs_worker_queue
        self._should_run = True
        self._run_once = False
        self.worker_keepalive_sec = staticconf.read_int('scanner.worker_keepalive_sec')
        self.max_error_retries = staticconf.read_int('max_error_retries')
        self.msg_max_retention_sec = int(
            self.scanner_queue.get_queue_attributes()['MessageRetentionPeriod']
        )
        self.msg_max_retention_sec += 3600  # give SQS enough time to delete the message
        self.emailer = emailer

        log("scanner initialization")
        log(dict((k, str(v)) for k, v in vars(self).iteritems()))
Example #3
def get_target_capacity_value(target_capacity: str, pool: str, scheduler: str) -> int:
    target_capacity = target_capacity.lower()
    pool_namespace = POOL_NAMESPACE.format(pool=pool, scheduler=scheduler)
    if target_capacity == 'min':
        return staticconf.read_int('scaling_limits.min_capacity', namespace=pool_namespace)
    elif target_capacity == 'max':
        return staticconf.read_int('scaling_limits.max_capacity', namespace=pool_namespace)
    else:
        return int(target_capacity)
Example #4
    def configure_initial(self):
        # Any keys in the env_config will override defaults in config.yaml.
        setup_config(self.options)

        self.logger = logger
        self.region = staticconf.read_string('aws.region')
        self.last_time_called = self.options.start_time
        self.run_interval = staticconf.read_int('batches.spot_prices.run_interval_seconds')
        self.dedupe_interval = staticconf.read_int('batches.spot_prices.dedupe_interval_seconds')
        self.metrics_client = ClustermanMetricsBotoClient(region_name=self.region)
Example #5
    def add_instance(self, instance):
        cpus = instance.resources.cpus
        self.aws_cpus.add_delta(self.current_time, cpus)

        join_delay_mean = staticconf.read_int('join_delay_mean_seconds')
        join_delay_stdev = staticconf.read_int('join_delay_stdev_seconds')
        instance.join_time = instance.start_time.shift(
            seconds=random.gauss(join_delay_mean, join_delay_stdev))

        self.mesos_cpus.add_delta(instance.join_time, cpus)
Example #6
def _populate_cluster_size_events(simulator, start_time, end_time):
    capacity_metrics = simulator.metrics_client.get_metric_values(
        'fulfilled_capacity',
        METADATA,
        start_time.timestamp,
        end_time.timestamp,
        use_cache=False,
        extra_dimensions=get_cluster_dimensions(
            simulator.metadata.cluster,
            simulator.metadata.pool,
            simulator.metadata.scheduler,
        ),
    )
    for i, (timestamp, data) in enumerate(capacity_metrics['fulfilled_capacity']):
        market_data = {}
        for market_str, value in data.items():
            market = InstanceMarket.parse(market_str)
            weight = get_market_resources(market).cpus // staticconf.read_int(
                'cpus_per_weight')
            market_data[market] = int(value) // weight
        simulator.markets |= set(market_data.keys())
        # Want to start the cluster out at the expected capacity
        use_join_delay = (i != 0)
        simulator.add_event(
            ModifyClusterSizeEvent(arrow.get(timestamp), market_data, use_join_delay))
Example #7
 def submit_host_for_termination(self,
                                 host: Host,
                                 delay: Optional[int] = None) -> None:
     delay_seconds = delay if delay is not None else staticconf.read_int(
         f'drain_termination_timeout_seconds.{host.sender}', default=90)
     logger.info(
         f'Delaying terminating {host.instance_id} for {delay_seconds} seconds'
     )
     return self.client.send_message(
         QueueUrl=self.termination_queue_url,
         DelaySeconds=delay_seconds,
         MessageAttributes={
             'Sender': {
                 'DataType': 'String',
                 'StringValue': host.sender,
             },
         },
         MessageBody=json.dumps({
             'instance_id': host.instance_id,
             'ip': host.ip,
             'hostname': host.hostname,
             'group_id': host.group_id,
             'scheduler': host.scheduler,
         }),
     )
Example #8
    def __init__(self, config_loc, config_override_loc, emailer,
                 num_processes=1, wait_timeout_sec=60):
        """
        :param config_loc: path of config.yaml
        :type config_loc: string

        :param config_override_loc: path of config-env-dev.yaml
        :type config_override_loc: string

        :param num_processes: number of worker processes to use for sqs request
        :type num_processes: int

        :param wait_timeout_sec: A timeout passed to conditional variable wait
            function.  If thread is woken up on timeout, do some maintenance work.
        :type wait_timeout_sec: int

        """
        self._config_loc = config_loc
        self._config_override_loc = config_override_loc
        self._stop_requested = False
        self._run_once = False
        self.max_error_retries = staticconf.read_int('max_error_retries')
        self.etl_helper = ETLStatusHelper()
        self.jobs_db = TableConnection.get_connection('ScheduledJobs')
        self.runs_db = TableConnection.get_connection('ETLRecords')
        self._num_processes = num_processes
        self._cond = threading.Condition(threading.Lock())
        self._wait_timeout_sec = max(wait_timeout_sec, 60)
        self.emailer = emailer
Example #9
    def configure_initial(self) -> None:
        setup_config(self.options)

        # Since we want to collect metrics for all the pools, we need to call setup_config
        # first to load the cluster config path, and then read all the entries in that directory
        self.pools: MutableMapping[str, List[str]] = {}
        for scheduler in {'mesos', 'kubernetes'}:
            self.pools[scheduler] = get_pool_name_list(self.options.cluster, scheduler)
        for scheduler, pools in self.pools.items():
            for pool in pools:
                self.config.watchers.append({
                    f'{pool}.{scheduler}': get_pool_config_path(
                        self.options.cluster, pool, scheduler),
                })
                load_cluster_pool_config(self.options.cluster, pool, scheduler, None)

        self.region = staticconf.read_string('aws.region')
        self.run_interval = staticconf.read_int(
            'batches.cluster_metrics.run_interval_seconds')
        self.logger = logger

        self.metrics_client = ClustermanMetricsBotoClient(
            region_name=self.region)
Example #10
    def __init__(
        self,
        region_name: str,
        app_identifier: Optional[str] = None,
        ttl_days: Optional[int] = None,
    ) -> None:
        """
        :param region_name: name of AWS region to use instead of the default.
        :param app_identifier: prefix for all application metric names.
            Required from client applications to avoid name collisions.
        :param ttl_days: number of days after which data written by this client should expire.
            Use -1 if data should never expire, and leave as None to use the default value.
        """
        self.region_name = region_name
        ttl_days = ttl_days or staticconf.read_int('dynamodb.ttl_days',
                                                   namespace=CONFIG_NAMESPACE)

        self.ddb = get_metrics_session().resource(
            'dynamodb',
            region_name=self.region_name,
        )
        self.app_identifier = app_identifier
        if ttl_days == -1:
            # Never expire
            self.ttl_seconds = None
        else:
            self.ttl_seconds = int(timedelta(days=ttl_days).total_seconds())

        self._cache: MutableMapping[str, CacheEntry] = defaultdict(CacheEntry)
Example #11
def mock_autoscaler():
    autoscaling_config_dict = {
        'default_signal_role': 'clusterman',
        'setpoint': 0.7,
        'target_capacity_margin': 0.1,
    }

    with mock.patch(
        'clusterman.autoscaler.autoscaler.ClustermanMetricsBotoClient',
        autospec=True,
    ), mock.patch(
        'clusterman.autoscaler.autoscaler.PoolManager',
        autospec=True,
    ), mock.patch(
        'clusterman.autoscaler.autoscaler.Autoscaler._get_signal_for_app',
        autospec=True,
    ), mock.patch(
        'clusterman.autoscaler.autoscaler.get_monitoring_client',
    ), mock.patch(
        'clusterman.autoscaler.autoscaler.ExternalSignal',
    ), mock.patch(
        'clusterman.autoscaler.autoscaler.PendingPodsSignal',
    ), staticconf.testing.PatchConfiguration(
        {'autoscaling': autoscaling_config_dict},
    ):
        mock_autoscaler = Autoscaler('mesos-test', 'bar', 'mesos', ['bar'], monitoring_enabled=False)
        mock_autoscaler.pool_manager.cluster_connector = mock.Mock()

    mock_autoscaler.pool_manager.target_capacity = 300
    mock_autoscaler.pool_manager.min_capacity = staticconf.read_int(
        'scaling_limits.min_capacity', namespace=POOL_NAMESPACE.format(pool='bar', scheduler='mesos')
    )
    mock_autoscaler.pool_manager.max_capacity = staticconf.read_int(
        'scaling_limits.max_capacity', namespace=POOL_NAMESPACE.format(pool='bar', scheduler='mesos')
    )
    mock_autoscaler.pool_manager.non_orphan_fulfilled_capacity = 0

    mock_autoscaler.target_capacity_gauge = mock.Mock(spec=GaugeProtocol)
    mock_autoscaler.non_orphan_capacity_gauge = mock.Mock(spec=GaugeProtocol)
    mock_autoscaler.resource_request_gauges = {
        'mem': mock.Mock(spec=GaugeProtocol),
        'cpus': mock.Mock(spec=GaugeProtocol),
        'disk': mock.Mock(spec=GaugeProtocol),
        'gpus': mock.Mock(spec=GaugeProtocol),
    }
    return mock_autoscaler
Example #12
def test_load_cluster_pool_config(cluster, pool, pool_other_config,
                                  mock_config_files):
    config.load_cluster_pool_config(cluster, pool, 'mesos', None)

    pool_namespace = POOL_NAMESPACE.format(pool=pool, scheduler='mesos')
    assert staticconf.read_int('other_config',
                               namespace=pool_namespace) == pool_other_config
    assert staticconf.read_string('resource_groups',
                                  namespace=pool_namespace) == cluster
Example #13
def dates_from_rs_status(status_helper,
                         db,
                         logstream,
                         retry_on_err,
                         single_date=None):
    """
    dates_from_rs_status gets the jobs that have completed the et step, but
    have not started the load step, and have no jobs before them running or
    in error

    Args:
    status_helper -- a wrapper around a backing store to aid in CRUD
    db -- is the database we query
    logstream -- a PipelineStreamLogger
    retry_on_err -- a boolean, True if we're retrying on errors
    single_date -- date string of the form YYYY-MM-DD if we're \
        only looking for one

    Returns:
    a list of dates to catch up on formatted as strings YYYY/MM/DD
    """
    versions = get_yaml_table_versions(pipeline_yaml_schema_file_path())

    if single_date is not None:
        data_date = get_formatted_date(single_date)
        if data_date is None:
            handle_error("bad input date: {0}".format(single_date), logstream)
        start_datetime = datetime.strptime(data_date, "%Y/%m/%d")
        status_tuples = \
            status_helper.query_et_complete_job(db, versions, data_date)
    else:
        days_back = read_int('pipeline.load_step.days_to_check') + 1
        start_datetime = datetime.utcnow() - timedelta(days=days_back)
        status_tuples = \
            status_helper.query_et_complete_jobs(db, versions, start_datetime)

    if status_tuples is False:
        handle_error(
            "query for complete et job failed, version={0}, date={1}".format(
                versions,
                data_date if single_date is not None else start_datetime),
            logstream)

    candidates = []
    last_date = (start_datetime - timedelta(days=1)).strftime("%Y/%m/%d")
    for ddate, ld_status in status_tuples:
        if not one_day_greater(ddate, last_date):
            break
        elif ld_status is None or (ld_status == 'error' and retry_on_err):
            candidates.append(ddate)
        elif ld_status == 'error':
            break
        last_date = ddate
    candidate_string = "candidate dates for load: {0}".format(candidates)
    logstream.write_msg(status='running', extra_msg=candidate_string)
    return candidates
Example #14
def dates_from_rs_status(status_helper, db, logstream,
                         retry_on_err, single_date=None):
    """
    dates_from_rs_status gets the jobs that have completed the et step, but
    have not started the load step, and have no jobs before them running or
    in error

    Args:
    status_helper -- a wrapper around a backing store to aid in CRUD
    db -- is the database we query
    logstream -- a PipelineStreamLogger
    retry_on_err -- a boolean, True if we're retrying on errors
    single_date -- date string of the form YYYY-MM-DD if we're \
        only looking for one

    Returns:
    a list of dates to catch up on formatted as strings YYYY/MM/DD
    """
    versions = get_yaml_table_versions(pipeline_yaml_schema_file_path())

    if single_date is not None:
        data_date = get_formatted_date(single_date)
        if data_date is None:
            handle_error("bad input date: {0}".format(single_date), logstream)
        start_datetime = datetime.strptime(data_date, "%Y/%m/%d")
        status_tuples = \
            status_helper.query_et_complete_job(db, versions, data_date)
    else:
        days_back = read_int('pipeline.load_step.days_to_check') + 1
        start_datetime = datetime.utcnow() - timedelta(days=days_back)
        status_tuples = \
            status_helper.query_et_complete_jobs(db, versions, start_datetime)

    if status_tuples is False:
        handle_error(
            "query for complete et job failed, version={0}, date={1}".format(
                versions,
                data_date if single_date is not None else start_datetime
            ),
            logstream
        )

    candidates = []
    last_date = (start_datetime - timedelta(days=1)).strftime("%Y/%m/%d")
    for ddate, ld_status in status_tuples:
        if not one_day_greater(ddate, last_date):
            break
        elif ld_status is None or (ld_status == 'error' and retry_on_err):
            candidates.append(ddate)
        elif ld_status == 'error':
            break
        last_date = ddate
    candidate_string = "candidate dates for load: {0}".format(candidates)
    logstream.write_msg(status='running', extra_msg=candidate_string)
    return candidates
Example #15
def fetch_creds():
    '''
    Return a dictionary holding temporary credentials from the metadata server.
    This function will block up to the timeout specified in the config file.
    Do not call this function unless config.yaml has been loaded.
    '''
    url = '{url_root}/{name}'.format(
        url_root=staticconf.read_string('instance_profile_creds_url'),
        name=staticconf.read_string('instance_profile_name'))
    in_stream = urllib2.urlopen(
        url,
        timeout=staticconf.read_int(
            'instance_profile_creds_timeout_in_seconds', default=4))
    return simplejson.load(in_stream)
Example #16
def fetch_creds():
    '''
    Return a dictionary holding temporary credentials from the metadata server.
    This function will block up to the timeout specified in the config file.
    Do not call this function unless config.yaml has been loaded.
    '''
    url = '{url_root}/{name}'.format(
        url_root=staticconf.read_string('instance_profile_creds_url'),
        name=staticconf.read_string('instance_profile_name'))
    in_stream = urllib2.urlopen(
        url,
        timeout=staticconf.read_int(
            'instance_profile_creds_timeout_in_seconds', default=4
        )
    )
    return simplejson.load(in_stream)
Example #17
    def __init__(self, logstrm, psql_auth_file, run_local=False):

        self.run_local = run_local
        self.host = staticconf.read_string('redshift_host')
        self.port = staticconf.read_int('redshift_port')
        private_dict = YamlConfiguration(psql_auth_file)
        self.user = private_dict['redshift_user']
        self.password = private_dict['redshift_password']
        self.log_stream = logstrm
        self._aws_key = ''
        self._aws_secret = ''
        self._aws_token = ''
        self._aws_token_expiry = datetime.utcnow()
        self._whitelist = ['select', 'create', 'insert', 'update']
        self._set_aws_auth()
        psycopg2.extensions.set_wait_callback(wait_select_inter)
Example #19
 def process_drain_queue(
     self,
     mesos_operator_client: Optional[Callable[..., Callable[[str], Callable[..., None]]]],
     kube_operator_client: Optional[KubernetesClusterConnector],
 ) -> None:
     host_to_process = self.get_host_to_drain()
     if host_to_process and host_to_process.instance_id not in self.draining_host_ttl_cache:
         self.draining_host_ttl_cache[
             host_to_process.instance_id] = arrow.now().shift(
                 seconds=DRAIN_CACHE_SECONDS)
         if host_to_process.scheduler == 'mesos':
             logger.info(
                 f'Mesos host to drain and submit for termination: {host_to_process}'
             )
             try:
                 drain(
                     mesos_operator_client,
                     [f'{host_to_process.hostname}|{host_to_process.ip}'],
                     arrow.now().timestamp * 1000000000,
                     staticconf.read_int(
                         'mesos_maintenance_timeout_seconds', default=600) *
                     1000000000)
             except Exception as e:
                 logger.error(
                     f'Failed to drain {host_to_process.hostname} continuing to terminate anyway: {e}'
                 )
             finally:
                 self.submit_host_for_termination(host_to_process)
         elif host_to_process.scheduler == 'kubernetes':
             logger.info(
                 f'Kubernetes host to drain and submit for termination: {host_to_process}'
             )
             self.submit_host_for_termination(host_to_process, delay=0)
         else:
             logger.info(
                 f'Host to submit for termination immediately: {host_to_process}'
             )
             self.submit_host_for_termination(host_to_process, delay=0)
         self.delete_drain_messages([host_to_process])
     elif host_to_process:
         logger.warning(
             f'Host: {host_to_process.hostname} already being processed, skipping...'
         )
         self.delete_drain_messages([host_to_process])
Example #20
 def process_drain_queue(
     self,
     mesos_operator_client: Callable[..., Callable[[str], Callable[..., None]]],
 ) -> None:
     host_to_process = self.get_host_to_drain()
     if host_to_process and host_to_process.instance_id not in self.draining_host_ttl_cache:
         self.draining_host_ttl_cache[
             host_to_process.instance_id] = arrow.now().shift(
                 seconds=DRAIN_CACHE_SECONDS)
         # if hosts do not have hostname it means they are likely not in mesos and don't need draining
         # so instead we send them to terminate straight away
         if not host_to_process.hostname:
             logger.info(
                 f'Host to submit for termination immediately: {host_to_process}'
             )
             self.submit_host_for_termination(host_to_process, delay=0)
         else:
             logger.info(
                 f'Host to drain and submit for termination: {host_to_process}'
             )
             try:
                 drain(
                     mesos_operator_client,
                     [f'{host_to_process.hostname}|{host_to_process.ip}'],
                     arrow.now().timestamp * 1000000000,
                     staticconf.read_int(
                         'mesos_maintenance_timeout_seconds', default=600) *
                     1000000000)
             except Exception as e:
                 logger.error(
                     f'Failed to drain {host_to_process.hostname} continuing to terminate anyway: {e}'
                 )
             finally:
                 self.submit_host_for_termination(host_to_process)
         self.delete_drain_messages([host_to_process])
     elif host_to_process:
         logger.warning(
             f'Host: {host_to_process.hostname} already being processed, skipping...'
         )
         self.delete_drain_messages([host_to_process])
Example #21
def copy_table(psql_helper, db_name, ddate, log_tuple, ttl_days, logstream):
    s3_log, rs_table = log_tuple
    namespaced_table_name = get_namespaced_tablename(rs_table)
    table_start = time.time()
    extra_msg = "from s3 log: {0}".format(s3_log)
    logstream.write_msg('starting', extra_msg=extra_msg)

    # about to load new day, remove oldest
    rows_deleted = None
    if ttl_days is not None:
        rows_deleted = \
            delete_old_data(psql_helper, db_name, rs_table, ttl_days - 1)
    if rows_deleted:
        logstream.write_msg('delete_ok',
                            extra_msg="{0} rows".format(rows_deleted))

    # Try to reclaim disk space; if it isn't needed, this is fast.
    # Call it here rather than inside the 'if rows_deleted' block to avoid the
    # scenario where rows were deleted but the compact failed: on retry there
    # would be nothing to delete, but since space was never reclaimed there
    # may not be enough room for a new load, so the load would fail forever.
    if ttl_days is not None:
        compact_table(psql_helper, db_name, namespaced_table_name)

    delimiter = read_string('redshift_column_delimiter')
    delimiter = delimiter.decode("string_escape")
    if delimiter not in string.printable:
        delimiter = '\\' + oct(ord(delimiter))

    copy_sql = LOAD % (namespaced_table_name, s3_log, delimiter)
    result = psql_helper.run_sql(
        copy_sql,
        db_name,
        " copying from " + s3_log,
        s3_needed=True,
        time_est_secs=read_int('pipeline.load_step.copy_time_est_secs'))
    if result is not False:
        logstream.write_msg('complete',
                            job_start_secs=table_start,
                            extra_msg=extra_msg)
    return result
Example #22
    def _get_agent_metadata(self, instance_ip: str) -> AgentMetadata:
        for c in self.simulator.aws_clusters:
            for i in c.instances.values():
                if instance_ip == i.ip_address:
                    return AgentMetadata(
                        agent_id=str(uuid.uuid4()),
                        state=(
                            AgentState.ORPHANED
                            if self.simulator.current_time < i.join_time
                            else AgentState.IDLE
                        ),
                        total_resources=ClustermanResources(
                            cpus=i.resources.cpus,
                            mem=i.resources.mem * 1000,
                            disk=(i.resources.disk or staticconf.read_int('ebs_volume_size', 0)) * 1000,
                            gpus=(i.resources.gpus),
                        )
                    )

        # if we don't know the given IP then it's orphaned
        return AgentMetadata(state=AgentState.ORPHANED)
Example #23
def copy_table(psql_helper, db_name, ddate, log_tuple, ttl_days, logstream):
    s3_log, rs_table = log_tuple
    namespaced_table_name = get_namespaced_tablename(rs_table)
    table_start = time.time()
    extra_msg = "from s3 log: {0}".format(s3_log)
    logstream.write_msg('starting', extra_msg=extra_msg)

    # about to load new day, remove oldest
    rows_deleted = None
    if ttl_days is not None:
        rows_deleted = \
            delete_old_data(psql_helper, db_name, rs_table, ttl_days - 1)
    if rows_deleted:
        logstream.write_msg('delete_ok',
                            extra_msg="{0} rows".format(rows_deleted))

    # Try to reclaim disk space; if it isn't needed, this is fast.
    # Call it here rather than inside the 'if rows_deleted' block to avoid the
    # scenario where rows were deleted but the compact failed: on retry there
    # would be nothing to delete, but since space was never reclaimed there
    # may not be enough room for a new load, so the load would fail forever.
    if ttl_days is not None:
        compact_table(psql_helper, db_name, namespaced_table_name)

    delimiter = read_string('redshift_column_delimiter')
    delimiter = delimiter.decode("string_escape")
    if delimiter not in string.printable:
        delimiter = '\\' + oct(ord(delimiter))

    copy_sql = LOAD % (namespaced_table_name, s3_log, delimiter)
    result = psql_helper.run_sql(
        copy_sql,
        db_name, " copying from " + s3_log,
        s3_needed=True,
        time_est_secs=read_int('pipeline.load_step.copy_time_est_secs')
    )
    if result is not False:
        logstream.write_msg('complete', job_start_secs=table_start,
                            extra_msg=extra_msg)
    return result
Example #24
    def __init__(self,
                 config_loc,
                 config_override_loc,
                 emailer,
                 num_processes=1,
                 wait_timeout_sec=60):
        """
        :param config_loc: path of config.yaml
        :type config_loc: string

        :param config_override_loc: path of config-env-dev.yaml
        :type config_override_loc: string

        :param num_processes: number of worker processes to use for sqs request
        :type num_processes: int

        :param wait_timeout_sec: A timeout passed to conditional variable wait
            function.  If thread is woken up on timeout, do some maintenance work.
        :type wait_timeout_sec: int

        """
        self._config_loc = config_loc
        self._config_override_loc = config_override_loc
        self._stop_requested = False
        self._run_once = False
        self.max_error_retries = staticconf.read_int('max_error_retries')
        self.etl_helper = ETLStatusHelper()
        self.jobs_db = TableConnection.get_connection('ScheduledJobs')
        self.runs_db = TableConnection.get_connection('ETLRecords')
        self._num_processes = num_processes
        self._cond = threading.Condition(threading.Lock())
        self._wait_timeout_sec = max(wait_timeout_sec, 60)
        self.emailer = emailer
Example #25
 def _get_timeout(self):
     return read_int("scanner.et_timeout")
Example #26
from pyfiglet import Figlet


def _get_memos_dir():
    dir_memos = staticconf.read_string('dir_memos', default='')
    # try absolute dir
    path_memos = Path(dir_memos)
    if path_memos and path_memos.exists() and path_memos.is_dir():
        return path_memos
    # try relative dir
    path_memos = PATH_CWD / dir_memos
    if path_memos and path_memos.exists() and path_memos.is_dir():
        return path_memos
    return None


# global utilities
FIGLET = Figlet(font='big')
PATH_CWD = Path.cwd()
PATH_CONFIG = PATH_CWD / 'config.yaml'
PATH_RECENT_OPEN = PATH_CWD / 'recent_open.yaml'

staticconf.YamlConfiguration(PATH_CONFIG.as_posix())
PATH_MEMOS = _get_memos_dir()
EDITOR = staticconf.read_string('default_editor',
                                default=os.getenv('EDITOR', 'vi'))
SEARCH_FUZZY = staticconf.read_string('default_search_fuzzy',
                                      default='True').lower() == 'true'
RECENT_OPEN_LIST_SIZE = staticconf.read_int('recent_open_list_size',
                                            default=10)
Example #27
def __load_data_from_s3(
        status_helper, prefixes, date_with_slashes,
        mrjob_path, local, db_name, logstream, force_et=False
        ):
    """
    load_data_from_s3 iterates over prefixes and loads data for a
    particular date for the first prefix where the data exists.  It also
    checks whether data has already been loaded for a date and if so, skips
    the load

    Args:
    status_helper -- An object handle to interact with status table
    prefixes -- a list of s3 prefixes for input data
    date_with_slashes -- a date string of the form 'YYYY/MM/DD'
    mrjob_path -- module.entry_point of the job to extract and \
        transform the data
    local -- True if we're running locally (i.e., devc), False on an AWS instance
    logstream -- a PipelineStreamLogger

    Returns:
    ---
    """
    start_time = time.time()

    table_versions = get_yaml_table_versions(pipeline_yaml_schema_file_path())
    conditions = {
        'table_versions': table_versions,
        'data_date': date_with_slashes
    }
    if status_helper.et_started(conditions, db_name):
        logstream.write_msg(
            "complete",
            extra_msg="skipping: et_step already started"
        )
        return

    prefix_for_this_data = get_next_dir_to_load(
        prefixes, date_with_slashes, local, logstream, force_et
    )
    if not prefix_for_this_data:
        jobtime = 0
        err_msg = "no prefix available date={0}, prefixes={1}".format(
            date_with_slashes, prefixes
        )
        logstream.write_msg("error", error_msg=err_msg)
        status_helper.log_status_result(
            conditions,
            jobtime,
            db_name,
            failed=True, err_msg=err_msg
        )
        raise Exception(err_msg)

    # check if mrjob is already done
    data_we_check = "{0} {1} {2}".format(
        get_s3_output_user_prefix(),
        date_with_slashes,
        local
    )
    logstream.write_msg("running", extra_msg=data_we_check)
    if data_available(
        get_s3_output_user_prefix(),
        date_with_slashes,
        local,
        done_file_name='_SUCCESS'
    ):
        logstream.write_msg(
            "complete",
            extra_msg="skipping: et_step already done"
        )
        return

    jobtime = time.time()
    mrjob_args = create_emr_args(
        date_with_slashes,
        read_int('pipeline.et_step.cores'),
        prefix_for_this_data, local
    )
    status_helper.insert_et(conditions, db_name)
    logstream.write_msg("running", extra_msg=mrjob_args)

    result, err_reason = __run_mr_job(mrjob_path, mrjob_args, logstream)
    failed = not result

    jobtime = time.time() - start_time
    status_helper.log_status_result(
        conditions, jobtime, db_name,
        failed=failed, err_msg=err_reason
    )
    if failed:
        raise Exception(err_reason)
    return
"""
Everything related to creating a WSGI application to actually serve the application.

Also exposes a __main__ function to actually serve the application using a simple default server.
"""
import staticconf
from pyramid.config import Configurator
from wsgiref.simple_server import make_server

from pyramid_api_example.config import load_configuration
from pyramid_api_example.routes import register_routes


def create_application():
    config = Configurator()
    register_routes(config)
    config.scan('pyramid_api_example.views')
    return config.make_wsgi_app()

if __name__ == '__main__':
    load_configuration()
    host = staticconf.read_string('application_bind_host')
    port = staticconf.read_int('application_bind_port')
    app = create_application()
    print('Starting up server on http://{0}:{1}'.format(host, port))
    server = make_server(host, port, app)
    server.serve_forever()
Example #30
def __load_data_from_s3(status_helper,
                        prefixes,
                        date_with_slashes,
                        mrjob_path,
                        local,
                        db_name,
                        logstream,
                        force_et=False):
    """
    load_data_from_s3 iterates over prefixes and loads data for a
    particular date for the first prefix where the data exists.  It also
    checks whether data has already been loaded for a date and if so, skips
    the load

    Args:
    status_helper -- An object handle to interact with status table
    prefixes -- a list of s3 prefixes for input data
    date_with_slashes -- a date string of the form 'YYYY/MM/DD'
    mrjob_path -- module.entry_point of the job to extract and \
        transform the data
    local -- True if we're running locally (i.e., devc), False on an AWS instance
    logstream -- a PipelineStreamLogger

    Returns:
    ---
    """
    start_time = time.time()

    table_versions = get_yaml_table_versions(pipeline_yaml_schema_file_path())
    conditions = {
        'table_versions': table_versions,
        'data_date': date_with_slashes
    }
    if status_helper.et_started(conditions, db_name):
        logstream.write_msg("complete",
                            extra_msg="skipping: et_step already started")
        return

    prefix_for_this_data = get_next_dir_to_load(prefixes, date_with_slashes,
                                                local, logstream, force_et)
    if not prefix_for_this_data:
        jobtime = 0
        err_msg = "no prefix available date={0}, prefixes={1}".format(
            date_with_slashes, prefixes)
        logstream.write_msg("error", error_msg=err_msg)
        status_helper.log_status_result(conditions,
                                        jobtime,
                                        db_name,
                                        failed=True,
                                        err_msg=err_msg)
        raise Exception(err_msg)

    # check if mrjob is already done
    data_we_check = "{0} {1} {2}".format(get_s3_output_user_prefix(),
                                         date_with_slashes, local)
    logstream.write_msg("running", extra_msg=data_we_check)
    if data_available(get_s3_output_user_prefix(),
                      date_with_slashes,
                      local,
                      done_file_name='_SUCCESS'):
        logstream.write_msg("complete",
                            extra_msg="skipping: et_step already done")
        return

    jobtime = time.time()
    mrjob_args = create_emr_args(date_with_slashes,
                                 read_int('pipeline.et_step.cores'),
                                 prefix_for_this_data, local)
    status_helper.insert_et(conditions, db_name)
    logstream.write_msg("running", extra_msg=mrjob_args)

    result, err_reason = __run_mr_job(mrjob_path, mrjob_args, logstream)
    failed = not result

    jobtime = time.time() - start_time
    status_helper.log_status_result(conditions,
                                    jobtime,
                                    db_name,
                                    failed=failed,
                                    err_msg=err_reason)
    if failed:
        raise Exception(err_reason)
    return