Example #1
    def __init__(self, message="ping", every="second", start=None, until=None):
        if is_text(message):
            self.message = show_message(message)
        else:
            self.message = message

        self.every = Duration(every)

        if isinstance(until, Signal):
            self.please_stop = until
        elif until == None:
            self.please_stop = Signal()
        else:
            self.please_stop = Till(Duration(until).seconds)

        self.thread = None
        if start:
            self.thread = Thread.run(
                "repeat",
                _repeat,
                self.message,
                self.every,
                Date(start),
                parent_thread=MAIN_THREAD,
                please_stop=self.please_stop,
            ).release()
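A minimal sketch of the stop-condition handling above, using only the pieces the example already shows (Signal, Till, Duration from mo_threads/mo_times); the helper name is hypothetical.

from mo_threads import Signal, Till
from mo_times import Duration

def make_stop_signal(until=None):
    # hypothetical helper mirroring the constructor logic above
    if isinstance(until, Signal):
        return until                          # caller already supplied a stop signal
    if until is None:
        return Signal()                       # fires only when triggered elsewhere
    return Till(Duration(until).seconds)      # fires after the given interval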
Example #2
    def __init__(
        self,
        interval,  # TIME INTERVAL BETWEEN RUNS
        starting,  # THE TIME TO START THE INTERVAL COUNT
        max_runtime=MAX_RUNTIME,  # LIMIT HOW LONG THE PROCESS IS ALIVE
        wait_for_shutdown=WAIT_FOR_SHUTDOWN,  # LIMIT PATIENCE WHEN ASKING FOR SHUTDOWN, THEN SEND KILL
        process=None,
    ):
        self.duration = Duration(interval)
        self.starting = coalesce(Date(starting), Date.now())
        self.max_runtime = Duration(max_runtime)
        self.wait_for_shutdown = Duration(wait_for_shutdown)
        # Process parameters
        self.process = process

        # STATE
        self.last_started = None
        self.last_finished = None
        self.run_count = 0
        self.fail_count = 0
        self.current = None
        self.terminator = None  # SIGNAL TO KILL THE PROCESS
        self.next_run = self._next_run()
        self.next = Till(till=self.next_run)
        self.next_run.then(self.run)
Example #3
    def __init__(
        self,
        host,
        index,
        port=9200,
        type="log",
        queue_size=1000,
        batch_size=100,
        kwargs=None,
    ):
        """
        settings ARE FOR THE ELASTICSEARCH INDEX
        """
        kwargs.timeout = Duration(coalesce(kwargs.timeout, "30second")).seconds
        kwargs.retry.times = coalesce(kwargs.retry.times, 3)
        kwargs.retry.sleep = Duration(coalesce(kwargs.retry.sleep, MINUTE)).seconds

        self.es = Cluster(kwargs).get_or_create_index(
            schema=json2value(value2json(SCHEMA), leaves=True),
            limit_replicas=True,
            typed=True,
            kwargs=kwargs,
        )
        self.batch_size = batch_size
        self.es.add_alias(coalesce(kwargs.alias, kwargs.index))
        self.queue = Queue("debug logs to es", max=queue_size, silent=True)

        self.worker = Thread.run("add debug logs to es", self._insert_loop)
Example #4
    def __init__(
        self,
        host,
        index,
        port=9200,
        type="log",
        queue_size=1000,
        batch_size=100,
        refresh_interval="1second",
        kwargs=None,
    ):
        """
        settings ARE FOR THE ELASTICSEARCH INDEX
        """
        kwargs.timeout = Duration(coalesce(kwargs.timeout, "30second")).seconds
        kwargs.retry.times = coalesce(kwargs.retry.times, 3)
        kwargs.retry.sleep = Duration(coalesce(kwargs.retry.sleep,
                                               MINUTE)).seconds
        kwargs.host = randoms.sample(listwrap(host), 1)[0]

        rollover_interval = coalesce(kwargs.rollover.interval,
                                     kwargs.rollover.max, "year")
        rollover_max = coalesce(kwargs.rollover.max, kwargs.rollover.interval,
                                "year")

        schema = set_default(
            kwargs.schema,
            {
                "mappings": {
                    kwargs.type: {
                        "properties": {
                            "~N~": {
                                "type": "nested"
                            }
                        }
                    }
                }
            },
            json2value(value2json(SCHEMA), leaves=True),
        )

        self.es = RolloverIndex(
            rollover_field={"get": [{
                "first": "."
            }, {
                "literal": "timestamp"
            }]},
            rollover_interval=rollover_interval,
            rollover_max=rollover_max,
            schema=schema,
            limit_replicas=True,
            typed=True,
            read_only=False,
            kwargs=kwargs,
        )
        self.batch_size = batch_size
        self.queue = Queue("debug logs to es", max=queue_size, silent=True)

        self.worker = Thread.run("add debug logs to es", self._insert_loop)
Example #5
    def __init__(self, field, interval=DAY, expire=NEVER, flake=Null, kwargs=None):
        column = first(flake.leaves(field))
        if not column:
            Log.error("expecting {{field}} in snowflake for partitioning", field=field)

        self.field = column.es_column
        self.interval = Duration(interval)
        self.expire = Duration(expire)
        if not isinstance(self.interval, Duration) or not isinstance(
            self.expire, Duration
        ):
            Log.error("expecting durations")
Example #6
    def __init__(self, config):
        self.config = config = wrap(config)
        config.range.min = Date(config.range.min)
        config.range.max = Date(config.range.max)
        config.start = Date(config.start)
        config.interval = Duration(config.interval)
        config.branches = listwrap(config.branches)
        self.destination = bigquery.Dataset(config.destination).get_or_create_table(
            config.destination
        )

        # CALCULATE THE PREVIOUS RUN
        mozci_version = self.version("mozci")
        self.etl_config_table = jx_sqlite.Container(
            config.config_db
        ).get_or_create_facts("etl-range")
        done_result = wrap(self.etl_config_table.query()).data
        prev_done = done_result[0]
        if len(done_result) and prev_done.mozci_version == mozci_version:
            self.done = Data(
                mozci_version=mozci_version,
                min=Date(coalesce(prev_done.min, config.start, "today-2day")),
                max=Date(coalesce(prev_done.max, config.start, "today-2day")),
            )
        else:
            self.done = Data(
                mozci_version=mozci_version,
                min=Date(coalesce(config.start, "today-2day")),
                max=Date(coalesce(config.start, "today-2day")),
            )
            self.etl_config_table.add(self.done)
Example #7
def queue_consumer(pull_queue, please_stop=None):
    queue = aws.Queue(pull_queue)
    time_offset = None
    request_count = 0

    while not please_stop:
        request = queue.pop(till=please_stop)
        if please_stop:
            break
        if not request:
            Log.note("Nothing in queue, pausing for 5 seconds...")
            (please_stop | Till(seconds=5)).wait()
            continue

        if SKIP_TRY_REQUESTS and 'try' in request.where['and'].eq.branch:
            Log.note("Skipping try revision.")
            queue.commit()
            continue

        now = Date.now().unix
        if time_offset is None:
            time_offset = now - request.meta.request_time

        next_request = request.meta.request_time + time_offset
        if next_request > now:
            Log.note("Next request in {{wait_time}}",
                     wait_time=Duration(seconds=next_request - now))
            Till(till=next_request).wait()

        Thread.run("request " + text_type(request_count), one_request, request)
        request_count += 1
        queue.commit()
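A minimal sketch of the "wait for a stop signal or a timeout, whichever fires first" pattern used in the loop above, assuming only the mo_threads primitives already shown.

from mo_threads import Signal, Till

please_stop = Signal()
# returns after 5 seconds, or immediately once please_stop is triggered
(please_stop | Till(seconds=5)).wait()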
Example #8
def main():
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)
        with SingleInstance(flavor_id=settings.args.filename):
            settings.run_interval = Duration(settings.run_interval)
            for u in settings.utility:
                u.discount = coalesce(u.discount, 0)
                # MARKUP drives WITH EXPECTED device MAPPING
                num_ephemeral_volumes = ephemeral_storage[
                    u.instance_type]["num"]
                for i, d in enumerate(d for d in u.drives if not d.device):
                    letter = convert.ascii2char(98 + num_ephemeral_volumes + i)
                    d.device = "/dev/xvd" + letter

            settings.utility = UniqueIndex(["instance_type"],
                                           data=settings.utility)
            instance_manager = new_instance(settings.instance)
            m = SpotManager(instance_manager, kwargs=settings)

            if ENABLE_SIDE_EFFECTS:
                m.update_spot_requests()

            if m.watcher:
                m.watcher.join()
    except Exception as e:
        Log.warning("Problem with spot manager", cause=e)
    finally:
        Log.stop()
        MAIN_THREAD.stop()
Example #9
    def __init__(self,
                 from_address,
                 to_address,
                 subject,
                 region,
                 aws_access_key_id=None,
                 aws_secret_access_key=None,
                 cc=None,
                 log_type="ses",
                 average_interval=HOUR,
                 kwargs=None):
        """
        SEND WARNINGS AND ERRORS VIA EMAIL

        settings = {
            "log_type": "ses",
            "from_address": "*****@*****.**",
            "to_address": "*****@*****.**",
            "cc":[
                {"to_address":"*****@*****.**", "where":{"eq":{"template":"gr"}}}
            ],
            "subject": "[ALERT][STAGING] Problem in ETL",
            "aws_access_key_id": "userkey"
            "aws_secret_access_key": "secret"
            "region":"us-west-2"
        }
        """
        assert kwargs.log_type == "ses", "Expecing settings to be of type 'ses'"
        self.settings = kwargs
        self.accumulation = []
        self.cc = listwrap(cc)
        self.next_send = Date.now() + MINUTE
        self.locker = Lock()
        self.settings.average_interval = Duration(kwargs.average_interval)
Example #10
File: main.py Project: mozilla/cia-tasks
    def __init__(self, config):
        self.config = config = wrap(config)
        config.range.min = Date(config.range.min)
        config.range.max = Date(config.range.max)
        config.start = Date(config.start)
        config.interval = Duration(config.interval)
        config.branches = listwrap(config.branches)
        self.destination = bigquery.Dataset(
            config.destination).get_or_create_table(config.destination)

        # CALCULATE THE PREVIOUS RUN
        mozci_version = self.version("mozci")
        prev_done = self.get_state()
        if prev_done and prev_done.mozci_version == mozci_version:
            self.done = Data(
                mozci_version=mozci_version,
                min=Date(coalesce(prev_done.min, config.start, "today-2day")),
                max=Date(coalesce(prev_done.max, config.start, "today-2day")),
            )
        else:
            self.done = Data(
                mozci_version=mozci_version,
                min=Date(coalesce(config.start, "today-2day")),
                max=Date(coalesce(config.start, "today-2day")),
            )
            self.set_state()
Example #11
    def _request_spot_instances(self, price, availability_zone_group,
                                instance_type, kwargs):
        kwargs.self = None
        kwargs.kwargs = None

        # m3 INSTANCES ARE NOT ALLOWED PLACEMENT GROUP
        if instance_type.startswith("m3."):
            kwargs.placement_group = None

        kwargs.network_interfaces = NetworkInterfaceCollection(
            *(NetworkInterfaceSpecification(**i)
              for i in listwrap(kwargs.network_interfaces)
              if self.vpc_conn.get_all_subnets(
                  subnet_ids=i.subnet_id,
                  filters={"availabilityZone": availability_zone_group})))

        if len(kwargs.network_interfaces) == 0:
            Log.error(
                "No network interface specifications found for {{availability_zone}}!",
                availability_zone=kwargs.availability_zone_group)

        block_device_map = BlockDeviceMapping()

        # GENERIC BLOCK DEVICE MAPPING
        for dev, dev_settings in kwargs.block_device_map.items():
            block_device_map[dev] = BlockDeviceType(delete_on_termination=True,
                                                    **dev_settings)
        kwargs.block_device_map = block_device_map

        # INCLUDE EPHEMERAL STORAGE IN BlockDeviceMapping
        num_ephemeral_volumes = ephemeral_storage[instance_type]["num"]
        for i in range(num_ephemeral_volumes):
            letter = convert.ascii2char(98 + i)  # START AT "b"
            kwargs.block_device_map["/dev/sd" + letter] = BlockDeviceType(
                ephemeral_name='ephemeral' + text(i),
                delete_on_termination=True)

        if kwargs.expiration:
            kwargs.valid_until = (Date.now() +
                                  Duration(kwargs.expiration)).format(ISO8601)
            kwargs.expiration = None

        # ATTACH NEW EBS VOLUMES
        for i, drive in enumerate(self.settings.utility[instance_type].drives):
            letter = convert.ascii2char(98 + i + num_ephemeral_volumes)
            device = drive.device = coalesce(drive.device, "/dev/sd" + letter)
            d = drive.copy()
            d.path = None  # path AND device PROPERTY IS NOT ALLOWED IN THE BlockDeviceType
            d.device = None
            if d.size:
                kwargs.block_device_map[device] = BlockDeviceType(
                    delete_on_termination=True, **d)

        output = list(self.ec2_conn.request_spot_instances(**kwargs))
        return output
Example #12
    def setup(
        self,
        instance,  # THE boto INSTANCE OBJECT FOR THE MACHINE TO SETUP
        utility  # THE utility OBJECT FOUND IN CONFIG
    ):
        with self.locker:
            if not self.settings.setup_timeout:
                Log.error(
                    "expecting instance.setup_timeout to prevent setup from locking"
                )

            def worker(please_stop):
                cpu_count = int(round(utility.cpu))

                with hide('output'):
                    Log.note("setup {{instance}}", instance=instance.id)
                    self._config_fabric(instance)
                    Log.note("update packages on {{instance}} ip={{ip}}",
                             instance=instance.id,
                             ip=instance.ip_address)
                    try:
                        self._update_ubuntu_packages()
                    except Exception as e:
                        Log.warning(
                            "Can not setup {{instance}}, type={{type}}",
                            instance=instance.id,
                            type=instance.instance_type,
                            cause=e)
                        return
                    Log.note("setup etl on {{instance}}", instance=instance.id)
                    self._setup_etl_code()
                    Log.note("setup grcov on {{instance}}",
                             instance=instance.id)
                    self._setup_grcov()
                    Log.note("add config file on {{instance}}",
                             instance=instance.id)
                    self._add_private_file()
                    Log.note("setup supervisor on {{instance}}",
                             instance=instance.id)
                    self._setup_etl_supervisor(cpu_count)
                    Log.note("setup done {{instance}}", instance=instance.id)

            worker_thread = Thread.run(
                "etl setup started at " + unicode(Date.now().format()), worker)
            (Till(timeout=Duration(self.settings.setup_timeout).seconds)
             | worker_thread.stopped).wait()
            if not worker_thread.stopped:
                Log.error("critical failure in thread {{name|quote}}",
                          name=worker_thread.name)
            worker_thread.join()
Example #13
 def __init__(self,
              from_address,
              to_address,
              subject,
              region,
              aws_access_key_id=None,
              aws_secret_access_key=None,
              cc=None,
              log_type="ses",
              max_interval=HOUR,
              kwargs=None):
     assert kwargs.log_type == "ses", "Expecing settings to be of type 'ses'"
     self.settings = kwargs
     self.accumulation = []
     self.cc = listwrap(cc)
     self.next_send = Date.now() + MINUTE
     self.locker = Lock()
     self.settings.max_interval = Duration(kwargs.max_interval)
Example #14
    def __init__(
        self,
        from_address,
        to_address,
        subject,
        host,
        username,
        password,
        port=465,
        use_ssl=1,
        cc=None,
        log_type="email",
        max_interval=HOUR,
        kwargs=None
    ):
        """
        SEND WARNINGS AND ERRORS VIA EMAIL

        settings = {
            "log_type":"email",
            "from_address": "*****@*****.**",
            "to_address": "*****@*****.**",
            "cc":[
                {"to_address":"*****@*****.**", "where":{"eq":{"template":"gr"}}}
            ],
            "subject": "Problem in Pulse Logger",
            "host": "mail.mozilla.com",
            "port": 465,
            "username": "******",
            "password": "******",
            "use_ssl": 1
        }

        """
        assert kwargs.log_type == "email", "Expecing settings to be of type 'email'"
        self.settings = kwargs
        self.accumulation = []
        self.cc = listwrap(cc)
        self.next_send = Date.now() + MINUTE
        self.locker = Lock()
        self.settings.max_interval = Duration(kwargs.max_interval)
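A short sketch of the date and duration arithmetic relied on above (Date.now() + MINUTE, Duration(HOUR)), assuming the mo_times constants combine with dates as these examples show.

from mo_times import Date, Duration, HOUR, MINUTE

next_send = Date.now() + MINUTE    # a Date one minute in the future
max_interval = Duration(HOUR)      # wrapping an existing constant still yields a Duration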
Example #15
    def __init__(self, instance_manager, disable_prices=False, kwargs=None):
        self.settings = kwargs
        self.instance_manager = instance_manager
        aws_args = dict(region_name=kwargs.aws.region,
                        aws_access_key_id=unwrap(kwargs.aws.aws_access_key_id),
                        aws_secret_access_key=unwrap(
                            kwargs.aws.aws_secret_access_key))
        self.ec2_conn = boto.ec2.connect_to_region(**aws_args)
        self.vpc_conn = boto.vpc.connect_to_region(**aws_args)
        self.price_locker = Lock()
        self.prices = None
        self.price_lookup = None
        self.no_capacity = {}
        self.no_capacity_file = File(
            kwargs.price_file).parent / "no capacity.json"
        self.done_making_new_spot_requests = Signal()
        self.net_new_locker = Lock()
        self.net_new_spot_requests = UniqueIndex(
            ("id", ))  # SPOT REQUESTS FOR THIS SESSION
        self.watcher = None
        self.active = None

        self.settings.uptime.bid_percentile = coalesce(
            self.settings.uptime.bid_percentile, self.settings.bid_percentile)
        self.settings.uptime.history = coalesce(
            Date(self.settings.uptime.history), DAY)
        self.settings.uptime.duration = coalesce(
            Duration(self.settings.uptime.duration), Duration("5minute"))
        self.settings.max_percent_per_type = coalesce(
            self.settings.max_percent_per_type, 1)

        if ENABLE_SIDE_EFFECTS and instance_manager and instance_manager.setup_required():
            self._start_life_cycle_watcher()
        if not disable_prices:
            self.pricing()
Example #16
def main():
    since = Date.today() - Duration(SCATTER_RANGE)

    if config.database.host not in listwrap(
            config.analysis.expected_database_host):
        Log.error("Expecting database to be one of {{expected}}",
                  expected=config.analysis.expected_database_host)
    if not config.analysis.interesting:
        Log.alert(
            "Expecting config file to have `analysis.interesting` with a json expression.  All series are included."
        )

    # SETUP DESTINATION
    deviant_summary = bigquery.Dataset(
        config.deviant_summary).get_or_create_table(
            read_only=True, kwargs=config.deviant_summary)

    if config.args.id:
        # EXIT EARLY AFTER WE GOT THE SPECIFIC IDS
        if len(config.args.id) < 4:
            step_detector.SHOW_CHARTS = True
        for signature_hash in config.args.id:
            process(
                signature_hash,
                since=since,
                source=config.database,
                deviant_summary=deviant_summary,
                show=True,
            )
        return

    # DOWNLOAD
    if config.args.download:
        # GET INTERESTING SERIES
        where_clause = BQLang[jx_expression(
            config.analysis.interesting)].to_bq(deviant_summary.schema)

        # GET ALL KNOWN SERIES
        docs = list(
            deviant_summary.sql_query(f"""
                SELECT * EXCEPT (_rank, values) 
                FROM (
                  SELECT 
                    *, 
                    row_number() over (partition by id order by last_updated desc) as _rank 
                  FROM  
                    {quote_column(deviant_summary.full_name)}
                  ) a 
                WHERE _rank=1 and {sql_iso(where_clause)}
                LIMIT {quote_value(DOWNLOAD_LIMIT)}
            """))
        if len(docs) == DOWNLOAD_LIMIT:
            Log.warning("Not all signatures downloaded")
        File(config.args.download).write(list2tab(docs, separator=","))

    # DEVIANT
    show_sorted(
        config=config,
        since=since,
        source=config.database,
        deviant_summary=deviant_summary,
        sort={
            "value": {
                "abs": "overall_dev_score"
            },
            "sort": "desc"
        },
        limit=config.args.deviant,
        show_old=False,
        show_distribution=True,
    )

    # MODAL
    show_sorted(
        config=config,
        since=since,
        source=config.database,
        deviant_summary=deviant_summary,
        sort="overall_dev_score",
        limit=config.args.modal,
        where={"eq": {
            "overall_dev_status": "MODAL"
        }},
        show_distribution=True,
    )

    # OUTLIERS
    show_sorted(
        config=config,
        since=since,
        source=config.database,
        deviant_summary=deviant_summary,
        sort={
            "value": "overall_dev_score",
            "sort": "desc"
        },
        limit=config.args.outliers,
        where={"eq": {
            "overall_dev_status": "OUTLIERS"
        }},
        show_distribution=True,
    )

    # SKEWED
    show_sorted(
        config=config,
        since=since,
        source=config.database,
        deviant_summary=deviant_summary,
        sort={
            "value": {
                "abs": "overall_dev_score"
            },
            "sort": "desc"
        },
        limit=config.args.skewed,
        where={"eq": {
            "overall_dev_status": "SKEWED"
        }},
        show_distribution=True,
    )

    # OK
    show_sorted(
        config=config,
        since=since,
        source=config.database,
        deviant_summary=deviant_summary,
        sort={
            "value": {
                "abs": "overall_dev_score"
            },
            "sort": "desc"
        },
        limit=config.args.ok,
        where={"eq": {
            "overall_dev_status": "OK"
        }},
        show_distribution=True,
    )

    # NOISE
    show_sorted(
        config=config,
        since=since,
        source=config.database,
        deviant_summary=deviant_summary,
        sort={
            "value": {
                "abs": "relative_noise"
            },
            "sort": "desc"
        },
        where={"gte": {
            "num_pushes": 30
        }},
        limit=config.args.noise,
    )

    # EXTRA
    show_sorted(
        config=config,
        since=since,
        source=config.database,
        deviant_summary=deviant_summary,
        sort={
            "value": {
                "abs": "max_extra_diff"
            },
            "sort": "desc"
        },
        where={"lte": {
            "num_new_segments": 7
        }},
        limit=config.args.extra,
    )

    # MISSING
    show_sorted(
        config=config,
        since=since,
        source=config.database,
        deviant_summary=deviant_summary,
        sort={
            "value": {
                "abs": "max_missing_diff"
            },
            "sort": "desc"
        },
        where={"lte": {
            "num_old_segments": 6
        }},
        limit=config.args.missing,
    )

    # PATHOLOGICAL
    show_sorted(
        config=config,
        since=since,
        source=config.database,
        deviant_summary=deviant_summary,
        sort={
            "value": "num_segments",
            "sort": "desc"
        },
        limit=config.args.pathological,
    )
Example #17
 def __init__(self, start, duration, child):
     self.duration = Duration(duration)
     self.start = self.last_value = Date(start).floor(self.duration)
     self.batch = 0
     self.child = child
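A hedged sketch of the floor-to-interval idea in the constructor above, assuming Date(...).floor(Duration(...)) rounds a date down to the start of its interval, as the code relies on.

from mo_times import DAY, Date, Duration

start_of_period = Date.now().floor(Duration(DAY))   # presumably the start of the current day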
Example #18
 def _cleaner(self, please_stop):
     while not please_stop:
         (
             please_stop | Till(seconds=Duration(WRITE_INTERVAL).total_seconds())
         ).wait()
         self.clean()
Example #19
 def parse_time_interval(self, interval: str) -> timedelta:
     duration = Duration(interval)
     return timedelta(seconds=duration.total_seconds())
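A small usage sketch of the conversion above, assuming Duration("30second") parses as in the other examples on this page.

from datetime import timedelta
from mo_times import Duration

delay = timedelta(seconds=Duration("30second").total_seconds())
assert delay == timedelta(seconds=30)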
Example #20
from mo_threads import Lock, Signal, Thread, Till
from mo_threads.threads import MAIN_THREAD
from mo_times import DAY, Date, Duration, HOUR, MINUTE, SECOND, Timer, WEEK
from pyLibrary import convert
from pyLibrary.meta import cache, new_instance

_please_import = http

SINGLE_THREAD_SETUP = False
ENABLE_SIDE_EFFECTS = True
ALLOW_SHUTDOWN = True
DEBUG_PRICING = True
TIME_FROM_RUNNING_TO_LOGIN = 7 * MINUTE
ERROR_ON_CALL_TO_SETUP = "Problem with setup()"
DELAY_BEFORE_SETUP = 1 * MINUTE  # PROBLEM WITH CONNECTING ONLY HAPPENS WITH BIGGER ES MACHINES
CAPACITY_NOT_AVAILABLE_RETRY = Duration(
    "day")  # SOME MACHINES ARE NOT AVAILABLE


class SpotManager(object):
    @override
    def __init__(self, instance_manager, disable_prices=False, kwargs=None):
        self.settings = kwargs
        self.instance_manager = instance_manager
        aws_args = dict(region_name=kwargs.aws.region,
                        aws_access_key_id=unwrap(kwargs.aws.aws_access_key_id),
                        aws_secret_access_key=unwrap(
                            kwargs.aws.aws_secret_access_key))
        self.ec2_conn = boto.ec2.connect_to_region(**aws_args)
        self.vpc_conn = boto.vpc.connect_to_region(**aws_args)
        self.price_locker = Lock()
        self.prices = None
Example #21
File: main.py Project: mozilla/cia-tasks
    set_default,
)
from mo_json import value2json, json2value
from mo_logs import startup, constants, Log
from mo_threads import Process, Till
from mo_threads.repeat import Repeat
from mo_times import Date, Duration, Timer, MINUTE
from pyLibrary.env import git
from pyLibrary.meta import extend

MAX_RUNTIME = "50minute"  # STOP PROCESSING AFTER THIS GIVEN TIME
DEFAULT_START = "today-2day"
LOOK_BACK = 30
LOOK_FORWARD = 30
CACHY_STATE = "cia-tasks/etl/schedules"
CACHY_RETENTION = Duration("30day") / MINUTE
SHOW_S3_CACHE_HIT = True
SECRET_PREFIX = "project/cia/smart-scheduling"
SECRET_NAMES = [
    "destination.account_info",
]


def inject_secrets(config):
    """
    INJECT THE SECRETS INTO THE CONFIGURATION
    :param config: CONFIG DATA

    ************************************************************************
    ** ENSURE YOU HAVE AN ENVIRONMENT VARIABLE SET:
    ** TASKCLUSTER_ROOT_URL = https://community-tc.services.mozilla.com
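A short sketch of the CACHY_RETENTION arithmetic above, on the assumption that dividing one duration by another yields a plain number (here, the retention expressed in minutes).

from mo_times import Duration, MINUTE

retention_in_minutes = Duration("30day") / MINUTE
print(retention_in_minutes)   # expected: 43200 (30 days * 24 hours * 60 minutes)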
Example #22
def process(
    about_deviant,
    since,
    source,
    deviant_summary,
    show=False,
    show_limit=MAX_POINTS,
    show_old=False,
    show_distribution=None,
):
    """
    :param about_deviant: Record for the signature of interest (its id and overall_dev_status are used)
    :param since: Only data after this date
    :param show:
    :param show_limit:
    :param show_old:
    :param show_distribution:
    :return:
    """
    sig_id = about_deviant.id
    if not isinstance(sig_id, int):
        Log.error("expecting id")

    # GET SIGNATURE DETAILS
    sig = get_signature(db_config=source, signature_id=sig_id)

    # GET THE DATA POINTS FOR THE SIGNATURE
    data = get_dataum(source, sig.id, since=since, limit=show_limit)

    min_date = since.unix
    pushes = jx.sort(
        [{
            "value": median(rows.value),
            "runs": rows,
            "push": {
                "time": unwrap(t)["push.time"]
            },
        } for t, rows in jx.groupby(data, "push.time")
         if t["push\\.time"] > min_date],
        "push.time",
    )

    values = list(pushes.value)
    title = "-".join(
        map(
            str,
            [
                sig.id,
                sig.framework,
                sig.suite,
                sig.test,
                sig.repository,
                sig.platform,
                about_deviant.overall_dev_status,
            ],
        ))
    # EG https://treeherder.mozilla.org/perf.html#/graphs?highlightAlerts=1&series=mozilla-central,fee739b45f7960e4a520d8e0bd781dd9d0a3bec4,1,10&timerange=31536000
    url = "https://treeherder.mozilla.org/perf.html#/graphs?" + value2url_param(
        {
            "highlightAlerts":
            1,
            "series": [
                sig.repository, sig.id, 1,
                coalesce(sig.framework_id, sig.framework)
            ],
            "timerange":
            Duration(TREEHERDER_RANGE).seconds
        })

    Log.note("With {{title}}: {{url}}", title=title, url=url)

    with Timer("find segments"):
        new_segments, new_diffs = find_segments(values, sig.alert_change_type,
                                                sig.alert_threshold)

    # USE PERFHERDER ALERTS TO IDENTIFY OLD SEGMENTS
    old_segments = tuple(
        sorted(
            set([
                i for i, p in enumerate(pushes) if any(r.alert.id
                                                       for r in p.runs)
            ] + [0, len(pushes)])))
    old_medians = [0.0] + [
        np.median(values[s:e])
        for s, e in zip(old_segments[:-1], old_segments[1:])
    ]
    old_diffs = np.array(
        [b / a - 1 for a, b in zip(old_medians[:-1], old_medians[1:])] + [0])

    if len(new_segments) == 1:
        overall_dev_status = None
        overall_dev_score = None
        last_mean = None
        last_std = None
        last_dev_status = None
        last_dev_score = None
        relative_noise = None
        Log.note("not ")
    else:
        # NOISE OF LAST SEGMENT
        s, e = new_segments[-2], new_segments[-1]
        last_segment = np.array(values[s:e])
        ignore = IGNORE_TOP
        trimmed_segment = last_segment[np.argsort(last_segment)
                                       [ignore:-ignore]]
        last_mean = np.mean(trimmed_segment)
        last_std = np.std(trimmed_segment)
        last_dev_status, last_dev_score = deviance(trimmed_segment)
        relative_noise = last_std / last_mean

        # FOR EACH SEGMENT, NORMALIZE MEAN AND VARIANCE
        normalized = []
        for s, e in jx.pairs(new_segments):
            data = np.array(values[s:e])
            norm = (data + last_mean - np.mean(data)) * last_std / np.std(data)
            normalized.extend(norm)

        overall_dev_status, overall_dev_score = deviance(normalized)
        Log.note(
            "\n\tdeviance = {{deviance}}\n\tnoise={{std}}\n\tpushes={{pushes}}\n\tsegments={{num_segments}}",
            title=title,
            deviance=(overall_dev_status, overall_dev_score),
            std=relative_noise,
            pushes=len(values),
            num_segments=len(new_segments) - 1,
        )

        if show_distribution:
            histogram(trimmed_segment,
                      title=last_dev_status + "=" + text(last_dev_score))

    max_extra_diff = None
    max_missing_diff = None
    _is_diff = is_diff(new_segments, old_segments)
    if _is_diff:
        # FOR MISSING POINTS, CALC BIGGEST DIFF
        max_extra_diff = mo_math.MAX(
            abs(d) for s, d in zip(new_segments, new_diffs)
            if all(not (s - TOLERANCE <= o <= s + TOLERANCE)
                   for o in old_segments))
        max_missing_diff = mo_math.MAX(
            abs(d) for s, d in zip(old_segments, old_diffs)
            if all(not (s - TOLERANCE <= n <= s + TOLERANCE)
                   for n in new_segments))

        Log.alert(
            "Disagree max_extra_diff={{max_extra_diff|round(places=3)}}, max_missing_diff={{max_missing_diff|round(places=3)}}",
            max_extra_diff=max_extra_diff,
            max_missing_diff=max_missing_diff,
        )
        Log.note("old={{old}}, new={{new}}",
                 old=old_segments,
                 new=new_segments)
    else:
        Log.note("Agree")

    if show and len(pushes):
        show_old and assign_colors(values, old_segments, title="OLD " + title)
        assign_colors(values, new_segments, title="NEW " + title)
        if url:
            webbrowser.open(url)

    if isinstance(deviant_summary, bigquery.Table):
        Log.note("BigQuery summary not updated")
        return

    deviant_summary.upsert(
        where={"eq": {
            "id": sig.id
        }},
        doc=Data(
            id=sig_id,
            title=title,
            num_pushes=len(values),
            num_segments=len(new_segments) - 1,
            relative_noise=relative_noise,
            overall_dev_status=overall_dev_status,
            overall_dev_score=overall_dev_score,
            last_mean=last_mean,
            last_std=last_std,
            last_dev_status=last_dev_status,
            last_dev_score=last_dev_score,
            last_updated=Date.now(),
            is_diff=_is_diff,
            max_extra_diff=max_extra_diff,
            max_missing_diff=max_missing_diff,
            num_new_segments=len(new_segments),
            num_old_segments=len(old_segments),
        ),
    )
Example #23
    def __init__(self, kwargs=None):
        self.settings = kwargs
        self.schema = SnowflakeSchema(self.settings.snowflake)
        self._extract = extract = kwargs.extract

        # SOME PREP
        get_git_revision()

        # VERIFY WE DO NOT HAVE TOO MANY OTHER PROCESSES WORKING ON STUFF
        with MySQL(**kwargs.snowflake.database) as db:
            processes = None
            try:
                processes = jx.filter(
                    db.query("show processlist"), {
                        "and": [{
                            "neq": {
                                "Command": "Sleep"
                            }
                        }, {
                            "neq": {
                                "Info": "show processlist"
                            }
                        }]
                    })
            except Exception as e:
                Log.warning("no database", cause=e)

            if processes:
                if DEBUG:
                    Log.warning("Processes are running\n{{list|json}}",
                                list=processes)
                else:
                    Log.error("Processes are running\n{{list|json}}",
                              list=processes)

        extract.type = listwrap(extract.type)
        extract.start = listwrap(extract.start)
        extract.batch = listwrap(extract.batch)
        extract.field = listwrap(extract.field)
        if any(
                len(extract.type) != len(other)
                for other in [extract.start, extract.batch, extract.field]):
            Log.error(
                "Expecting same number of dimensions for `type`, `start`, `batch`, and `field` in the `extract` inner object"
            )
        for i, t in enumerate(extract.type):
            if t == "time":
                extract.start[i] = Date(extract.start[i])
                extract.batch[i] = Duration(extract.batch[i])
            elif t == "number":
                pass
            else:
                Log.error('Expecting `extract.type` to be "number" or "time"')

        extract.threads = coalesce(extract.threads, 1)
        self.done_pulling = Signal()
        self.queue = Queue("all batches",
                           max=2 * coalesce(extract.threads, 1),
                           silent=True)

        self.bucket = s3.Bucket(self.settings.destination)
        self.notify = aws.Queue(self.settings.notify)
        Thread.run("get records", self.pull_all_remaining)
Example #24
File: main.py Project: mozilla/cia-tasks
def main():
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)

        # SHUNT PYTHON LOGGING TO MAIN LOGGING
        capture_logging()
        # SHUNT ADR LOGGING TO MAIN LOGGING
        # https://loguru.readthedocs.io/en/stable/api/logger.html#loguru._logger.Logger.add
        capture_loguru()

        if config.taskcluster:
            inject_secrets(config)

        @extend(Configuration)
        def update(self, config):
            """
            Update the configuration object with new parameters
            :param config: dict of configuration
            """
            for k, v in config.items():
                if v != None:
                    self._config[k] = v

            self._config["sources"] = sorted(
                map(os.path.expanduser, set(self._config["sources"])))

            # Use the NullStore by default. This allows us to control whether
            # caching is enabled or not at runtime.
            self._config["cache"].setdefault("stores",
                                             {"null": {
                                                 "driver": "null"
                                             }})
            object.__setattr__(self, "cache", CustomCacheManager(self._config))
            for _, store in self._config["cache"]["stores"].items():
                if store.path and not store.path.endswith("/"):
                    # REQUIRED, OTHERWISE FileStore._create_cache_directory() WILL LOOK AT PARENT DIRECTORY
                    store.path = store.path + "/"

        if SHOW_S3_CACHE_HIT:
            s3_get = S3Store._get

            @extend(S3Store)
            def _get(self, key):
                with Timer("get {{key}} from S3", {"key": key},
                           verbose=False) as timer:
                    output = s3_get(self, key)
                    if output is not None:
                        timer.verbose = True
                    return output

        # UPDATE ADR CONFIGURATION
        with Repeat("waiting for ADR", every="10second"):
            adr.config.update(config.adr)
            # DUMMY TO TRIGGER CACHE
            make_push_objects(from_date=Date.today().format(),
                              to_date=Date.now().format(),
                              branch="autoland")

        outatime = Till(seconds=Duration(MAX_RUNTIME).total_seconds())
        outatime.then(lambda: Log.alert("Out of time, exit early"))
        Schedulers(config).process(outatime)
    except Exception as e:
        Log.warning("Problem with etl! Shutting down.", cause=e)
    finally:
        Log.stop()
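A minimal sketch of the run-time limit pattern above (a Till signal plus a .then callback), assuming the mo_threads/mo_times behavior shown throughout these examples; the "50minute" value mirrors MAX_RUNTIME.

from mo_threads import Till
from mo_times import Duration

outatime = Till(seconds=Duration("50minute").total_seconds())
outatime.then(lambda: print("out of time, exit early"))   # runs when the signal fires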