Example #1
def _load_job(repository, job_datum, push_id, lower_tier_signatures):
    """
    Load a job into the treeherder database

    If the job is a ``retry`` the ``job_guid`` will have a special
    suffix on it.  But the matching ``pending``/``running`` job will not.
    So we append the suffixed ``job_guid`` to ``retry_job_guids``
    so that we can update the job_id_lookup later with the non-suffixed
    ``job_guid`` (root ``job_guid``). Then we can find the right
    ``pending``/``running`` job and update it with this ``retry`` job.
    """
    build_platform, _ = BuildPlatform.objects.get_or_create(
        os_name=job_datum.get('build_platform', {}).get('os_name', 'unknown'),
        platform=job_datum.get('build_platform', {}).get('platform', 'unknown'),
        architecture=job_datum.get('build_platform', {}).get('architecture',
                                                             'unknown'))

    machine_platform, _ = MachinePlatform.objects.get_or_create(
        os_name=job_datum.get('machine_platform', {}).get('os_name', 'unknown'),
        platform=job_datum.get('machine_platform', {}).get('platform', 'unknown'),
        architecture=job_datum.get('machine_platform', {}).get('architecture',
                                                               'unknown'))

    option_names = job_datum.get('option_collection', [])
    option_collection_hash = OptionCollection.calculate_hash(
        option_names)
    if not OptionCollection.objects.filter(
            option_collection_hash=option_collection_hash).exists():
        # in the unlikely event that we haven't seen this set of options
        # before, add the appropriate database rows
        options = []
        for option_name in option_names:
            option, _ = Option.objects.get_or_create(name=option_name)
            options.append(option)
        for option in options:
            OptionCollection.objects.create(
                option_collection_hash=option_collection_hash,
                option=option)

    machine, _ = Machine.objects.get_or_create(
        name=job_datum.get('machine', 'unknown'))

    # if a job with this symbol and name exists, always
    # use its default group (even if that group is different
    # from that specified)
    job_type, _ = JobType.objects.get_or_create(
        symbol=job_datum.get('job_symbol') or 'unknown',
        name=job_datum.get('name') or 'unknown')
    if job_type.job_group:
        job_group = job_type.job_group
    else:
        job_group, _ = JobGroup.objects.get_or_create(
            name=job_datum.get('group_name') or 'unknown',
            symbol=job_datum.get('group_symbol') or 'unknown')
        job_type.job_group = job_group
        job_type.save(update_fields=['job_group'])

    product_name = job_datum.get('product_name', 'unknown')
    if len(product_name.strip()) == 0:
        product_name = 'unknown'
    product, _ = Product.objects.get_or_create(name=product_name)

    job_guid = job_datum['job_guid']
    job_guid = job_guid[0:50]

    who = job_datum.get('who') or 'unknown'
    who = who[0:50]

    reason = job_datum.get('reason') or 'unknown'
    reason = reason[0:125]

    state = job_datum.get('state') or 'unknown'
    state = state[0:25]

    build_system_type = job_datum.get('build_system_type', 'buildbot')

    reference_data_name = job_datum.get('reference_data_name', None)

    default_failure_classification = FailureClassification.objects.get(
        name='not classified')

    sh = sha1()
    sh.update(''.join(
        map(lambda x: str(x),
            [build_system_type, repository.name, build_platform.os_name,
             build_platform.platform, build_platform.architecture,
             machine_platform.os_name, machine_platform.platform,
             machine_platform.architecture,
             job_group.name, job_group.symbol, job_type.name,
             job_type.symbol, option_collection_hash,
             reference_data_name])))
    signature_hash = sh.hexdigest()

    # Should be the buildername in the case of buildbot (if not provided
    # default to using the signature hash)
    if not reference_data_name:
        reference_data_name = signature_hash

    signature, created = ReferenceDataSignatures.objects.get_or_create(
        name=reference_data_name,
        signature=signature_hash,
        build_system_type=build_system_type,
        repository=repository.name, defaults={
            'first_submission_timestamp': time.time(),
            'build_os_name': build_platform.os_name,
            'build_platform': build_platform.platform,
            'build_architecture': build_platform.architecture,
            'machine_os_name': machine_platform.os_name,
            'machine_platform': machine_platform.platform,
            'machine_architecture': machine_platform.architecture,
            'job_group_name': job_group.name,
            'job_group_symbol': job_group.symbol,
            'job_type_name': job_type.name,
            'job_type_symbol': job_type.symbol,
            'option_collection_hash': option_collection_hash
        })

    if created:
        # A new ReferenceDataSignature has been added, so we need
        # to reload lower tier exclusions
        lower_tier_signatures = _get_lower_tier_signatures(repository)

    tier = job_datum.get('tier') or 1
    # job tier signatures override the setting from the job structure
    # Check the signatures list for any supported lower tiers that have
    # an active exclusion profile.

    result = job_datum.get('result', 'unknown')

    # As stated elsewhere, a job will end up in the lowest tier where its
    # signature belongs.  So if a signature is in Tier-2 and Tier-3, it
    # will end up in 3.
    for tier_info in lower_tier_signatures:
        if signature_hash in tier_info["signatures"]:
            tier = tier_info["tier"]

    try:
        duration = JobDuration.objects.values_list(
            'average_duration', flat=True).get(
                repository=repository, signature=signature_hash)
    except JobDuration.DoesNotExist:
        duration = 0

    submit_time = datetime.fromtimestamp(
        _get_number(job_datum.get('submit_timestamp')))
    start_time = datetime.fromtimestamp(
        _get_number(job_datum.get('start_timestamp')))
    end_time = datetime.fromtimestamp(
        _get_number(job_datum.get('end_timestamp')))

    # first, try to create the job with the given guid (if it doesn't
    # exist yet)
    job_guid_root = get_guid_root(job_guid)
    if not Job.objects.filter(guid__in=[job_guid, job_guid_root]).exists():
        # this could theoretically throw an exception if we were processing
        # several updates simultaneously, but that should never happen --
        # and if it does it's better just to error out
        Job.objects.create(
            guid=job_guid,
            repository=repository,
            signature=signature,
            build_platform=build_platform,
            machine_platform=machine_platform,
            machine=machine,
            option_collection_hash=option_collection_hash,
            job_type=job_type,
            product=product,
            failure_classification=default_failure_classification,
            who=who,
            reason=reason,
            result=result,
            state=state,
            tier=tier,
            submit_time=submit_time,
            start_time=start_time,
            end_time=end_time,
            last_modified=datetime.now(),
            running_eta=duration,
            push_id=push_id)

    try:
        job = Job.objects.get(guid=job_guid_root)
    except ObjectDoesNotExist:
        job = Job.objects.get(guid=job_guid)

    # add taskcluster metadata if applicable
    if all([k in job_datum for k in ['taskcluster_task_id', 'taskcluster_retry_id']]):
        try:
            TaskclusterMetadata.objects.create(
                job=job,
                task_id=job_datum['taskcluster_task_id'],
                retry_id=job_datum['taskcluster_retry_id'])
        except IntegrityError:
            pass

    # Update job with any data that would have changed
    Job.objects.filter(id=job.id).update(
        guid=job_guid,
        signature=signature,
        build_platform=build_platform,
        machine_platform=machine_platform,
        machine=machine,
        option_collection_hash=option_collection_hash,
        job_type=job_type,
        product=product,
        failure_classification=default_failure_classification,
        who=who,
        reason=reason,
        result=result,
        state=state,
        tier=tier,
        submit_time=submit_time,
        start_time=start_time,
        end_time=end_time,
        last_modified=datetime.now(),
        running_eta=duration,
        push_id=push_id)

    artifacts = job_datum.get('artifacts', [])

    has_text_log_summary = any(x for x in artifacts
                               if x['name'] == 'text_log_summary')
    if artifacts:
        artifacts = serialize_artifact_json_blobs(artifacts)

        # need to add job guid to artifacts, since they likely weren't
        # present in the beginning
        for artifact in artifacts:
            if not all(k in artifact for k in ("name", "type", "blob")):
                raise ValueError(
                    "Artifact missing properties: {}".format(artifact))
            # Ensure every artifact has a ``job_guid`` value.
            # It is legal to submit an artifact that doesn't have a
            # ``job_guid`` value.  But, if missing, it should inherit that
            # value from the job itself.
            if "job_guid" not in artifact:
                artifact["job_guid"] = job_guid

        store_job_artifacts(artifacts)

    log_refs = job_datum.get('log_references', [])
    job_logs = []
    if log_refs:
        for log in log_refs:
            name = log.get('name') or 'unknown'
            name = name[0:50]

            url = log.get('url') or 'unknown'
            url = url[0:255]

            # this indicates that a summary artifact was submitted with
            # this job that corresponds to the buildbot_text log url.
            # Therefore, the log does not need parsing.  So we should
            # ensure that it's marked as already parsed.
            if has_text_log_summary and name == 'buildbot_text':
                parse_status = JobLog.PARSED
            else:
                parse_status_map = dict([(k, v) for (v, k) in
                                         JobLog.STATUSES])
                mapped_status = parse_status_map.get(
                    log.get('parse_status'))
                if mapped_status:
                    parse_status = mapped_status
                else:
                    parse_status = JobLog.PENDING

            jl, _ = JobLog.objects.get_or_create(
                job=job, name=name, url=url, defaults={
                    'status': parse_status
                })

            job_logs.append(jl)

        _schedule_log_parsing(job, job_logs, result)

    return (job_guid, signature_hash)
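
The docstring above relies on a helper, get_guid_root, to map a suffixed retry job_guid back to its root form; the helper itself is not shown in these examples. The sketch below is a minimal illustration of the behaviour the docstring describes, assuming the retry suffix is appended after an underscore separator (the separator and exact suffix format are assumptions, not taken from the source).

def get_guid_root(guid):
    """Return the root job_guid with any retry suffix stripped.

    Illustrative sketch only: it assumes retry guids look like
    '<root-guid>_<suffix>'; the real helper may differ.
    """
    guid = str(guid)
    if "_" in guid:
        # keep everything before the first underscore (the root guid)
        return guid.split("_", 1)[0]
    return guid

# e.g. a hypothetical retry guid collapses to its root form:
# get_guid_root('5bdf6fa0a362c1c2c3b1a6b7_10') -> '5bdf6fa0a362c1c2c3b1a6b7'
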
Example #2
def _load_job(repository, job_datum, push_id, lower_tier_signatures):
    """
    Load a job into the treeherder database

    If the job is a ``retry`` the ``job_guid`` will have a special
    suffix on it.  But the matching ``pending``/``running`` job will not.
    So we append the suffixed ``job_guid`` to ``retry_job_guids``
    so that we can update the job_id_lookup later with the non-suffixed
    ``job_guid`` (root ``job_guid``). Then we can find the right
    ``pending``/``running`` job and update it with this ``retry`` job.
    """
    build_platform, _ = BuildPlatform.objects.get_or_create(
        os_name=job_datum.get('build_platform', {}).get('os_name', 'unknown'),
        platform=job_datum.get('build_platform',
                               {}).get('platform', 'unknown'),
        architecture=job_datum.get('build_platform',
                                   {}).get('architecture', 'unknown'))

    machine_platform, _ = MachinePlatform.objects.get_or_create(
        os_name=job_datum.get('machine_platform',
                              {}).get('os_name', 'unknown'),
        platform=job_datum.get('machine_platform',
                               {}).get('platform', 'unknown'),
        architecture=job_datum.get('machine_platform',
                                   {}).get('architecture', 'unknown'))

    option_names = job_datum.get('option_collection', [])
    option_collection_hash = OptionCollection.calculate_hash(option_names)
    if not OptionCollection.objects.filter(
            option_collection_hash=option_collection_hash).exists():
        # in the unlikely event that we haven't seen this set of options
        # before, add the appropriate database rows
        options = []
        for option_name in option_names:
            option, _ = Option.objects.get_or_create(name=option_name)
            options.append(option)
        for option in options:
            OptionCollection.objects.create(
                option_collection_hash=option_collection_hash, option=option)

    machine, _ = Machine.objects.get_or_create(
        name=job_datum.get('machine', 'unknown'))

    job_type, _ = JobType.objects.get_or_create(
        symbol=job_datum.get('job_symbol') or 'unknown',
        name=job_datum.get('name') or 'unknown')

    job_group, _ = JobGroup.objects.get_or_create(
        name=job_datum.get('group_name') or 'unknown',
        symbol=job_datum.get('group_symbol') or 'unknown')

    product_name = job_datum.get('product_name', 'unknown')
    if len(product_name.strip()) == 0:
        product_name = 'unknown'
    product, _ = Product.objects.get_or_create(name=product_name)

    job_guid = job_datum['job_guid']
    job_guid = job_guid[0:50]

    who = job_datum.get('who') or 'unknown'
    who = who[0:50]

    reason = job_datum.get('reason') or 'unknown'
    reason = reason[0:125]

    state = job_datum.get('state') or 'unknown'
    state = state[0:25]

    build_system_type = job_datum.get('build_system_type', 'buildbot')

    reference_data_name = job_datum.get('reference_data_name', None)

    default_failure_classification = FailureClassification.objects.get(
        name='not classified')

    sh = sha1()
    sh.update(''.join(
        map(lambda x: str(x), [
            build_system_type, repository.name, build_platform.os_name,
            build_platform.platform, build_platform.architecture,
            machine_platform.os_name, machine_platform.platform,
            machine_platform.architecture, job_group.name, job_group.symbol,
            job_type.name, job_type.symbol, option_collection_hash,
            reference_data_name
        ])))
    signature_hash = sh.hexdigest()

    # Should be the buildername in the case of buildbot (if not provided
    # default to using the signature hash)
    if not reference_data_name:
        reference_data_name = signature_hash

    signature, _ = ReferenceDataSignatures.objects.get_or_create(
        name=reference_data_name,
        signature=signature_hash,
        build_system_type=build_system_type,
        repository=repository.name,
        defaults={
            'first_submission_timestamp': time.time(),
            'build_os_name': build_platform.os_name,
            'build_platform': build_platform.platform,
            'build_architecture': build_platform.architecture,
            'machine_os_name': machine_platform.os_name,
            'machine_platform': machine_platform.platform,
            'machine_architecture': machine_platform.architecture,
            'job_group_name': job_group.name,
            'job_group_symbol': job_group.symbol,
            'job_type_name': job_type.name,
            'job_type_symbol': job_type.symbol,
            'option_collection_hash': option_collection_hash
        })

    tier = job_datum.get('tier') or 1

    result = job_datum.get('result', 'unknown')

    # Job tier signatures override the setting from the job structure
    # Check the signatures list for any supported lower tiers that should
    # have an overridden tier.
    if lower_tier_signatures and signature_hash in lower_tier_signatures:
        tier = lower_tier_signatures[signature_hash]

    submit_time = datetime.fromtimestamp(
        _get_number(job_datum.get('submit_timestamp')))
    start_time = datetime.fromtimestamp(
        _get_number(job_datum.get('start_timestamp')))
    end_time = datetime.fromtimestamp(
        _get_number(job_datum.get('end_timestamp')))

    # first, try to create the job with the given guid (if it doesn't
    # exist yet)
    job_guid_root = get_guid_root(job_guid)
    if not Job.objects.filter(guid__in=[job_guid, job_guid_root]).exists():
        # This could theoretically already have been created by another process
        # that is running updates simultaneously.  So just attempt to create
        # it, but allow it to skip if it's the same guid.  The odds are
        # extremely high that this is a pending and running job that came in
        # quick succession and are being processed by two different workers.
        Job.objects.get_or_create(guid=job_guid,
                                  defaults={
                                      "repository": repository,
                                      "signature": signature,
                                      "build_platform": build_platform,
                                      "machine_platform": machine_platform,
                                      "machine": machine,
                                      "option_collection_hash":
                                      option_collection_hash,
                                      "job_type": job_type,
                                      "job_group": job_group,
                                      "product": product,
                                      "failure_classification":
                                      default_failure_classification,
                                      "who": who,
                                      "reason": reason,
                                      "result": result,
                                      "state": state,
                                      "tier": tier,
                                      "submit_time": submit_time,
                                      "start_time": start_time,
                                      "end_time": end_time,
                                      "last_modified": datetime.now(),
                                      "push_id": push_id
                                  })
    # Can't just use the ``job`` we would get from the ``get_or_create``
    # because we need to try the job_guid_root instance first for update,
    # rather than a possible retry job instance.
    try:
        job = Job.objects.get(guid=job_guid_root)
    except ObjectDoesNotExist:
        job = Job.objects.get(guid=job_guid)

    # add taskcluster metadata if applicable
    if all([
            k in job_datum
            for k in ['taskcluster_task_id', 'taskcluster_retry_id']
    ]):
        try:
            TaskclusterMetadata.objects.create(
                job=job,
                task_id=job_datum['taskcluster_task_id'],
                retry_id=job_datum['taskcluster_retry_id'])
        except IntegrityError:
            pass

    # Update job with any data that would have changed
    Job.objects.filter(id=job.id).update(
        guid=job_guid,
        signature=signature,
        build_platform=build_platform,
        machine_platform=machine_platform,
        machine=machine,
        option_collection_hash=option_collection_hash,
        job_type=job_type,
        job_group=job_group,
        product=product,
        failure_classification=default_failure_classification,
        who=who,
        reason=reason,
        result=result,
        state=state,
        tier=tier,
        submit_time=submit_time,
        start_time=start_time,
        end_time=end_time,
        last_modified=datetime.now(),
        push_id=push_id)

    artifacts = job_datum.get('artifacts', [])

    has_text_log_summary = any(x for x in artifacts
                               if x['name'] == 'text_log_summary')
    if artifacts:
        artifacts = serialize_artifact_json_blobs(artifacts)

        # need to add job guid to artifacts, since they likely weren't
        # present in the beginning
        for artifact in artifacts:
            if not all(k in artifact for k in ("name", "type", "blob")):
                raise ValueError(
                    "Artifact missing properties: {}".format(artifact))
            # Ensure every artifact has a ``job_guid`` value.
            # It is legal to submit an artifact that doesn't have a
            # ``job_guid`` value.  But, if missing, it should inherit that
            # value from the job itself.
            if "job_guid" not in artifact:
                artifact["job_guid"] = job_guid

        store_job_artifacts(artifacts)

    log_refs = job_datum.get('log_references', [])
    job_logs = []
    if log_refs:
        for log in log_refs:
            name = log.get('name') or 'unknown'
            name = name[0:50]

            url = log.get('url') or 'unknown'
            url = url[0:255]

            # this indicates that a summary artifact was submitted with
            # this job that corresponds to the buildbot_text log url.
            # Therefore, the log does not need parsing.  So we should
            # ensure that it's marked as already parsed.
            if has_text_log_summary and name == 'buildbot_text':
                parse_status = JobLog.PARSED
            else:
                parse_status_map = dict([(k, v) for (v, k) in JobLog.STATUSES])
                mapped_status = parse_status_map.get(log.get('parse_status'))
                if mapped_status:
                    parse_status = mapped_status
                else:
                    parse_status = JobLog.PENDING

            jl, _ = JobLog.objects.get_or_create(
                job=job, name=name, url=url, defaults={'status': parse_status})

            job_logs.append(jl)

        _schedule_log_parsing(job, job_logs, result)

    return (job_guid, signature_hash)
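
In this version lower_tier_signatures is consulted as a mapping from signature hash to tier rather than as a list of per-tier entries (compare Example #1). A minimal sketch of how such a mapping might be used is shown below; the hash values are invented purely for illustration.

# Hypothetical mapping of reference-data signature hash -> overriding tier.
lower_tier_signatures = {
    "0d0dd9eb2d2b9dd7486f160ff66c1b1f9b4f4c61": 2,
    "a3c4ef9a7bbd7a4c0b1de2f30d5c6a7b8e9f0123": 3,
}

tier = 1                       # tier supplied with the job data
signature_hash = "0d0dd9eb2d2b9dd7486f160ff66c1b1f9b4f4c61"
if lower_tier_signatures and signature_hash in lower_tier_signatures:
    tier = lower_tier_signatures[signature_hash]   # overridden -> 2
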
Example #3
def _load_job(repository, job_datum, push_id, lower_tier_signatures):
    """
    Load a job into the treeherder database

    If the job is a ``retry`` the ``job_guid`` will have a special
    suffix on it.  But the matching ``pending``/``running`` job will not.
    So we append the suffixed ``job_guid`` to ``retry_job_guids``
    so that we can update the job_id_lookup later with the non-suffixed
    ``job_guid`` (root ``job_guid``). Then we can find the right
    ``pending``/``running`` job and update it with this ``retry`` job.
    """
    build_platform, _ = BuildPlatform.objects.get_or_create(
        os_name=job_datum.get('build_platform', {}).get('os_name', 'unknown'),
        platform=job_datum.get('build_platform', {}).get('platform', 'unknown'),
        architecture=job_datum.get('build_platform', {}).get('architecture',
                                                             'unknown'))

    machine_platform, _ = MachinePlatform.objects.get_or_create(
        os_name=job_datum.get('machine_platform', {}).get('os_name', 'unknown'),
        platform=job_datum.get('machine_platform', {}).get('platform', 'unknown'),
        architecture=job_datum.get('machine_platform', {}).get('architecture',
                                                               'unknown'))

    option_names = job_datum.get('option_collection', [])
    option_collection_hash = OptionCollection.calculate_hash(
        option_names)
    if not OptionCollection.objects.filter(
            option_collection_hash=option_collection_hash).exists():
        # in the unlikely event that we haven't seen this set of options
        # before, add the appropriate database rows
        options = []
        for option_name in option_names:
            option, _ = Option.objects.get_or_create(name=option_name)
            options.append(option)
        for option in options:
            OptionCollection.objects.create(
                option_collection_hash=option_collection_hash,
                option=option)

    machine, _ = Machine.objects.get_or_create(
        name=job_datum.get('machine', 'unknown'))

    job_type, _ = JobType.objects.get_or_create(
        symbol=job_datum.get('job_symbol') or 'unknown',
        name=job_datum.get('name') or 'unknown')

    job_group, _ = JobGroup.objects.get_or_create(
        name=job_datum.get('group_name') or 'unknown',
        symbol=job_datum.get('group_symbol') or 'unknown')

    product_name = job_datum.get('product_name', 'unknown')
    if len(product_name.strip()) == 0:
        product_name = 'unknown'
    product, _ = Product.objects.get_or_create(name=product_name)

    job_guid = job_datum['job_guid']
    job_guid = job_guid[0:50]

    who = job_datum.get('who') or 'unknown'
    who = who[0:50]

    reason = job_datum.get('reason') or 'unknown'
    reason = reason[0:125]

    state = job_datum.get('state') or 'unknown'
    state = state[0:25]

    build_system_type = job_datum.get('build_system_type', 'buildbot')

    reference_data_name = job_datum.get('reference_data_name', None)

    default_failure_classification = FailureClassification.objects.get(
        name='not classified')

    sh = sha1()
    sh.update(''.join(
        map(lambda x: str(x),
            [build_system_type, repository.name, build_platform.os_name,
             build_platform.platform, build_platform.architecture,
             machine_platform.os_name, machine_platform.platform,
             machine_platform.architecture,
             job_group.name, job_group.symbol, job_type.name,
             job_type.symbol, option_collection_hash,
             reference_data_name])))
    signature_hash = sh.hexdigest()

    # Should be the buildername in the case of buildbot (if not provided
    # default to using the signature hash)
    if not reference_data_name:
        reference_data_name = signature_hash

    signature, _ = ReferenceDataSignatures.objects.get_or_create(
        name=reference_data_name,
        signature=signature_hash,
        build_system_type=build_system_type,
        repository=repository.name, defaults={
            'first_submission_timestamp': time.time(),
            'build_os_name': build_platform.os_name,
            'build_platform': build_platform.platform,
            'build_architecture': build_platform.architecture,
            'machine_os_name': machine_platform.os_name,
            'machine_platform': machine_platform.platform,
            'machine_architecture': machine_platform.architecture,
            'job_group_name': job_group.name,
            'job_group_symbol': job_group.symbol,
            'job_type_name': job_type.name,
            'job_type_symbol': job_type.symbol,
            'option_collection_hash': option_collection_hash
        })

    tier = job_datum.get('tier') or 1

    result = job_datum.get('result', 'unknown')

    # Job tier signatures override the setting from the job structure
    # Check the signatures list for any supported lower tiers that should
    # have an overridden tier.
    if lower_tier_signatures and signature_hash in lower_tier_signatures:
        tier = lower_tier_signatures[signature_hash]

    submit_time = datetime.fromtimestamp(
        _get_number(job_datum.get('submit_timestamp')))
    start_time = datetime.fromtimestamp(
        _get_number(job_datum.get('start_timestamp')))
    end_time = datetime.fromtimestamp(
        _get_number(job_datum.get('end_timestamp')))

    # first, try to create the job with the given guid (if it doesn't
    # exist yet)
    job_guid_root = get_guid_root(job_guid)
    if not Job.objects.filter(guid__in=[job_guid, job_guid_root]).exists():
        # This could theoretically already have been created by another process
        # that is running updates simultaneously.  So just attempt to create
        # it, but allow it to skip if it's the same guid.  The odds are
        # extremely high that this is a pending and running job that came in
        # quick succession and are being processed by two different workers.
        Job.objects.get_or_create(
            guid=job_guid,
            defaults={
                "repository": repository,
                "signature": signature,
                "build_platform": build_platform,
                "machine_platform": machine_platform,
                "machine": machine,
                "option_collection_hash": option_collection_hash,
                "job_type": job_type,
                "job_group": job_group,
                "product": product,
                "failure_classification": default_failure_classification,
                "who": who,
                "reason": reason,
                "result": result,
                "state": state,
                "tier": tier,
                "submit_time": submit_time,
                "start_time": start_time,
                "end_time": end_time,
                "last_modified": datetime.now(),
                "push_id": push_id
            }
        )
    # Can't just use the ``job`` we would get from the ``get_or_create``
    # because we need to try the job_guid_root instance first for update,
    # rather than a possible retry job instance.
    try:
        job = Job.objects.get(guid=job_guid_root)
    except ObjectDoesNotExist:
        job = Job.objects.get(guid=job_guid)

    # add taskcluster metadata if applicable
    if all([k in job_datum for k in ['taskcluster_task_id', 'taskcluster_retry_id']]):
        try:
            TaskclusterMetadata.objects.create(
                job=job,
                task_id=job_datum['taskcluster_task_id'],
                retry_id=job_datum['taskcluster_retry_id'])
        except IntegrityError:
            pass

    # Update job with any data that would have changed
    Job.objects.filter(id=job.id).update(
        guid=job_guid,
        signature=signature,
        build_platform=build_platform,
        machine_platform=machine_platform,
        machine=machine,
        option_collection_hash=option_collection_hash,
        job_type=job_type,
        job_group=job_group,
        product=product,
        failure_classification=default_failure_classification,
        who=who,
        reason=reason,
        result=result,
        state=state,
        tier=tier,
        submit_time=submit_time,
        start_time=start_time,
        end_time=end_time,
        last_modified=datetime.now(),
        push_id=push_id)

    artifacts = job_datum.get('artifacts', [])

    has_text_log_summary = any(x for x in artifacts
                               if x['name'] == 'text_log_summary')
    if artifacts:
        artifacts = serialize_artifact_json_blobs(artifacts)

        # need to add job guid to artifacts, since they likely weren't
        # present in the beginning
        for artifact in artifacts:
            if not all(k in artifact for k in ("name", "type", "blob")):
                raise ValueError(
                    "Artifact missing properties: {}".format(artifact))
            # Ensure every artifact has a ``job_guid`` value.
            # It is legal to submit an artifact that doesn't have a
            # ``job_guid`` value.  But, if missing, it should inherit that
            # value from the job itself.
            if "job_guid" not in artifact:
                artifact["job_guid"] = job_guid

        store_job_artifacts(artifacts)

    log_refs = job_datum.get('log_references', [])
    job_logs = []
    if log_refs:
        for log in log_refs:
            name = log.get('name') or 'unknown'
            name = name[0:50]

            url = log.get('url') or 'unknown'
            url = url[0:255]

            # this indicates that a summary artifact was submitted with
            # this job that corresponds to the buildbot_text log url.
            # Therefore, the log does not need parsing.  So we should
            # ensure that it's marked as already parsed.
            if has_text_log_summary and name == 'buildbot_text':
                parse_status = JobLog.PARSED
            else:
                parse_status_map = dict([(k, v) for (v, k) in
                                         JobLog.STATUSES])
                mapped_status = parse_status_map.get(
                    log.get('parse_status'))
                if mapped_status:
                    parse_status = mapped_status
                else:
                    parse_status = JobLog.PENDING

            jl, _ = JobLog.objects.get_or_create(
                job=job, name=name, url=url, defaults={
                    'status': parse_status
                })

            job_logs.append(jl)

        _schedule_log_parsing(job, job_logs, result)

    return (job_guid, signature_hash)
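
Examples #1 through #3 feed a plain str into sha1().update(), which only works on Python 2; Example #4 below adds the .encode('utf-8') call that Python 3 requires. The reference-data hashing they all perform can be summarised as a small standalone helper, sketched here for illustration (the function name is made up, not part of the source).

from hashlib import sha1

def reference_data_hash(*fields):
    # Stringify every field (including None values), concatenate them,
    # and hash the result, mirroring the sha1 usage in the examples above.
    return sha1(''.join(map(str, fields)).encode('utf-8')).hexdigest()

# e.g. (argument values invented for illustration):
# reference_data_hash('buildbot', 'mozilla-central', 'linux', 'linux64',
#                     'x86_64', 'Mochitest', 'M', 'Mochitest', 'M1',
#                     option_collection_hash, None)
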
Example #4
def _load_job(repository, job_datum, push_id):
    """
    Load a job into the treeherder database

    If the job is a ``retry`` the ``job_guid`` will have a special
    suffix on it.  But the matching ``pending``/``running`` job will not.
    So we append the suffixed ``job_guid`` to ``retry_job_guids``
    so that we can update the job_id_lookup later with the non-suffixed
    ``job_guid`` (root ``job_guid``). Then we can find the right
    ``pending``/``running`` job and update it with this ``retry`` job.
    """
    build_platform, _ = BuildPlatform.objects.get_or_create(
        os_name=job_datum.get('build_platform', {}).get('os_name', 'unknown'),
        platform=job_datum.get('build_platform',
                               {}).get('platform', 'unknown'),
        architecture=job_datum.get('build_platform',
                                   {}).get('architecture', 'unknown'),
    )

    machine_platform, _ = MachinePlatform.objects.get_or_create(
        os_name=job_datum.get('machine_platform',
                              {}).get('os_name', 'unknown'),
        platform=job_datum.get('machine_platform',
                               {}).get('platform', 'unknown'),
        architecture=job_datum.get('machine_platform',
                                   {}).get('architecture', 'unknown'),
    )

    option_names = job_datum.get('option_collection', [])
    option_collection_hash = OptionCollection.calculate_hash(option_names)
    if not OptionCollection.objects.filter(
            option_collection_hash=option_collection_hash).exists():
        # in the unlikely event that we haven't seen this set of options
        # before, add the appropriate database rows
        options = []
        for option_name in option_names:
            option, _ = Option.objects.get_or_create(name=option_name)
            options.append(option)
        for option in options:
            OptionCollection.objects.create(
                option_collection_hash=option_collection_hash, option=option)

    machine, _ = Machine.objects.get_or_create(
        name=job_datum.get('machine', 'unknown'))

    job_type, _ = JobType.objects.get_or_create(
        symbol=job_datum.get('job_symbol') or 'unknown',
        name=job_datum.get('name') or 'unknown')

    job_group, _ = JobGroup.objects.get_or_create(
        name=job_datum.get('group_name') or 'unknown',
        symbol=job_datum.get('group_symbol') or 'unknown',
    )

    product_name = job_datum.get('product_name', 'unknown')
    if not product_name.strip():
        product_name = 'unknown'
    product, _ = Product.objects.get_or_create(name=product_name)

    job_guid = job_datum['job_guid']
    job_guid = job_guid[0:50]

    who = job_datum.get('who') or 'unknown'
    who = who[0:50]

    reason = job_datum.get('reason') or 'unknown'
    reason = reason[0:125]

    state = job_datum.get('state') or 'unknown'
    state = state[0:25]

    build_system_type = job_datum.get('build_system_type', 'buildbot')

    reference_data_name = job_datum.get('reference_data_name', None)

    default_failure_classification = FailureClassification.objects.get(
        name='not classified')

    sh = sha1()
    sh.update(''.join(
        map(
            str,
            [
                build_system_type,
                repository.name,
                build_platform.os_name,
                build_platform.platform,
                build_platform.architecture,
                machine_platform.os_name,
                machine_platform.platform,
                machine_platform.architecture,
                job_group.name,
                job_group.symbol,
                job_type.name,
                job_type.symbol,
                option_collection_hash,
                reference_data_name,
            ],
        )).encode('utf-8'))
    signature_hash = sh.hexdigest()

    # Should be the buildername in the case of buildbot (if not provided
    # default to using the signature hash)
    if not reference_data_name:
        reference_data_name = signature_hash

    signature, _ = ReferenceDataSignatures.objects.get_or_create(
        name=reference_data_name,
        signature=signature_hash,
        build_system_type=build_system_type,
        repository=repository.name,
        defaults={
            'first_submission_timestamp': time.time(),
            'build_os_name': build_platform.os_name,
            'build_platform': build_platform.platform,
            'build_architecture': build_platform.architecture,
            'machine_os_name': machine_platform.os_name,
            'machine_platform': machine_platform.platform,
            'machine_architecture': machine_platform.architecture,
            'job_group_name': job_group.name,
            'job_group_symbol': job_group.symbol,
            'job_type_name': job_type.name,
            'job_type_symbol': job_type.symbol,
            'option_collection_hash': option_collection_hash,
        },
    )

    tier = job_datum.get('tier') or 1

    result = job_datum.get('result', 'unknown')

    submit_time = datetime.fromtimestamp(
        _get_number(job_datum.get('submit_timestamp')))
    start_time = datetime.fromtimestamp(
        _get_number(job_datum.get('start_timestamp')))
    end_time = datetime.fromtimestamp(
        _get_number(job_datum.get('end_timestamp')))

    # first, try to create the job with the given guid (if it doesn't
    # exist yet)
    job_guid_root = get_guid_root(job_guid)
    if not Job.objects.filter(guid__in=[job_guid, job_guid_root]).exists():
        # This could theoretically already have been created by another process
        # that is running updates simultaneously.  So just attempt to create
        # it, but allow it to skip if it's the same guid.  The odds are
        # extremely high that this is a pending and running job that came in
        # quick succession and are being processed by two different workers.
        Job.objects.get_or_create(
            guid=job_guid,
            defaults={
                "repository": repository,
                "signature": signature,
                "build_platform": build_platform,
                "machine_platform": machine_platform,
                "machine": machine,
                "option_collection_hash": option_collection_hash,
                "job_type": job_type,
                "job_group": job_group,
                "product": product,
                "failure_classification": default_failure_classification,
                "who": who,
                "reason": reason,
                "result": result,
                "state": state,
                "tier": tier,
                "submit_time": submit_time,
                "start_time": start_time,
                "end_time": end_time,
                "last_modified": datetime.now(),
                "push_id": push_id,
            },
        )
    # Can't just use the ``job`` we would get from the ``get_or_create``
    # because we need to try the job_guid_root instance first for update,
    # rather than a possible retry job instance.
    try:
        job = Job.objects.get(guid=job_guid_root)
    except ObjectDoesNotExist:
        job = Job.objects.get(guid=job_guid)

    # add taskcluster metadata if applicable
    if all([
            k in job_datum
            for k in ['taskcluster_task_id', 'taskcluster_retry_id']
    ]):
        try:
            TaskclusterMetadata.objects.create(
                job=job,
                task_id=job_datum['taskcluster_task_id'],
                retry_id=job_datum['taskcluster_retry_id'],
            )
        except IntegrityError:
            pass

    # Update job with any data that would have changed
    Job.objects.filter(id=job.id).update(
        guid=job_guid,
        signature=signature,
        build_platform=build_platform,
        machine_platform=machine_platform,
        machine=machine,
        option_collection_hash=option_collection_hash,
        job_type=job_type,
        job_group=job_group,
        product=product,
        result=result,
        state=state,
        tier=tier,
        submit_time=submit_time,
        start_time=start_time,
        end_time=end_time,
        last_modified=datetime.now(),
        push_id=push_id,
    )

    log_refs = job_datum.get('log_references', [])
    job_logs = []
    if log_refs:
        for log in log_refs:
            name = log.get('name') or 'unknown'
            name = name[0:50]

            url = log.get('url') or 'unknown'
            url = url[0:255]

            parse_status_map = dict([(k, v) for (v, k) in JobLog.STATUSES])
            mapped_status = parse_status_map.get(log.get('parse_status'))
            if mapped_status:
                parse_status = mapped_status
            else:
                parse_status = JobLog.PENDING

            jl, _ = JobLog.objects.get_or_create(
                job=job, name=name, url=url, defaults={'status': parse_status})

            job_logs.append(jl)

        _schedule_log_parsing(job, job_logs, result, repository)

    return job_guid
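
For reference, the fields that this version of _load_job reads from job_datum can be gathered in one place. The dictionary below is a made-up, minimal example of such a payload; every value is invented for illustration, the option_collection format is an assumption, and real submissions may carry additional fields.

# Hypothetical minimal job_datum; all values are illustrative only.
job_datum = {
    'job_guid': 'abcdef0123456789abcdef0123456789abcdef01/0',
    'name': 'Mochitest',
    'job_symbol': 'M1',
    'group_name': 'Mochitest',
    'group_symbol': 'M',
    'product_name': 'firefox',
    'who': 'someone@example.com',
    'reason': 'scheduled',
    'state': 'completed',
    'result': 'success',
    'tier': 1,
    'build_system_type': 'taskcluster',
    'reference_data_name': 'example-builder-name',
    'build_platform': {'os_name': 'linux',
                       'platform': 'linux64',
                       'architecture': 'x86_64'},
    'machine_platform': {'os_name': 'linux',
                         'platform': 'linux64',
                         'architecture': 'x86_64'},
    'machine': 'test-worker-001',
    'option_collection': ['opt'],   # list of option names (format assumed)
    'submit_timestamp': 1500000000,
    'start_timestamp': 1500000060,
    'end_timestamp': 1500000600,
    'taskcluster_task_id': 'abc123TaskIdExample',
    'taskcluster_retry_id': 0,
    'log_references': [{'name': 'builds-4h',
                        'url': 'https://example.com/log.txt',
                        'parse_status': 'pending'}],
}
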
Example #5
def _load_job(repository, job_datum, push_id, lower_tier_signatures):
    """
    Load a job into the treeherder database

    If the job is a ``retry`` the ``job_guid`` will have a special
    suffix on it.  But the matching ``pending``/``running`` job will not.
    So we append the suffixed ``job_guid`` to ``retry_job_guids``
    so that we can update the job_id_lookup later with the non-suffixed
    ``job_guid`` (root ``job_guid``). Then we can find the right
    ``pending``/``running`` job and update it with this ``retry`` job.
    """
    build_platform, _ = BuildPlatform.objects.get_or_create(
        os_name=job_datum.get('build_platform', {}).get('os_name', 'unknown'),
        platform=job_datum.get('build_platform', {}).get('platform', 'unknown'),
        architecture=job_datum.get('build_platform', {}).get('architecture',
                                                             'unknown'))

    machine_platform, _ = MachinePlatform.objects.get_or_create(
        os_name=job_datum.get('machine_platform', {}).get('os_name', 'unknown'),
        platform=job_datum.get('machine_platform', {}).get('platform', 'unknown'),
        architecture=job_datum.get('machine_platform', {}).get('architecture',
                                                               'unknown'))

    option_names = job_datum.get('option_collection', [])
    option_collection_hash = OptionCollection.calculate_hash(
        option_names)
    if not OptionCollection.objects.filter(
            option_collection_hash=option_collection_hash).exists():
        # in the unlikely event that we haven't seen this set of options
        # before, add the appropriate database rows
        options = []
        for option_name in option_names:
            option, _ = Option.objects.get_or_create(name=option_name)
            options.append(option)
        for option in options:
            OptionCollection.objects.create(
                option_collection_hash=option_collection_hash,
                option=option)

    machine, _ = Machine.objects.get_or_create(
        name=job_datum.get('machine', 'unknown'))

    # if a job with this symbol and name exists, always
    # use its default group (even if that group is different
    # from that specified)
    job_type, _ = JobType.objects.get_or_create(
        symbol=job_datum.get('job_symbol') or 'unknown',
        name=job_datum.get('name') or 'unknown')
    if job_type.job_group:
        job_group = job_type.job_group
    else:
        job_group, _ = JobGroup.objects.get_or_create(
            name=job_datum.get('group_name') or 'unknown',
            symbol=job_datum.get('group_symbol') or 'unknown')
        job_type.job_group = job_group
        job_type.save(update_fields=['job_group'])

    product_name = job_datum.get('product_name', 'unknown')
    if len(product_name.strip()) == 0:
        product_name = 'unknown'
    product, _ = Product.objects.get_or_create(name=product_name)

    job_guid = job_datum['job_guid']
    job_guid = job_guid[0:50]

    who = job_datum.get('who') or 'unknown'
    who = who[0:50]

    reason = job_datum.get('reason') or 'unknown'
    reason = reason[0:125]

    state = job_datum.get('state') or 'unknown'
    state = state[0:25]

    build_system_type = job_datum.get('build_system_type', 'buildbot')

    reference_data_name = job_datum.get('reference_data_name', None)

    default_failure_classification = FailureClassification.objects.get(
        name='not classified')

    sh = sha1()
    sh.update(''.join(
        map(lambda x: str(x),
            [build_system_type, repository.name, build_platform.os_name,
             build_platform.platform, build_platform.architecture,
             machine_platform.os_name, machine_platform.platform,
             machine_platform.architecture,
             job_group.name, job_group.symbol, job_type.name,
             job_type.symbol, option_collection_hash,
             reference_data_name])))
    signature_hash = sh.hexdigest()

    # Should be the buildername in the case of buildbot (if not provided
    # default to using the signature hash)
    if not reference_data_name:
        reference_data_name = signature_hash

    signature, created = ReferenceDataSignatures.objects.get_or_create(
        name=reference_data_name,
        signature=signature_hash,
        build_system_type=build_system_type,
        repository=repository.name, defaults={
            'first_submission_timestamp': time.time(),
            'build_os_name': build_platform.os_name,
            'build_platform': build_platform.platform,
            'build_architecture': build_platform.architecture,
            'machine_os_name': machine_platform.os_name,
            'machine_platform': machine_platform.platform,
            'machine_architecture': machine_platform.architecture,
            'job_group_name': job_group.name,
            'job_group_symbol': job_group.symbol,
            'job_type_name': job_type.name,
            'job_type_symbol': job_type.symbol,
            'option_collection_hash': option_collection_hash
        })

    if created:
        # A new ReferenceDataSignature has been added, so we need
        # to reload lower tier exclusions
        lower_tier_signatures = _get_lower_tier_signatures(repository)

    tier = job_datum.get('tier') or 1
    # job tier signatures override the setting from the job structure
    # Check the signatures list for any supported lower tiers that have
    # an active exclusion profile.

    result = job_datum.get('result', 'unknown')

    # As stated elsewhere, a job will end up in the lowest tier where its
    # signature belongs.  So if a signature is in Tier-2 and Tier-3, it
    # will end up in 3.
    for tier_info in lower_tier_signatures:
        if signature_hash in tier_info["signatures"]:
            tier = tier_info["tier"]

    try:
        duration = JobDuration.objects.values_list(
            'average_duration', flat=True).get(
                repository=repository, signature=signature_hash)
    except JobDuration.DoesNotExist:
        duration = 0

    submit_time = datetime.fromtimestamp(
        _get_number(job_datum.get('submit_timestamp')))
    start_time = datetime.fromtimestamp(
        _get_number(job_datum.get('start_timestamp')))
    end_time = datetime.fromtimestamp(
        _get_number(job_datum.get('end_timestamp')))

    # first, try to create the job with the given guid (if it doesn't
    # exist yet)
    job_guid_root = get_guid_root(job_guid)
    if not Job.objects.filter(guid__in=[job_guid, job_guid_root]).exists():
        # this could theoretically throw an exception if we were processing
        # several updates simultaneously, but that should never happen --
        # and if it does it's better just to error out
        Job.objects.create(
            guid=job_guid,
            repository=repository,
            signature=signature,
            build_platform=build_platform,
            machine_platform=machine_platform,
            machine=machine,
            option_collection_hash=option_collection_hash,
            job_type=job_type,
            product=product,
            failure_classification=default_failure_classification,
            who=who,
            reason=reason,
            result=result,
            state=state,
            tier=tier,
            submit_time=submit_time,
            start_time=start_time,
            end_time=end_time,
            last_modified=datetime.now(),
            running_eta=duration,
            push_id=push_id)

    # if the job was pending, there's nothing more to do here
    # (pending jobs have no artifacts, and we would have just created
    # it)
    if state == 'pending':
        return (job_guid, signature_hash)

    # update job (in the case of a buildbot retrigger, we will
    # get the root object and update that to a retry)
    try:
        job = Job.objects.get(guid=job_guid_root)
    except ObjectDoesNotExist:
        job = Job.objects.get(guid=job_guid)
    Job.objects.filter(id=job.id).update(
        guid=job_guid,
        signature=signature,
        build_platform=build_platform,
        machine_platform=machine_platform,
        machine=machine,
        option_collection_hash=option_collection_hash,
        job_type=job_type,
        product=product,
        failure_classification=default_failure_classification,
        who=who,
        reason=reason,
        result=result,
        state=state,
        tier=tier,
        submit_time=submit_time,
        start_time=start_time,
        end_time=end_time,
        last_modified=datetime.now(),
        running_eta=duration,
        push_id=push_id)

    if all([k in job_datum for k in ['taskcluster_task_id', 'taskcluster_retry_id']]):
        try:
            TaskclusterMetadata.objects.create(
                job=job,
                task_id=job_datum['taskcluster_task_id'],
                retry_id=job_datum['taskcluster_retry_id'])
        except IntegrityError:
            pass
    artifacts = job_datum.get('artifacts', [])

    has_text_log_summary = any(x for x in artifacts
                               if x['name'] == 'text_log_summary')
    if artifacts:
        artifacts = serialize_artifact_json_blobs(artifacts)

        # need to add job guid to artifacts, since they likely weren't
        # present in the beginning
        for artifact in artifacts:
            if not all(k in artifact for k in ("name", "type", "blob")):
                raise ValueError(
                    "Artifact missing properties: {}".format(artifact))
            # Ensure every artifact has a ``job_guid`` value.
            # It is legal to submit an artifact that doesn't have a
            # ``job_guid`` value.  But, if missing, it should inherit that
            # value from the job itself.
            if "job_guid" not in artifact:
                artifact["job_guid"] = job_guid

        store_job_artifacts(artifacts)

    log_refs = job_datum.get('log_references', [])
    if log_refs:
        for log in log_refs:
            name = log.get('name') or 'unknown'
            name = name[0:50]

            url = log.get('url') or 'unknown'
            url = url[0:255]

            # this indicates that a summary artifact was submitted with
            # this job that corresponds to the buildbot_text log url.
            # Therefore, the log does not need parsing.  So we should
            # ensure that it's marked as already parsed.
            if has_text_log_summary and name == 'buildbot_text':
                parse_status = JobLog.PARSED
            else:
                parse_status_map = dict([(k, v) for (v, k) in
                                         JobLog.STATUSES])
                mapped_status = parse_status_map.get(
                    log.get('parse_status'))
                if mapped_status:
                    parse_status = mapped_status
                else:
                    parse_status = JobLog.PENDING

            jl, _ = JobLog.objects.get_or_create(
                job=job, name=name, url=url, defaults={
                    'status': parse_status
                })

            _schedule_log_parsing(jl, result)

    return (job_guid, signature_hash)
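
The parse_status_map built inside the log loop inverts Django-style (value, name) choices into a name-to-value lookup. The standalone sketch below shows the same inversion on an assumed STATUSES tuple; the concrete values are an assumption, not taken from the JobLog model.

# Assumed Django-style choices: (stored value, submitted name) pairs.
STATUSES = ((0, 'pending'),
            (1, 'parsed'),
            (2, 'failed'))

# Invert to look up the stored value from the submitted status string.
parse_status_map = {name: value for (value, name) in STATUSES}

print(parse_status_map.get('parsed'))   # 1
print(parse_status_map.get('bogus'))    # None -> callers fall back to PENDING
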
Example #6
    def _load_job(self, job_datum, push_id, lower_tier_signatures):
        """
        Load a job into the treeherder database

        If the job is a ``retry`` the ``job_guid`` will have a special
        suffix on it.  But the matching ``pending``/``running`` job will not.
        So we append the suffixed ``job_guid`` to ``retry_job_guids``
        so that we can update the job_id_lookup later with the non-suffixed
        ``job_guid`` (root ``job_guid``). Then we can find the right
        ``pending``/``running`` job and update it with this ``retry`` job.
        """
        build_platform, _ = BuildPlatform.objects.get_or_create(
            os_name=job_datum.get('build_platform', {}).get('os_name', 'unknown'),
            platform=job_datum.get('build_platform', {}).get('platform', 'unknown'),
            architecture=job_datum.get('build_platform', {}).get('architecture',
                                                                 'unknown'))

        machine_platform, _ = MachinePlatform.objects.get_or_create(
            os_name=job_datum.get('machine_platform', {}).get('os_name', 'unknown'),
            platform=job_datum.get('machine_platform', {}).get('platform', 'unknown'),
            architecture=job_datum.get('machine_platform', {}).get('architecture',
                                                                   'unknown'))

        option_names = job_datum.get('option_collection', [])
        option_collection_hash = OptionCollection.calculate_hash(
            option_names)
        if not OptionCollection.objects.filter(
                option_collection_hash=option_collection_hash).exists():
            # in the unlikely event that we haven't seen this set of options
            # before, add the appropriate database rows
            options = []
            for option_name in option_names:
                option, _ = Option.objects.get_or_create(name=option_name)
                options.append(option)
            for option in options:
                OptionCollection.objects.create(
                    option_collection_hash=option_collection_hash,
                    option=option)

        machine, _ = Machine.objects.get_or_create(
            name=job_datum.get('machine', 'unknown'))

        # if a job with this symbol and name exists, always
        # use its default group (even if that group is different
        # from that specified)
        job_type, _ = JobType.objects.get_or_create(
            symbol=job_datum.get('job_symbol') or 'unknown',
            name=job_datum.get('name') or 'unknown')
        if job_type.job_group:
            job_group = job_type.job_group
        else:
            job_group, _ = JobGroup.objects.get_or_create(
                name=job_datum.get('group_name') or 'unknown',
                symbol=job_datum.get('group_symbol') or 'unknown')
            job_type.job_group = job_group
            job_type.save(update_fields=['job_group'])

        product_name = job_datum.get('product_name', 'unknown')
        if len(product_name.strip()) == 0:
            product_name = 'unknown'
        product, _ = Product.objects.get_or_create(name=product_name)

        job_guid = job_datum['job_guid']
        job_guid = job_guid[0:50]

        who = job_datum.get('who') or 'unknown'
        who = who[0:50]

        reason = job_datum.get('reason') or 'unknown'
        reason = reason[0:125]

        state = job_datum.get('state') or 'unknown'
        state = state[0:25]

        build_system_type = job_datum.get('build_system_type', 'buildbot')

        reference_data_name = job_datum.get('reference_data_name', None)

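        # Build a sha1 over the build system, repository, platform, job
        # group/type and option data; this signature identifies the job's
        # reference data configuration.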
        sh = sha1()
        sh.update(''.join(
            str(x) for x in
            [build_system_type, self.project, build_platform.os_name,
             build_platform.platform, build_platform.architecture,
             machine_platform.os_name, machine_platform.platform,
             machine_platform.architecture,
             job_group.name, job_group.symbol, job_type.name,
             job_type.symbol, option_collection_hash,
             reference_data_name]).encode('utf-8'))  # hashlib requires bytes
        signature_hash = sh.hexdigest()

        # For buildbot this should be the buildername; if none was provided,
        # fall back to using the signature hash.
        if not reference_data_name:
            reference_data_name = signature_hash

        signature, created = ReferenceDataSignatures.objects.get_or_create(
            name=reference_data_name,
            signature=signature_hash,
            build_system_type=build_system_type,
            repository=self.project, defaults={
                'first_submission_timestamp': time.time(),
                'build_os_name': build_platform.os_name,
                'build_platform': build_platform.platform,
                'build_architecture': build_platform.architecture,
                'machine_os_name': machine_platform.os_name,
                'machine_platform': machine_platform.platform,
                'machine_architecture': machine_platform.architecture,
                'job_group_name': job_group.name,
                'job_group_symbol': job_group.symbol,
                'job_type_name': job_type.name,
                'job_type_symbol': job_type.symbol,
                'option_collection_hash': option_collection_hash
            })

        if created:
            # A new ReferenceDataSignature has been added, so we need
            # to reload lower tier exclusions
            lower_tier_signatures = self._get_lower_tier_signatures()

        result = job_datum.get('result', 'unknown')

        tier = job_datum.get('tier') or 1
        # Tier signatures override the tier given in the job structure: check
        # the signatures list for any supported lower tiers that have an
        # active exclusion profile.  A job ends up in the lowest tier its
        # signature belongs to, so a signature present in both Tier-2 and
        # Tier-3 lands in Tier-3.
        for tier_info in lower_tier_signatures:
            if signature_hash in tier_info["signatures"]:
                tier = tier_info["tier"]

        try:
            duration = JobDuration.objects.values_list(
                'average_duration', flat=True).get(
                    repository__name=self.project, signature=signature_hash)
        except JobDuration.DoesNotExist:
            duration = 0

        # try to insert the job unconditionally (if it already exists, this
        # will be a no-op)
        self.execute(
            proc='jobs.inserts.create_job_data',
            debug_show=self.DEBUG,
            placeholders=[
                [
                    job_guid,
                    signature_hash,
                    None,                   # idx:2, job_coalesced_to_guid,
                    None,
                    push_id,
                    build_platform.id,
                    machine_platform.id,
                    machine.id,
                    option_collection_hash,
                    job_type.id,
                    product.id,
                    who,
                    reason,
                    result,
                    state,
                    self.get_number(job_datum.get('submit_timestamp')),
                    self.get_number(job_datum.get('start_timestamp')),
                    self.get_number(job_datum.get('end_timestamp')),
                    duration,
                    tier,
                    job_guid,
                    get_guid_root(job_guid)  # will be the same except for ``retry`` jobs
                ]
            ],
            executemany=True)

        # by default we should try to update the "root" object
        guid_root = get_guid_root(job_guid)
        ds_job_ids = self.get_job_ids_by_guid([guid_root])
        if ds_job_ids:
            ds_job_id = ds_job_ids[guid_root]['id']
        else:
            ds_job_id = self.get_job_ids_by_guid([job_guid])[job_guid]['id']

        # we might both insert *and* update a job if it comes in with a status
        # that isn't pending, but we're ok with this I think (since this code
        # will be going away soon)
        if state != 'pending':
            self.execute(
                proc="jobs.updates.update_job_data",
                debug_show=self.DEBUG,
                placeholders=[
                    [
                        job_guid,
                        None,
                        None,
                        push_id,
                        machine.id,
                        option_collection_hash,
                        job_type.id,
                        product.id,
                        who,
                        reason,
                        result,
                        state,
                        self.get_number(job_datum.get('start_timestamp')),
                        self.get_number(job_datum.get('end_timestamp')),
                        state,
                        ds_job_id
                    ]
                ],
                executemany=True)

        # create an intermediate representation of the job useful for doing
        # lookups (this will eventually become the main/only/primary jobs table
        # when we finish migrating away from Datasource, see bug 1178641)
        job, _ = Job.objects.update_or_create(
            repository=Repository.objects.get(name=self.project),
            project_specific_id=ds_job_id,
            defaults={
                'guid': job_guid,
                'push_id': push_id
            })

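        # Store any artifacts submitted with the job, defaulting their
        # ``job_guid`` to this job's guid.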
        artifacts = job_datum.get('artifacts', [])

        has_text_log_summary = any(x['name'] == 'text_log_summary'
                                   for x in artifacts)
        if artifacts:
            artifacts = ArtifactsModel.serialize_artifact_json_blobs(artifacts)

            # need to add job guid to artifacts, since they likely weren't
            # present in the beginning
            for artifact in artifacts:
                if not all(k in artifact for k in ("name", "type", "blob")):
                    raise ValueError(
                        "Artifact missing properties: {}".format(artifact))
                # Ensure every artifact has a ``job_guid`` value.
                # It is legal to submit an artifact that doesn't have a
                # ``job_guid`` value.  But, if missing, it should inherit that
                # value from the job itself.
                if "job_guid" not in artifact:
                    artifact["job_guid"] = job_guid

            with ArtifactsModel(self.project) as artifacts_model:
                artifacts_model.load_job_artifacts(artifacts)

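        # Create a JobLog row for each log reference and hand it to the log
        # parsing scheduler; logs already covered by a text_log_summary
        # artifact are marked as parsed.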
        log_refs = job_datum.get('log_references', [])
        if log_refs:
            for log in log_refs:
                name = log.get('name') or 'unknown'
                name = name[0:50]

                url = log.get('url') or 'unknown'
                url = url[0:255]

                # this indicates that a summary artifact was submitted with
                # this job that corresponds to the buildbot_text log url.
                # Therefore, the log does not need parsing.  So we should
                # ensure that it's marked as already parsed.
                if has_text_log_summary and name == 'buildbot_text':
                    parse_status = JobLog.PARSED
                else:
                    parse_status_map = {name: value
                                        for (value, name) in JobLog.STATUSES}
                    mapped_status = parse_status_map.get(
                        log.get('parse_status'))
                    if mapped_status:
                        parse_status = mapped_status
                    else:
                        parse_status = JobLog.PENDING

                jl, _ = JobLog.objects.get_or_create(
                    job=job, name=name, url=url, defaults={
                        'status': parse_status
                    })

                self._schedule_log_parsing(jl, result)

        return (job_guid, signature_hash)
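
    # The ``get_guid_root`` helper used above is not defined in this snippet;
    # a minimal sketch of the assumed behaviour (treating the retry suffix as
    # everything after the first underscore in the guid) could look like:
    #
    #     def get_guid_root(guid):
    #         """Return the job_guid with any retry suffix removed."""
    #         guid = str(guid)
    #         return guid.split("_", 1)[0] if "_" in guid else guid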