Example no. 1
def replace_astral(log_list):
    for item in log_list:
        for key in ["test", "subtest", "message", "stack", "stackwalk_stdout",
                    "stackwalk_stderr"]:
            if key in item:
                item[key] = astral_filter(item[key])
        yield item
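None of these snippets define astral_filter itself. A minimal sketch of such a filter, assuming it does nothing more than replace astral-plane characters (code points above U+FFFF) with a <U+XXXXXX> placeholder, which is what the tests in Examples no. 7 and no. 8 below expect, could look like this:

import re

# Sketch only: matches any character outside the Basic Multilingual Plane.
# Assumes Python 3 (or a wide Python 2 build), where an astral character is a
# single code point rather than a surrogate pair.
ASTRAL_RE = re.compile(u'[\U00010000-\U0010FFFF]')


def astral_filter(text):
    """Replace astral-plane characters with a <U+XXXXXX> token."""
    if text is None:
        return text
    return ASTRAL_RE.sub(lambda m: '<U+{:06X}>'.format(ord(m.group(0))), text)

The real Treeherder helper may handle surrogate pairs and other edge cases differently; this sketch is only meant to make the examples below self-contained.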
Example no. 2
    def store_text_log_summary(self, job, text_log_summary_artifact):
        """
        Store the contents of the text log summary artifact
        """
        step_data = json.loads(text_log_summary_artifact['blob'])['step_data']
        result_map = {v: k for (k, v) in TextLogStep.RESULTS}
        for step in step_data['steps']:
            name = step['name'][:TextLogStep._meta.get_field('name').max_length]
            defaults = {'name': name, 'result': result_map[step['result']]}
            # process start/end times if we have them
            # we currently don't support timezones in treeherder, so
            # just ignore that when importing/updating the bug to avoid
            # a ValueError (though by default the text log summaries
            # we produce should have time expressed in UTC anyway)
            for tkey in ('started', 'finished'):
                if step.get(tkey):
                    defaults[tkey] = dateutil.parser.parse(step[tkey],
                                                           ignoretz=True)

            log_step, _ = TextLogStep.objects.update_or_create(
                job=job,
                started_line_number=step['started_linenumber'],
                finished_line_number=step['finished_linenumber'],
                defaults=defaults)

            if step.get('errors'):
                for error in step['errors']:
                    TextLogError.objects.update_or_create(
                        step=log_step,
                        line_number=error['linenumber'],
                        defaults={'line': astral_filter(error['line'])})

        # create a set of bug suggestions immediately
        load_error_summary(job.repository.name, job.id)
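The method only reads a handful of fixed keys out of the artifact's JSON blob. Purely as an illustration (every concrete value below is invented; only the key names come from the code above), the payload it expects is shaped roughly like this:

import json

# Hypothetical artifact: key names mirror what store_text_log_summary reads,
# the values are made up for illustration.
text_log_summary_artifact = {
    'blob': json.dumps({
        'step_data': {
            'steps': [{
                'name': 'run tests',
                'result': 'testfailed',
                'started_linenumber': 10,
                'finished_linenumber': 980,
                'started': '2016-07-13 16:09:31',
                'finished': '2016-07-13 16:11:07',
                'errors': [
                    {'linenumber': 911, 'line': 'TEST-UNEXPECTED-FAIL ...'},
                ],
            }],
        },
    }),
}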
Example no. 3
def _get_unicode_string(author):
    try:
        return astral_filter(smart_text(author))
    except DjangoUnicodeDecodeError:
        # in certain annoying cases it seems like a field that should be
        # utf-8 is specified in extended ascii (pretty sure this is a mysql
        # issue, since mercurial/github should store/return in utf-8), so
        # do a conversion
        return author.decode('iso-8859-1')
Example no. 4
def store_text_log_summary_artifact(job, text_log_summary_artifact):
    """
    Store the contents of the text log summary artifact
    """
    errors = json.loads(text_log_summary_artifact['blob'])['errors']

    with transaction.atomic():
        for error in errors:
            obj, created = TextLogError.objects.get_or_create(
                job=job,
                line_number=error['linenumber'],
                line=astral_filter(error['line']),
            )
            if not created:
                logger.warning('duplicate error lines processed for job %s',
                               job.id)

    # get error summary immediately (to warm the cache)
    error_summary.get_error_summary(job)
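This variant only looks at a flat 'errors' list, so its blob is much smaller. Again the values are invented and only the key names are taken from the code:

import json

# Hypothetical minimal payload for the errors-only variant above.
text_log_summary_artifact = {
    'blob': json.dumps({
        'errors': [
            {'linenumber': 1234, 'line': 'ERROR - application terminated'},
        ],
    }),
}

# Given an existing job instance, the errors would then be stored with:
#     store_text_log_summary_artifact(job, text_log_summary_artifact)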
Example no. 5
def store_text_log_summary_artifact(job, text_log_summary_artifact):
    """
    Store the contents of the text log summary artifact
    """
    step_data = json.loads(text_log_summary_artifact['blob'])['step_data']
    result_map = {v: k for (k, v) in TextLogStep.RESULTS}
    with transaction.atomic():
        for step in step_data['steps']:
            name = step['name'][:TextLogStep._meta.get_field('name').max_length]
            # process start/end times if we have them
            # we currently don't support timezones in treeherder, so
            # just ignore that when importing/updating the bug to avoid
            # a ValueError (though by default the text log summaries
            # we produce should have time expressed in UTC anyway)
            time_kwargs = {}
            for tkey in ('started', 'finished'):
                if step.get(tkey):
                    time_kwargs[tkey] = dateutil.parser.parse(step[tkey],
                                                              ignoretz=True)

            log_step = TextLogStep.objects.create(
                job=job,
                started_line_number=step['started_linenumber'],
                finished_line_number=step['finished_linenumber'],
                name=name,
                result=result_map[step['result']],
                **time_kwargs,
            )

            if step.get('errors'):
                for error in step['errors']:
                    TextLogError.objects.create(
                        job=job,
                        step=log_step,
                        line_number=error['linenumber'],
                        line=astral_filter(error['line']),
                    )

    # get error summary immediately (to warm the cache)
    error_summary.get_error_summary(job)
Example no. 6
def store_text_log_summary_artifact(job, text_log_summary_artifact):
    """
    Store the contents of the text log summary artifact
    """
    step_data = json.loads(
        text_log_summary_artifact['blob'])['step_data']
    result_map = {v: k for (k, v) in TextLogStep.RESULTS}
    with transaction.atomic():
        for step in step_data['steps']:
            name = step['name'][:TextLogStep._meta.get_field('name').max_length]
            # process start/end times if we have them
            # we currently don't support timezones in treeherder, so
            # just ignore that when importing/updating the bug to avoid
            # a ValueError (though by default the text log summaries
            # we produce should have time expressed in UTC anyway)
            time_kwargs = {}
            for tkey in ('started', 'finished'):
                if step.get(tkey):
                    time_kwargs[tkey] = dateutil.parser.parse(
                        step[tkey], ignoretz=True)

            log_step = TextLogStep.objects.create(
                job=job,
                started_line_number=step['started_linenumber'],
                finished_line_number=step['finished_linenumber'],
                name=name,
                result=result_map[step['result']],
                **time_kwargs)

            if step.get('errors'):
                for error in step['errors']:
                    TextLogError.objects.create(
                        step=log_step,
                        line_number=error['linenumber'],
                        line=astral_filter(error['line']))

    # get error summary immediately (to warm the cache)
    error_summary.get_error_summary(job)
Example no. 7
def test_astra_filter_hex_value():
    """check the expected outcome is also not changed"""
    hex_values = '\U00000048\U00000049'
    assert hex_values == astral_filter(hex_values)
Example no. 8
def test_astra_filter_emoji():
    output = astral_filter(u'🍆')
    expected = '<U+01F346>'
    assert output == expected
Example no. 9
    def handle(self, *args, **options):
        if options['project']:
            projects = options['project']
        else:
            projects = Datasource.objects.values_list('project', flat=True)

        for ds in Datasource.objects.filter(project__in=projects):
            print(ds.project)
            try:
                repository = Repository.objects.get(name=ds.project)
            except Repository.DoesNotExist:
                self.stderr.write('No repository for datasource project {}, skipping'.format(
                    ds.project))
                continue

            # we do the migration in two passes: first we migrate the text log
            # steps, then the errors
            db_options = settings.DATABASES['default'].get('OPTIONS', {})
            db = MySQLdb.connect(
                host=settings.DATABASES['default']['HOST'],
                db=ds.name,
                user=settings.DATABASES['default']['USER'],
                passwd=settings.DATABASES['default'].get('PASSWORD') or '',
                **db_options
            )
            c = db.cursor()
            offset = 0
            limit = options['batch_size']
            result_map = dict((v, k) for (k, v) in TextLogStep.RESULTS)

            while True:
                job_id_pairs = Job.objects.filter(
                    id__gt=offset,
                    repository=repository).values_list(
                        'id', 'project_specific_id')[:limit]
                if len(job_id_pairs) == 0:
                    break
                ds_job_ids = set([job_id_pair[1] for job_id_pair in job_id_pairs])
                # filter out those job ids for which we already have
                # generated job details
                ds_job_ids -= set(TextLogStep.objects.filter(
                    job__repository=repository,
                    job__project_specific_id__in=ds_job_ids).values_list(
                        'job__project_specific_id', flat=True))
                start = time.time()
                if ds_job_ids:
                    job_id_mapping = dict((project_specific_id, job_id) for
                                          (job_id, project_specific_id) in
                                          job_id_pairs)
                    c.execute(
                        """SELECT job_id, `blob` from job_artifact
                           where `name` = 'text_log_summary'
                           and job_id in ({})""".format(
                               ",".join([str(job_id) for job_id in ds_job_ids])))
                    text_log_steps = []
                    text_log_errors = []

                    def unwrap(row):
                        steps = json.loads(zlib.decompress(row[1]))['step_data']['steps']
                        for step in steps:
                            step['name'] = step['name'][:TextLogStep._meta.get_field('name').max_length]
                            for tkey in ['started', 'finished']:
                                if step.get(tkey):
                                    step[tkey] = dateutil.parser.parse(
                                        step[tkey], ignoretz=True)
                        return (row[0], steps)

                    with transaction.atomic():
                        with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
                            for (ds_job_id, steps) in executor.map(unwrap, c.fetchall()):
                                for step in steps:
                                    if step.get('errors'):
                                        text_log_step = TextLogStep.objects.create(
                                            name=step['name'],
                                            result=result_map[step['result']],
                                            job_id=job_id_mapping[ds_job_id],
                                            started_line_number=step['started_linenumber'],
                                            finished_line_number=step['finished_linenumber'],
                                            started=step.get('started'),
                                            finished=step.get('finished'))
                                        lines_covered = set()
                                        for error in step['errors']:
                                            line_number = error['linenumber']
                                            if line_number not in lines_covered:
                                                text_log_errors.append(TextLogError(
                                                    line_number=line_number,
                                                    step=text_log_step,
                                                    line=astral_filter(error['line'])))
                                                lines_covered.add(line_number)
                                    else:
                                        text_log_steps.append(TextLogStep(
                                            name=step['name'],
                                            result=result_map[step['result']],
                                            job_id=job_id_mapping[ds_job_id],
                                            started_line_number=step['started_linenumber'],
                                            finished_line_number=step['finished_linenumber'],
                                            started=step.get('started'),
                                            finished=step.get('finished')))
                        TextLogStep.objects.bulk_create(text_log_steps)
                        TextLogError.objects.bulk_create(text_log_errors)
                self.stdout.write('{} ({})'.format(offset, time.time() - start), ending='')
                self.stdout.flush()
                offset = max([job_id_pair[0] for job_id_pair in job_id_pairs])
            print('\n')