Exemplo n.º 1
0
 def retry(self):
     """Create, link and persist a fresh fetch attempt for the same report and period.

     The new attempt records this one as its predecessor via ``queue_previous``.
     """
     new_attempt = self.credentials.fetch_report(
         counter_report=self.counter_report,
         start_date=self.start_date,
         end_date=month_end(self.end_date),
     )
     new_attempt.queue_previous = self
     new_attempt.save()
     return new_attempt
Exemplo n.º 2
0
 def get_data_with_months(self, dim, filter_params, success_metric):
     """Return per-month success/failure counts of fetch attempts grouped by `dim`.

     `dim` must be a key of `self.attr_to_query_param_map`; otherwise an
     HttpResponseBadRequest is returned. Each output record carries the grouping
     values plus `success_count`, `failure_count`, `month` and `month_id`.
     """
     if dim not in self.attr_to_query_param_map:
         return HttpResponseBadRequest(
             'unsupported dimension: "{}"'.format(dim))
     # two separate query fields per dim in order to preserve both the ID of the
     # related field and its text value
     dim_fields = self.attr_to_query_param_map[dim]
     date_range = SushiFetchAttempt.objects.aggregate(start=Min('start_date'),
                                                      end=Max('end_date'))
     cur_date = month_start(date_range['start'])
     last_date = month_end(date_range['end'])
     records = []
     while cur_date < last_date:
         month_qs = (SushiFetchAttempt.objects
                     .filter(**filter_params)
                     .filter(start_date__lte=cur_date, end_date__gte=cur_date)
                     .values(*dim_fields)
                     .annotate(
                         success_count=Count('pk', filter=Q(**{success_metric: True})),
                         failure_count=Count('pk', filter=Q(**{success_metric: False})),
                     ))
         # 'YYYY-MM' taken from the ISO representation of the date
         month_id = '-'.join(str(cur_date).split('-')[:2])
         for rec in month_qs:
             rec['month'] = month_id[2:]
             rec['month_id'] = month_id
             records.append(rec)
         # advance to the first day of the following month
         cur_date = month_start(cur_date + timedelta(days=32))
     return records
Exemplo n.º 3
0
 def perform_create(self, serializer: SushiFetchAttemptSerializer):
     """Store the new fetch attempt and asynchronously start the download task.

     The attempt is marked as in progress and its end date is extended to the
     last day of its month before saving.
     """
     data = serializer.validated_data
     data['in_progress'] = True
     data['end_date'] = month_end(data['end_date'])
     super().perform_create(serializer)
     # schedule the fetch task with a short delay after creation
     run_sushi_fetch_attempt_task.apply_async(
         args=(serializer.instance.pk,), countdown=1)
Exemplo n.º 4
0
def process_fetch_units(fetch_units: [FetchUnit], start_date: date, end_date: date,
                        conflict_ok='skip', conflict_error='smart', sleep_time=0,
                        use_lock=True):
    """
    Download data for all `fetch_units` month by month, moving backwards in
    time from `start_date` towards `end_date` (so `start_date` is expected to
    be >= `end_date`; all units are assumed to share one platform).

    After each month, only units whose attempt yielded data, got queued, or
    failed with an error whose explanation reports `setup_ok` are kept for the
    next (older) month.

    When an attempt conflicts with existing data, the action is taken from
    `conflict_ok` (conflict has data or is queued) or `conflict_error`
    (it does not): 'stop' drops the unit, 'skip' keeps it without downloading
    this month, 'smart' decides from the conflicting attempt's retry timeout.

    :param sleep_time: seconds to wait between individual downloads
    :param use_lock: passed through to `FetchUnit.download`
    """
    while fetch_units and start_date >= end_date:
        new_fetch_units = []
        platform = fetch_units[0].credentials.platform
        logger.debug('Processing %d fetch units for platform %s, %s',
                     len(fetch_units), platform, start_date)
        for fetch_unit in fetch_units:  # type: FetchUnit
            end = month_end(start_date)
            # deal with possible conflict
            conflict = fetch_unit.find_conflicting(start_date, end)
            if conflict:
                action = conflict_ok if (conflict.contains_data or conflict.queued)\
                    else conflict_error
                if action == 'smart':
                    # smart means that we use the retry timeout of the conflicting attempt
                    # to decide on what to do
                    action = smart_decide_conflict_action(conflict)
                if action == 'stop':
                    # drop the unit entirely - it will not be retried for older months
                    logger.debug('Stopping on existing data: %s, %s: %s',
                                 platform, fetch_unit.credentials.organization, start_date)
                    continue
                elif action == 'skip':
                    # keep the unit for older months, but do not download this one
                    logger.debug('Skipping on existing data: %s, %s: %s',
                                 platform, fetch_unit.credentials.organization, start_date)
                    new_fetch_units.append(fetch_unit)
                    continue
                else:
                    logger.debug('Continuing regardless of existing data: %s, %s: %s',
                                 platform, fetch_unit.credentials.organization, start_date)
            # download the data
            fetch_unit.sleep()
            attempt = fetch_unit.download(start_date, end, use_lock=use_lock)
            if attempt.contains_data or attempt.queued:
                new_fetch_units.append(fetch_unit)
            else:
                go_on = False
                # no data in this attempt, we must analyze it further
                if attempt.error_code:
                    error_meaning = attempt.error_explanation()
                    if error_meaning.setup_ok:
                        # this means we can process - the credentials, etc. are OK
                        go_on = True
                if go_on:
                    new_fetch_units.append(fetch_unit)
                else:
                    # fixed typo in log message: 'stoping' -> 'stopping'
                    logger.info('Unsuccessful fetch, stopping: %s, %s: %s',
                                platform, fetch_unit.credentials.organization, start_date)
            # sleep but only if this is not the last in the list - it would not make sense
            # to wait just before finishing
            if fetch_unit is not fetch_units[-1]:
                sleep(sleep_time)
        fetch_units = new_fetch_units
        if fetch_units:
            sleep(sleep_time)  # we will do one more round, we need to sleep
        # going 20 days back from a month's first day always lands in the
        # previous month, whose start becomes the next `start_date`
        start_date = month_start(start_date - timedelta(days=20))
    logger.debug('Finished processing')
Exemplo n.º 5
0
 def handle(self, *args, **options):
     """Management command entry point: schedule SUSHI downloads for all
     credentials/report combinations matching the command line options,
     skipping combinations for which an attempt already exists."""
     self.sleep_time = options['sleep'] / 1000
     cred_filters = {}
     if options['organization']:
         cred_filters['organization__internal_id'] = options['organization']
     if options['platform']:
         cred_filters['platform__short_name'] = options['platform']
     if options['version']:
         cred_filters['counter_version'] = int(options['version'])
     credentials = list(SushiCredentials.objects.filter(**cred_filters))
     report_filters = {'active': True}
     if options['version']:
         report_filters['counter_version'] = int(options['version'])
     if options['report']:
         report_filters['code'] = options['report']
     # now fetch all possible combinations
     start_date = month_start(parse_date(options['start_date']))
     end_date = month_end(parse_date(options['end_date']))
     # requests are grouped by (platform, counter version) and each group is
     # processed in a separate thread
     platform_counter_v_to_requests = {}
     for cred in credentials:
         for cr in cred.active_counter_reports.filter(**report_filters):
             key = (cred.platform_id, cred.counter_version)
             # unless --skip_on_unsuccess is given, only fully successful
             # attempts cause a combination to be skipped
             success_filters = {} if options['skip_on_unsuccess'] \
                 else {'download_success': True, 'processing_success': True}
             already_there = SushiFetchAttempt.objects.filter(
                 credentials=cred,
                 counter_report=cr,
                 start_date=start_date,
                 end_date=end_date,
                 **success_filters,
             ).exists()
             if already_there:
                 self.stderr.write(
                     self.style.SUCCESS(f'Skipping existing {cred}, {cr}'))
             else:
                 platform_counter_v_to_requests.setdefault(key, []).append(
                     (cred, cr, start_date, end_date))
     if not platform_counter_v_to_requests:
         self.stderr.write(self.style.WARNING('No matching reports found!'))
         return
     # let's create some threads and use them to process individual platforms
     with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
         # the loop consumes the iterator so that all downloads actually run
         for _ in executor.map(self.download_list,
                               list(platform_counter_v_to_requests.items())):
             pass
Exemplo n.º 6
0
 def interest(self, request, pk):
     """Return the summed interest for one organization plus the covered date range
     (min/max date and the number of days between them)."""
     org_filter = organization_filter_from_org_id(pk, request.user)
     date_filter = date_filter_from_params(request.GET)
     interest_rt = ReportType.objects.get(short_name='interest',
                                          source__isnull=True)
     stats = (AccessLog.objects
              .filter(report_type=interest_rt, **org_filter, **date_filter)
              .aggregate(interest_sum=Sum('value'), min_date=Min('date'),
                         max_date=Max('date')))
     if stats['max_date'] is None:
         # no matching logs - there are no dates to do the math with ;)
         stats['days'] = 0
     else:
         stats['max_date'] = month_end(stats['max_date'])
         stats['days'] = (stats['max_date'] - stats['min_date']).days + 1
     return Response(stats)
Exemplo n.º 7
0
 def get_queryset(self):
     """Return annotations visible to the current user, narrowed by the
     platform/organization/date query params, with a `can_edit` attribute
     attached to every record."""
     user = self.request.user
     params = self.request.query_params
     org_perm_args = (Q(organization__in=user.accessible_organizations())
                      | Q(organization__isnull=True))
     query_args = []
     for param in ('platform', 'organization'):
         value = params.get(param)
         if not value:
             continue
         if value == 'null':
             # only those with the value equal to None are requested
             query_args.append(Q(**{param + '__isnull': True}))
         else:
             # we filter to include those with specified value or with this value null
             query_args.append(Q(**{param + '_id': value})
                               | Q(**{param + '__isnull': True}))
     exclude_args = []
     start_date = params.get('start_date')
     end_date = params.get('end_date')
     if start_date:
         exclude_args.append(Q(end_date__lte=parse_month(start_date)))
     if end_date:
         exclude_args.append(
             Q(start_date__gte=month_end(parse_month(end_date))))
     if len(exclude_args) > 1:
         # several exclusion conditions must be OR'ed into a single one
         exclude_args = [reduce(operator.or_, exclude_args)]
     qs = (Annotation.objects
           .filter(org_perm_args)
           .filter(*query_args)
           .exclude(*exclude_args)
           .select_related('organization', 'platform')
           .order_by('pk'))
     # attach per-record edit permission derived from the user's access level;
     # organization levels are cached to avoid repeated lookups
     org_to_level = {}
     for annot in qs:  # type: Annotation
         if not annot.organization_id:
             user_org_level = UL_CONS_STAFF \
                 if user.is_superuser or user.is_from_master_organization \
                 else UL_NORMAL
         else:
             if annot.organization_id not in org_to_level:
                 org_to_level[annot.organization_id] = \
                     user.organization_relationship(annot.organization_id)
             user_org_level = org_to_level[annot.organization_id]
         annot.can_edit = user_org_level >= annot.owner_level
     return qs
Exemplo n.º 8
0
 def test_month_end(self, value, result):
     """Parametrized check that month_end() maps `value` to the expected `result`."""
     assert result == month_end(value)