Example #1
    def test_datetime_to_json_date(self):
        d1 = datetime(2014, 1, 2, 3, 4, 5, tzinfo=pytz.utc)
        self.assertEqual(datetime_to_json_date(d1), '2014-01-02T03:04:05.000Z')
        self.assertEqual(json_date_to_datetime('2014-01-02T03:04:05.000Z'), d1)
        self.assertEqual(json_date_to_datetime('2014-01-02T03:04:05.000'), d1)

        tz = pytz.timezone("Africa/Kigali")
        d2 = tz.localize(datetime(2014, 1, 2, 3, 4, 5))
        self.assertEqual(datetime_to_json_date(d2), '2014-01-02T01:04:05.000Z')
        self.assertEqual(json_date_to_datetime('2014-01-02T01:04:05.000Z'), d2.astimezone(pytz.utc))
        self.assertEqual(json_date_to_datetime('2014-01-02T01:04:05.000'), d2.astimezone(pytz.utc))
Example #2
File: tests.py Project: forkkit/ureport
    def test_datetime_to_json_date(self):
        d1 = datetime(2014, 1, 2, 3, 4, 5, tzinfo=pytz.utc)
        self.assertEqual(datetime_to_json_date(d1), "2014-01-02T03:04:05.000Z")
        self.assertEqual(json_date_to_datetime("2014-01-02T03:04:05.000+00:00"), d1)
        self.assertEqual(json_date_to_datetime("2014-01-02T03:04:05.000Z"), d1)
        self.assertEqual(json_date_to_datetime("2014-01-02T03:04:05.000"), d1)

        tz = pytz.timezone("Africa/Kigali")
        d2 = tz.localize(datetime(2014, 1, 2, 3, 4, 5))
        self.assertEqual(datetime_to_json_date(d2), "2014-01-02T01:04:05.000Z")
        self.assertEqual(json_date_to_datetime("2014-01-02T03:04:05+02:00"), d2)
        self.assertEqual(json_date_to_datetime("2014-01-02T01:04:05.000Z"), d2)
        self.assertEqual(json_date_to_datetime("2014-01-02T01:04:05.000"), d2)
Example #3
    def _mark_poll_results_sync_completed(poll, org, latest_synced_obj_time):
        # update the time for this poll from which we fetch next time
        cache.set(Poll.POLL_RESULTS_LAST_PULL_CACHE_KEY % (org.pk, poll.flow_uuid),
                  latest_synced_obj_time, None)
        # update the last time the sync happened
        cache.set(Poll.POLL_RESULTS_LAST_SYNC_TIME_CACHE_KEY % (org.pk, poll.flow_uuid),
                  datetime_to_json_date(timezone.now()), None)
        # clear the saved cursor
        cache.delete(Poll.POLL_RESULTS_LAST_PULL_CURSOR % (org.pk, poll.flow_uuid))

        # Use redis cache with expiring(in 48 hrs) key to allow other polls task
        # to sync all polls without hitting the API rate limit
        cache.set(Poll.POLL_RESULTS_LAST_OTHER_POLLS_SYNCED_CACHE_KEY % (org.id, poll.flow_uuid),
                  datetime_to_json_date(timezone.now()), Poll.POLL_RESULTS_LAST_OTHER_POLLS_SYNCED_CACHE_TIMEOUT)
Example #4
    def populate_poll_poll_date(apps, schema_editor):
        Poll = apps.get_model("polls", "Poll")
        Org = apps.get_model("orgs", "Org")

        agent = getattr(settings, "SITE_API_USER_AGENT", None)
        host = settings.SITE_API_HOST

        for org in Org.objects.all():
            temba_client = TembaClient(host, org.api_token, user_agent=agent)
            api_flows = temba_client.get_flows()
            flows_date = dict()
            for flow in api_flows:
                flows_date[flow.uuid] = datetime_to_json_date(flow.created_on)

            for poll in Poll.objects.filter(org=org):
                json_date = flows_date.get(poll.flow_uuid, None)
                if json_date:
                    date = json_date_to_datetime(json_date)
                else:
                    logger.info(
                        "using created_on for flow_date on poll with id %s" %
                        poll.pk)
                    date = poll.created_on

                poll.poll_date = date
                poll.save()
Example #5
    def pull_refresh_task(self):
        from ureport.utils import datetime_to_json_date

        now = timezone.now()
        cache.set(Poll.POLL_PULL_ALL_RESULTS_AFTER_DELETE_FLAG % (self.org_id, self.pk),
                  datetime_to_json_date(now.replace(tzinfo=pytz.utc)), None)

        Poll.pull_poll_results_task(self)
Example #7
File: models.py Project: babiboy/ureport
    def pull_refresh_task(self):
        from ureport.utils import datetime_to_json_date
        from ureport.polls.tasks import pull_refresh

        now = timezone.now()
        cache.set(Poll.POLL_PULL_ALL_RESULTS_AFTER_DELETE_FLAG % (self.org_id, self.pk),
                  datetime_to_json_date(now.replace(tzinfo=pytz.utc)), None)

        pull_refresh.apply_async((self.pk,), queue='sync')
Example #8
File: floip.py Project: rapidpro/ureport
    def _mark_poll_results_sync_completed(poll, org, latest_synced_obj_time):
        # update the time for this poll from which we fetch next time
        cache.set(Poll.POLL_RESULTS_LAST_PULL_CACHE_KEY % (org.pk, poll.flow_uuid), latest_synced_obj_time, None)
        # update the last time the sync happened
        cache.set(
            Poll.POLL_RESULTS_LAST_SYNC_TIME_CACHE_KEY % (org.pk, poll.flow_uuid),
            datetime_to_json_date(timezone.now()),
            None,
        )
        # clear the saved cursor
        cache.delete(Poll.POLL_RESULTS_LAST_PULL_CURSOR % (org.pk, poll.flow_uuid))

        # Use redis cache with expiring(in 48 hrs) key to allow other polls task
        # to sync all polls without hitting the API rate limit
        cache.set(
            Poll.POLL_RESULTS_LAST_OTHER_POLLS_SYNCED_CACHE_KEY % (org.id, poll.flow_uuid),
            datetime_to_json_date(timezone.now()),
            Poll.POLL_RESULTS_LAST_OTHER_POLLS_SYNCED_CACHE_TIMEOUT,
        )
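Everything _mark_poll_results_sync_completed stores is a JSON date string, so callers can read those markers back and convert them with json_date_to_datetime. The helper below is a hypothetical reader-side sketch, not code from ureport; the helper name and the import paths for Poll and json_date_to_datetime are assumptions:

from django.core.cache import cache
from django.utils import timezone

from ureport.polls.models import Poll  # assumed import path
from ureport.utils import json_date_to_datetime  # assumed import path


def seconds_since_last_results_sync(org, poll):
    # Read the JSON date stored by _mark_poll_results_sync_completed() and
    # report how long ago the last successful sync finished, or None if never.
    key = Poll.POLL_RESULTS_LAST_SYNC_TIME_CACHE_KEY % (org.pk, poll.flow_uuid)
    json_date = cache.get(key, None)
    if json_date is None:
        return None
    return (timezone.now() - json_date_to_datetime(json_date)).total_seconds()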
Example #9
    def pull_refresh_task(self):
        from ureport.utils import datetime_to_json_date
        from ureport.polls.tasks import pull_refresh

        now = timezone.now()
        cache.set(
            Poll.POLL_PULL_ALL_RESULTS_AFTER_DELETE_FLAG %
            (self.org_id, self.pk),
            datetime_to_json_date(now.replace(tzinfo=pytz.utc)), None)

        pull_refresh.apply_async((self.pk, ), queue='sync')
Example #10
    def fetch_contacts(cls, org, after=None):

        print "START== Fetching contacts for %s" % org.name

        reporter_group = org.get_config('reporter_group')

        temba_client = org.get_temba_client()
        api_groups = temba_client.get_groups(name=reporter_group)

        if not api_groups:
            return

        seen_uuids = []

        group_uuid = None

        for grp in api_groups:
            if grp.name.lower() == reporter_group.lower():
                group_uuid = grp.uuid
                break

        now = timezone.now().replace(tzinfo=pytz.utc)
        before = now

        if not after:
            # consider the after year 2013
            after = json_date_to_datetime("2013-01-01T00:00:00.000")

        while before > after:
            pager = temba_client.pager()
            api_contacts = temba_client.get_contacts(before=before,
                                                     after=after,
                                                     pager=pager)

            last_contact_index = len(api_contacts) - 1

            for i, contact in enumerate(api_contacts):
                if i == last_contact_index:
                    before = contact.modified_on

                if group_uuid in contact.groups:
                    cls.update_or_create_from_temba(org, contact)
                    seen_uuids.append(contact.uuid)

            if not pager.has_more():
                cache.set(cls.CONTACT_LAST_FETCHED_CACHE_KEY % org.pk,
                          datetime_to_json_date(now.replace(tzinfo=pytz.utc)),
                          cls.CONTACT_LAST_FETCHED_CACHE_TIMEOUT)
                break

        return seen_uuids
Example #11
    def fetch_contacts(cls, org, after=None):

        print "START== Fetching contacts for %s" % org.name

        reporter_group = org.get_config('reporter_group')

        temba_client = org.get_temba_client()
        api_groups = temba_client.get_groups(name=reporter_group)

        if not api_groups:
            return

        seen_uuids = []

        group_uuid = None

        for grp in api_groups:
            if grp.name.lower() == reporter_group.lower():
                group_uuid = grp.uuid
                break

        now = timezone.now().replace(tzinfo=pytz.utc)
        before = now

        if not after:
            # consider the after year 2013
            after = json_date_to_datetime("2013-01-01T00:00:00.000")

        while before > after:
            pager = temba_client.pager()
            api_contacts = temba_client.get_contacts(before=before, after=after, pager=pager)

            last_contact_index = len(api_contacts) - 1

            for i, contact in enumerate(api_contacts):
                if i == last_contact_index:
                    before = contact.modified_on

                if group_uuid in contact.groups:
                    cls.update_or_create_from_temba(org, contact)
                    seen_uuids.append(contact.uuid)

            if not pager.has_more():
                cache.set(cls.CONTACT_LAST_FETCHED_CACHE_KEY % org.pk,
                          datetime_to_json_date(now.replace(tzinfo=pytz.utc)),
                          cls.CONTACT_LAST_FETCHED_CACHE_TIMEOUT)
                break

        return seen_uuids
Example #12
    def fetch_flows(self, org):
        client = self._get_client(org, 2)
        flows = client.get_flows().all()

        all_flows = dict()
        for flow in flows:
            flow_json = dict()
            flow_json['uuid'] = flow.uuid
            flow_json['date_hint'] = flow.created_on.strftime('%Y-%m-%d')
            flow_json['created_on'] = datetime_to_json_date(flow.created_on)
            flow_json['name'] = flow.name
            flow_json['archived'] = flow.archived
            flow_json['runs'] = flow.runs.active + flow.runs.expired + flow.runs.completed + flow.runs.interrupted
            flow_json['completed_runs'] = flow.runs.completed

            all_flows[flow.uuid] = flow_json
        return all_flows
Example #13
    def fetch_flows(self, org):
        client = self._get_client(org, 2)
        flows = client.get_flows().all()

        all_flows = dict()
        for flow in flows:
            flow_json = dict()
            flow_json["uuid"] = flow.uuid
            flow_json["date_hint"] = flow.created_on.strftime("%Y-%m-%d")
            flow_json["created_on"] = datetime_to_json_date(flow.created_on)
            flow_json["name"] = flow.name
            flow_json["archived"] = flow.archived
            flow_json["runs"] = flow.runs.active + flow.runs.expired + flow.runs.completed + flow.runs.interrupted
            flow_json["completed_runs"] = flow.runs.completed

            all_flows[flow.uuid] = flow_json
        return all_flows
Example #14
    def fetch_flows(self, org):
        client = self._get_client(org, 2)
        flows = client.get_flows().all()

        all_flows = dict()
        for flow in flows:
            flow_json = dict()
            flow_json["uuid"] = flow.uuid
            flow_json["date_hint"] = flow.created_on.strftime("%Y-%m-%d")
            flow_json["created_on"] = datetime_to_json_date(flow.created_on)
            flow_json["name"] = flow.name
            flow_json["archived"] = flow.archived
            flow_json[
                "runs"] = flow.runs.active + flow.runs.expired + flow.runs.completed + flow.runs.interrupted
            flow_json["completed_runs"] = flow.runs.completed
            flow_json["results"] = [{
                "key": elt.key,
                "name": elt.name,
                "categories": elt.categories,
                "node_uuids": elt.node_uuids
            } for elt in flow.results]

            all_flows[flow.uuid] = flow_json
        return all_flows
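fetch_flows() keeps created_on in two forms: a human-readable date_hint and a full JSON date that round-trips through json_date_to_datetime. A small hypothetical consumer of that payload (not part of ureport; the import path is an assumption) could sort flows by creation time like this:

from ureport.utils import json_date_to_datetime  # assumed import path


def flows_newest_first(all_flows):
    # all_flows is the dict returned by fetch_flows(): flow uuid -> flow_json.
    # Parse the stored JSON date to sort flows newest-first.
    return sorted(
        all_flows.values(),
        key=lambda flow_json: json_date_to_datetime(flow_json["created_on"]),
        reverse=True,
    )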
Example #15
    def populate_poll_poll_date(apps, schema_editor):
        Poll = apps.get_model('polls', "Poll")
        Org = apps.get_model('orgs', "Org")

        agent = getattr(settings, 'SITE_API_USER_AGENT', None)
        host = settings.SITE_API_HOST

        for org in Org.objects.all():
            temba_client = TembaClient(host, org.api_token, user_agent=agent)
            api_flows = temba_client.get_flows()
            flows_date = dict()
            for flow in api_flows:
                flows_date[flow.uuid] = datetime_to_json_date(flow.created_on)

            for poll in Poll.objects.filter(org=org):
                json_date = flows_date.get(poll.flow_uuid, None)
                if json_date:
                    date = json_date_to_datetime(json_date)
                else:
                    print "using created_on for flow_date on poll with id %s" % poll.pk
                    date = poll.created_on

                poll.poll_date = date
                poll.save()
Example #16
    def pull_results(self, poll, modified_after, modified_before, progress_callback=None):
        org = poll.org
        r = get_redis_connection()
        key = Poll.POLL_PULL_RESULTS_TASK_LOCK % (org.pk, poll.flow_uuid)

        num_val_created = 0
        num_val_updated = 0
        num_val_ignored = 0

        num_path_created = 0
        num_path_updated = 0
        num_path_ignored = 0

        num_synced = 0

        if r.get(key):
            print "Skipping pulling results for poll #%d on org #%d as it is still running" % (poll.pk, org.pk)
        else:
            with r.lock(key, timeout=Poll.POLL_SYNC_LOCK_TIMEOUT):
                client = self._get_client(org, 2)

                questions_uuids = poll.get_question_uuids()

                # ignore the TaskState time and use the time we stored in redis
                (after, before, latest_synced_obj_time,
                 batches_latest, resume_cursor, pull_after_delete) = poll.get_pull_cached_params()

                if resume_cursor is None:
                    before = datetime_to_json_date(timezone.now())
                    after = latest_synced_obj_time

                if pull_after_delete is not None:
                    after = None
                    poll.delete_poll_results()

                start = time.time()
                print "Start fetching runs for poll #%d on org #%d" % (poll.pk, org.pk)

                poll_runs_query = client.get_runs(flow=poll.flow_uuid, after=after, before=before)
                fetches = poll_runs_query.iterfetches(retry_on_rate_exceed=True, resume_cursor=resume_cursor)

                fetch_start = time.time()
                for fetch in fetches:

                    print "RapidPro API fetch for poll #%d on org #%d %d - %d took %ds" % (poll.pk, org.pk, num_synced,
                                                                                           num_synced + len(fetch),
                                                                                           time.time() - fetch_start)

                    contact_uuids = [run.contact.uuid for run in fetch]
                    contacts = Contact.objects.filter(org=org, uuid__in=contact_uuids)
                    contacts_map = {c.uuid: c for c in contacts}

                    existing_poll_results = PollResult.objects.filter(flow=poll.flow_uuid, org=poll.org_id, contact__in=contact_uuids)

                    poll_results_map = defaultdict(dict)
                    for res in existing_poll_results:
                        poll_results_map[res.contact][res.ruleset] = res

                    poll_results_to_save_map = defaultdict(dict)

                    for temba_run in fetch:

                        if batches_latest is None or temba_run.modified_on > json_date_to_datetime(batches_latest):
                            batches_latest = datetime_to_json_date(temba_run.modified_on.replace(tzinfo=pytz.utc))

                        flow_uuid = temba_run.flow.uuid
                        contact_uuid = temba_run.contact.uuid
                        completed = temba_run.exit_type == 'completed'

                        contact_obj = contacts_map.get(contact_uuid, None)

                        state = ''
                        district = ''
                        ward = ''
                        born = None
                        gender = None
                        if contact_obj is not None:
                            state = contact_obj.state
                            district = contact_obj.district
                            ward = contact_obj.ward
                            born = contact_obj.born
                            gender = contact_obj.gender

                        # sort the run's values chronologically
                        temba_values = sorted(temba_run.values.values(), key=lambda val: val.time)

                        for temba_value in temba_values:
                            ruleset_uuid = temba_value.node
                            category = temba_value.category
                            text = temba_value.value
                            value_date = temba_value.time

                            existing_poll_result = poll_results_map.get(contact_uuid, dict()).get(ruleset_uuid, None)

                            poll_result_to_save = poll_results_to_save_map.get(contact_uuid, dict()).get(ruleset_uuid, None)

                            if existing_poll_result is not None:

                                update_required = existing_poll_result.category != category or existing_poll_result.text != text
                                update_required = update_required or existing_poll_result.state != state
                                update_required = update_required or existing_poll_result.district != district
                                update_required = update_required or existing_poll_result.ward != ward
                                update_required = update_required or existing_poll_result.born != born
                                update_required = update_required or existing_poll_result.gender != gender
                                update_required = update_required or existing_poll_result.completed != completed

                                # if the reporter answered the step, check if this is a newer run
                                if existing_poll_result.date is not None:
                                    update_required = update_required and (value_date > existing_poll_result.date)
                                else:
                                    update_required = True

                                if update_required:
                                    # update the db object
                                    PollResult.objects.filter(pk=existing_poll_result.pk).update(category=category, text=text,
                                                                                                 state=state, district=district,
                                                                                                 ward=ward, date=value_date,
                                                                                                 born=born, gender=gender,
                                                                                                 completed=completed)

                                    # update the map object as well
                                    existing_poll_result.category = category
                                    existing_poll_result.text = text
                                    existing_poll_result.state = state
                                    existing_poll_result.district = district
                                    existing_poll_result.ward = ward
                                    existing_poll_result.date = value_date
                                    existing_poll_result.born = born
                                    existing_poll_result.gender = gender
                                    existing_poll_result.completed = completed

                                    poll_results_map[contact_uuid][ruleset_uuid] = existing_poll_result

                                    num_val_updated += 1
                                else:
                                    num_val_ignored += 1

                            elif poll_result_to_save is not None:

                                replace_save_map = poll_result_to_save.category != category or poll_result_to_save.text != text
                                replace_save_map = replace_save_map or poll_result_to_save.state != state
                                replace_save_map = replace_save_map or poll_result_to_save.district != district
                                replace_save_map = replace_save_map or poll_result_to_save.ward != ward
                                replace_save_map = replace_save_map or poll_result_to_save.born != born
                                replace_save_map = replace_save_map or poll_result_to_save.gender != gender
                                replace_save_map = replace_save_map or poll_result_to_save.completed != completed

                                # replace if the step is newer
                                if poll_result_to_save.date is not None:
                                    replace_save_map = replace_save_map and (value_date > poll_result_to_save.date)

                                if replace_save_map:
                                    result_obj = PollResult(org=org, flow=flow_uuid, ruleset=ruleset_uuid,
                                                            contact=contact_uuid, category=category, text=text,
                                                            state=state, district=district, ward=ward,
                                                            born=born, gender=gender,
                                                            date=value_date, completed=completed)

                                    poll_results_to_save_map[contact_uuid][ruleset_uuid] = result_obj

                                num_val_ignored += 1
                            else:

                                result_obj = PollResult(org=org, flow=flow_uuid, ruleset=ruleset_uuid,
                                                        contact=contact_uuid, category=category, text=text,
                                                        state=state, district=district, ward=ward, born=born,
                                                        gender=gender, date=value_date, completed=completed)

                                poll_results_to_save_map[contact_uuid][ruleset_uuid] = result_obj

                                num_val_created += 1

                        for temba_path in temba_run.path:
                            ruleset_uuid = temba_path.node
                            category = None
                            text = ""
                            value_date = temba_path.time

                            if ruleset_uuid in questions_uuids:
                                existing_poll_result = poll_results_map.get(contact_uuid, dict()).get(ruleset_uuid, None)

                                poll_result_to_save = poll_results_to_save_map.get(contact_uuid, dict()).get(ruleset_uuid, None)

                                if existing_poll_result is not None:
                                    if existing_poll_result.date is None or value_date > existing_poll_result.date:
                                        # update the db object
                                        PollResult.objects.filter(pk=existing_poll_result.pk).update(category=category,
                                                                                                     text=text,
                                                                                                     state=state,
                                                                                                     district=district,
                                                                                                     ward=ward,
                                                                                                     date=value_date,
                                                                                                     born=born,
                                                                                                     gender=gender,
                                                                                                     completed=completed)

                                        # update the map object as well
                                        existing_poll_result.category = category
                                        existing_poll_result.text = text
                                        existing_poll_result.state = state
                                        existing_poll_result.district = district
                                        existing_poll_result.ward = ward
                                        existing_poll_result.date = value_date
                                        existing_poll_result.born = born
                                        existing_poll_result.gender = gender
                                        existing_poll_result.completed = completed

                                        poll_results_map[contact_uuid][ruleset_uuid] = existing_poll_result

                                        num_path_updated += 1
                                    else:
                                        num_path_ignored += 1

                                elif poll_result_to_save is not None:
                                    if value_date > poll_result_to_save.date:
                                        result_obj = PollResult(org=org, flow=flow_uuid, ruleset=ruleset_uuid,
                                                                contact=contact_uuid, category=category, text=text,
                                                                state=state, district=district, ward=ward,
                                                                born=born, gender=gender,
                                                                date=value_date, completed=completed)

                                        poll_results_to_save_map[contact_uuid][ruleset_uuid] = result_obj

                                    num_path_ignored += 1

                                else:

                                    result_obj = PollResult(org=org, flow=flow_uuid, ruleset=ruleset_uuid,
                                                            contact=contact_uuid, category=category, text=text,
                                                            state=state, district=district, ward=ward, born=born,
                                                            gender=gender, date=value_date, completed=completed)

                                    poll_results_to_save_map[contact_uuid][ruleset_uuid] = result_obj

                                    num_path_created += 1

                            else:
                                num_path_ignored += 1

                    num_synced += len(fetch)
                    if progress_callback:
                        progress_callback(num_synced)

                    new_poll_results = []

                    for c_key in poll_results_to_save_map.keys():
                        for r_key in poll_results_to_save_map.get(c_key, dict()):
                            obj_to_create = poll_results_to_save_map.get(c_key, dict()).get(r_key, None)
                            if obj_to_create is not None:
                                new_poll_results.append(obj_to_create)

                    PollResult.objects.bulk_create(new_poll_results)

                    print "Processed fetch of %d - %d runs for poll #%d on org #%d" % (num_synced - len(fetch),
                                                                                       num_synced,
                                                                                       poll.pk,
                                                                                       org.pk)
                    fetch_start = time.time()
                    print "=" * 40

                    if num_synced >= Poll.POLL_RESULTS_MAX_SYNC_RUNS:
                        poll.rebuild_poll_results_counts()
                        cursor = fetches.get_cursor()

                        cache.set(Poll.POLL_RESULTS_LAST_PULL_CURSOR % (org.pk, poll.flow_uuid), cursor, None)

                        cache.set(Poll.POLL_RESULTS_CURSOR_AFTER_CACHE_KEY % (org.pk, poll.flow_uuid),
                                  after, None)

                        cache.set(Poll.POLL_RESULTS_CURSOR_BEFORE_CACHE_KEY % (org.pk, poll.flow_uuid),
                                  before, None)

                        cache.set(Poll.POLL_RESULTS_BATCHES_LATEST_CACHE_KEY % (org.pk, poll.flow_uuid),
                                  batches_latest, None)

                        print "Break pull results for poll #%d on org #%d in %ds, "\
                              " Times: after= %s, before= %s, batch_latest= %s, sync_latest= %s"\
                              " Objects: created %d, updated %d, ignored %d. " \
                              "Before cursor %s" % (poll.pk, org.pk, time.time() - start, after, before, batches_latest,
                                                    latest_synced_obj_time, num_val_created, num_val_updated,
                                                    num_val_ignored, cursor)

                        return (num_val_created, num_val_updated, num_val_ignored,
                                num_path_created, num_path_updated, num_path_ignored)

                if batches_latest is not None and (latest_synced_obj_time is None or json_date_to_datetime(latest_synced_obj_time) <= json_date_to_datetime(batches_latest)):
                    latest_synced_obj_time = batches_latest

                # update the time for this poll from which we fetch next time
                cache.set(Poll.POLL_RESULTS_LAST_PULL_CACHE_KEY % (org.pk, poll.flow_uuid),
                          latest_synced_obj_time, None)

                # clear the saved cursor
                cache.delete(Poll.POLL_RESULTS_LAST_PULL_CURSOR % (org.pk, poll.flow_uuid))

                # from django.db import connection as db_connection, reset_queries
                # slowest_queries = sorted(db_connection.queries, key=lambda q: q['time'], reverse=True)[:10]
                # for q in slowest_queries:
                #     print "=" * 60
                #     print "\n\n\n"
                #     print "%s -- %s" % (q['time'], q['sql'])
                # reset_queries()

                print "Finished pulling results for poll #%d on org #%d runs in %ds, " \
                      "Times: sync_latest= %s," \
                      "Objects: created %d, updated %d, ignored %d" % (poll.pk, org.pk, time.time() - start,
                                                                       latest_synced_obj_time,
                                                                       num_val_created, num_val_updated,
                                                                       num_val_ignored)
        return num_val_created, num_val_updated, num_val_ignored, num_path_created, num_path_updated, num_path_ignored
Example #17
File: floip.py Project: obed-ak/ureport
    def pull_results(self, poll, modified_after, modified_before, progress_callback=None):
        org = poll.org
        r = get_redis_connection()
        key = Poll.POLL_PULL_RESULTS_TASK_LOCK % (org.pk, poll.flow_uuid)

        stats_dict = dict(
            num_val_created=0,
            num_val_updated=0,
            num_val_ignored=0,
            num_path_created=0,
            num_path_updated=0,
            num_path_ignored=0,
            num_synced=0,
        )

        if r.get(key):
            logger.info("Skipping pulling results for poll #%d on org #%d as it is still running" % (poll.pk, org.pk))
        else:
            with r.lock(key, timeout=Poll.POLL_SYNC_LOCK_TIMEOUT):
                lock_expiration = time.time() + 0.8 * Poll.POLL_SYNC_LOCK_TIMEOUT

                poll_results_url = "https://go.votomobile.org/flow-results/packages/%s/responses" % poll.flow_uuid

                headers = {
                    "Content-type": "application/json",
                    "Accept": "application/json",
                    "Authorization": "Token %s" % self.backend.api_token,
                }

                results = []

                questions_uuids = poll.get_question_uuids()

                # ignore the TaskState time and use the time we stored in redis
                (
                    after,
                    before,
                    latest_synced_obj_time,
                    batches_latest,
                    resume_cursor,
                    pull_after_delete,
                ) = poll.get_pull_cached_params()

                if pull_after_delete is not None:
                    after = None
                    latest_synced_obj_time = None
                    batches_latest = None
                    resume_cursor = None
                    poll.delete_poll_results()

                if resume_cursor is None:
                    before = datetime_to_json_date(timezone.now())
                    after = latest_synced_obj_time

                start = time.time()
                logger.info("Start fetching runs for poll #%d on org #%d" % (poll.pk, org.pk))

                params = dict(
                    filter={"end-timestamp": before, "start-timestamp": after}, page={"beforeCursor": resume_cursor}
                )

                while poll_results_url:
                    response = requests.request("GET", poll_results_url, headers=headers, params=params)
                    response_json = response.json()

                    results = response_json["data"]["attributes"]["responses"]
                    poll_results_url = response_json["data"]["relationships"]["links"]["next"]

                    contacts_map, poll_results_map, poll_results_to_save_map = self._initiate_lookup_maps(
                        results, org, poll
                    )

                    for result in results:
                        if batches_latest is None or json_date_to_datetime(result[0]) > json_date_to_datetime(
                            batches_latest
                        ):
                            batches_latest = result[0]

                        contact_obj = contacts_map.get(result[2], None)
                        self._process_run_poll_results(
                            org,
                            poll.flow_uuid,
                            questions_uuids,
                            result,
                            contact_obj,
                            poll_results_map,
                            poll_results_to_save_map,
                            stats_dict,
                        )

                        stats_dict["num_synced"] += len(results)
                        if progress_callback:
                            progress_callback(stats_dict["num_synced"])

                    self._save_new_poll_results_to_database(poll_results_to_save_map)

                    logger.info(
                        "Processed fetch of %d - %d "
                        "runs for poll #%d on org #%d"
                        % (stats_dict["num_synced"] - len(results), stats_dict["num_synced"], poll.pk, org.pk)
                    )
                    # fetch_start = time.time()
                    logger.info("=" * 40)

                    if stats_dict["num_synced"] >= Poll.POLL_RESULTS_MAX_SYNC_RUNS or time.time() > lock_expiration:
                        poll.rebuild_poll_results_counts()

                        cursor = result[1]
                        self._mark_poll_results_sync_paused(org, poll, cursor, after, before, batches_latest)

                        logger.info(
                            "Break pull results for poll #%d on org #%d in %ds, "
                            " Times: after= %s, before= %s, batch_latest= %s, sync_latest= %s"
                            " Objects: created %d, updated %d, ignored %d. "
                            "Before cursor %s"
                            % (
                                poll.pk,
                                org.pk,
                                time.time() - start,
                                after,
                                before,
                                batches_latest,
                                latest_synced_obj_time,
                                stats_dict["num_val_created"],
                                stats_dict["num_val_updated"],
                                stats_dict["num_val_ignored"],
                                cursor,
                            )
                        )

                        return (
                            stats_dict["num_val_created"],
                            stats_dict["num_val_updated"],
                            stats_dict["num_val_ignored"],
                            stats_dict["num_path_created"],
                            stats_dict["num_path_updated"],
                            stats_dict["num_path_ignored"],
                        )

                if batches_latest is not None and (
                    latest_synced_obj_time is None
                    or json_date_to_datetime(latest_synced_obj_time) <= json_date_to_datetime(batches_latest)
                ):
                    latest_synced_obj_time = batches_latest

                self._mark_poll_results_sync_completed(poll, org, latest_synced_obj_time)

                # from django.db import connection as db_connection, reset_queries
                # slowest_queries = sorted(db_connection.queries, key=lambda q: q['time'], reverse=True)[:10]
                # for q in slowest_queries:
                #     print "=" * 60
                #     print "\n\n\n"
                #     print "%s -- %s" % (q['time'], q['sql'])
                # reset_queries()

                logger.info(
                    "Finished pulling results for poll #%d on org #%d runs in %ds, "
                    "Times: sync_latest= %s,"
                    "Objects: created %d, updated %d, ignored %d"
                    % (
                        poll.pk,
                        org.pk,
                        time.time() - start,
                        latest_synced_obj_time,
                        stats_dict["num_val_created"],
                        stats_dict["num_val_updated"],
                        stats_dict["num_val_ignored"],
                    )
                )
        return (
            stats_dict["num_val_created"],
            stats_dict["num_val_updated"],
            stats_dict["num_val_ignored"],
            stats_dict["num_path_created"],
            stats_dict["num_path_updated"],
            stats_dict["num_path_ignored"],
        )
Example #18
    def pull_results(self,
                     poll,
                     modified_after,
                     modified_before,
                     progress_callback=None):
        org = poll.org
        r = get_redis_connection()
        key = Poll.POLL_PULL_RESULTS_TASK_LOCK % (org.pk, poll.flow_uuid)

        num_val_created = 0
        num_val_updated = 0
        num_val_ignored = 0

        num_path_created = 0
        num_path_updated = 0
        num_path_ignored = 0

        num_synced = 0

        if r.get(key):
            print "Skipping pulling results for poll #%d on org #%d as it is still running" % (
                poll.pk, org.pk)
        else:
            with r.lock(key, timeout=Poll.POLL_SYNC_LOCK_TIMEOUT):
                client = self._get_client(org, 2)

                questions_uuids = poll.get_question_uuids()

                # ignore the TaskState time and use the time we stored in redis
                (after, before, latest_synced_obj_time, batches_latest,
                 resume_cursor,
                 pull_after_delete) = poll.get_pull_cached_params()

                if resume_cursor is None:
                    before = datetime_to_json_date(timezone.now())
                    after = latest_synced_obj_time

                if pull_after_delete is not None:
                    after = None
                    poll.delete_poll_results()

                start = time.time()
                print "Start fetching runs for poll #%d on org #%d" % (poll.pk,
                                                                       org.pk)

                poll_runs_query = client.get_runs(flow=poll.flow_uuid,
                                                  after=after,
                                                  before=before)
                fetches = poll_runs_query.iterfetches(
                    retry_on_rate_exceed=True, resume_cursor=resume_cursor)

                fetch_start = time.time()
                for fetch in fetches:

                    print "RapidPro API fetch for poll #%d on org #%d %d - %d took %ds" % (
                        poll.pk, org.pk, num_synced, num_synced + len(fetch),
                        time.time() - fetch_start)

                    contact_uuids = [run.contact.uuid for run in fetch]
                    contacts = Contact.objects.filter(org=org,
                                                      uuid__in=contact_uuids)
                    contacts_map = {c.uuid: c for c in contacts}

                    existing_poll_results = PollResult.objects.filter(
                        flow=poll.flow_uuid,
                        org=poll.org_id,
                        contact__in=contact_uuids)

                    poll_results_map = defaultdict(dict)
                    for res in existing_poll_results:
                        poll_results_map[res.contact][res.ruleset] = res

                    poll_results_to_save_map = defaultdict(dict)

                    for temba_run in fetch:

                        if batches_latest is None or temba_run.modified_on > json_date_to_datetime(
                                batches_latest):
                            batches_latest = datetime_to_json_date(
                                temba_run.modified_on.replace(tzinfo=pytz.utc))

                        flow_uuid = temba_run.flow.uuid
                        contact_uuid = temba_run.contact.uuid
                        completed = temba_run.exit_type == 'completed'

                        contact_obj = contacts_map.get(contact_uuid, None)

                        state = ''
                        district = ''
                        ward = ''
                        born = None
                        gender = None
                        if contact_obj is not None:
                            state = contact_obj.state
                            district = contact_obj.district
                            ward = contact_obj.ward
                            born = contact_obj.born
                            gender = contact_obj.gender

                        # sort the run's values chronologically
                        temba_values = sorted(temba_run.values.values(), key=lambda val: val.time)

                        for temba_value in temba_values:
                            ruleset_uuid = temba_value.node
                            category = temba_value.category
                            text = temba_value.value
                            value_date = temba_value.time

                            existing_poll_result = poll_results_map.get(
                                contact_uuid, dict()).get(ruleset_uuid, None)

                            poll_result_to_save = poll_results_to_save_map.get(
                                contact_uuid, dict()).get(ruleset_uuid, None)

                            if existing_poll_result is not None:

                                update_required = existing_poll_result.category != category or existing_poll_result.text != text
                                update_required = update_required or existing_poll_result.state != state
                                update_required = update_required or existing_poll_result.district != district
                                update_required = update_required or existing_poll_result.ward != ward
                                update_required = update_required or existing_poll_result.born != born
                                update_required = update_required or existing_poll_result.gender != gender
                                update_required = update_required or existing_poll_result.completed != completed

                                # if the reporter answered the step, check if this is a newer run
                                if existing_poll_result.date is not None:
                                    update_required = update_required and (
                                        value_date > existing_poll_result.date)
                                else:
                                    update_required = True

                                if update_required:
                                    # update the db object
                                    PollResult.objects.filter(
                                        pk=existing_poll_result.pk).update(
                                            category=category,
                                            text=text,
                                            state=state,
                                            district=district,
                                            ward=ward,
                                            date=value_date,
                                            born=born,
                                            gender=gender,
                                            completed=completed)

                                    # update the map object as well
                                    existing_poll_result.category = category
                                    existing_poll_result.text = text
                                    existing_poll_result.state = state
                                    existing_poll_result.district = district
                                    existing_poll_result.ward = ward
                                    existing_poll_result.date = value_date
                                    existing_poll_result.born = born
                                    existing_poll_result.gender = gender
                                    existing_poll_result.completed = completed

                                    poll_results_map[contact_uuid][
                                        ruleset_uuid] = existing_poll_result

                                    num_val_updated += 1
                                else:
                                    num_val_ignored += 1

                            elif poll_result_to_save is not None:

                                replace_save_map = poll_result_to_save.category != category or poll_result_to_save.text != text
                                replace_save_map = replace_save_map or poll_result_to_save.state != state
                                replace_save_map = replace_save_map or poll_result_to_save.district != district
                                replace_save_map = replace_save_map or poll_result_to_save.ward != ward
                                replace_save_map = replace_save_map or poll_result_to_save.born != born
                                replace_save_map = replace_save_map or poll_result_to_save.gender != gender
                                replace_save_map = replace_save_map or poll_result_to_save.completed != completed

                                # replace if the step is newer
                                if poll_result_to_save.date is not None:
                                    replace_save_map = replace_save_map and (
                                        value_date > poll_result_to_save.date)

                                if replace_save_map:
                                    result_obj = PollResult(
                                        org=org,
                                        flow=flow_uuid,
                                        ruleset=ruleset_uuid,
                                        contact=contact_uuid,
                                        category=category,
                                        text=text,
                                        state=state,
                                        district=district,
                                        ward=ward,
                                        born=born,
                                        gender=gender,
                                        date=value_date,
                                        completed=completed)

                                    poll_results_to_save_map[contact_uuid][
                                        ruleset_uuid] = result_obj

                                num_val_ignored += 1
                            else:

                                result_obj = PollResult(org=org,
                                                        flow=flow_uuid,
                                                        ruleset=ruleset_uuid,
                                                        contact=contact_uuid,
                                                        category=category,
                                                        text=text,
                                                        state=state,
                                                        district=district,
                                                        ward=ward,
                                                        born=born,
                                                        gender=gender,
                                                        date=value_date,
                                                        completed=completed)

                                poll_results_to_save_map[contact_uuid][
                                    ruleset_uuid] = result_obj

                                num_val_created += 1

                        for temba_path in temba_run.path:
                            ruleset_uuid = temba_path.node
                            category = None
                            text = ""
                            value_date = temba_path.time

                            if ruleset_uuid in questions_uuids:
                                existing_poll_result = poll_results_map.get(
                                    contact_uuid,
                                    dict()).get(ruleset_uuid, None)

                                poll_result_to_save = poll_results_to_save_map.get(
                                    contact_uuid,
                                    dict()).get(ruleset_uuid, None)

                                if existing_poll_result is not None:
                                    if existing_poll_result.date is None or value_date > existing_poll_result.date:
                                        # update the db object
                                        PollResult.objects.filter(
                                            pk=existing_poll_result.pk).update(
                                                category=category,
                                                text=text,
                                                state=state,
                                                district=district,
                                                ward=ward,
                                                date=value_date,
                                                born=born,
                                                gender=gender,
                                                completed=completed)

                                        # update the map object as well
                                        existing_poll_result.category = category
                                        existing_poll_result.text = text
                                        existing_poll_result.state = state
                                        existing_poll_result.district = district
                                        existing_poll_result.ward = ward
                                        existing_poll_result.date = value_date
                                        existing_poll_result.born = born
                                        existing_poll_result.gender = gender
                                        existing_poll_result.completed = completed

                                        poll_results_map[contact_uuid][
                                            ruleset_uuid] = existing_poll_result

                                        num_path_updated += 1
                                    else:
                                        num_path_ignored += 1

                                elif poll_result_to_save is not None:
                                    if value_date > poll_result_to_save.date:
                                        result_obj = PollResult(
                                            org=org,
                                            flow=flow_uuid,
                                            ruleset=ruleset_uuid,
                                            contact=contact_uuid,
                                            category=category,
                                            text=text,
                                            state=state,
                                            district=district,
                                            ward=ward,
                                            born=born,
                                            gender=gender,
                                            date=value_date,
                                            completed=completed)

                                        poll_results_to_save_map[contact_uuid][
                                            ruleset_uuid] = result_obj

                                    num_path_ignored += 1

                                else:

                                    result_obj = PollResult(
                                        org=org,
                                        flow=flow_uuid,
                                        ruleset=ruleset_uuid,
                                        contact=contact_uuid,
                                        category=category,
                                        text=text,
                                        state=state,
                                        district=district,
                                        ward=ward,
                                        born=born,
                                        gender=gender,
                                        date=value_date,
                                        completed=completed)

                                    poll_results_to_save_map[contact_uuid][
                                        ruleset_uuid] = result_obj

                                    num_path_created += 1

                            else:
                                num_path_ignored += 1

                    num_synced += len(fetch)
                    if progress_callback:
                        progress_callback(num_synced)

                    new_poll_results = []

                    for c_key in poll_results_to_save_map.keys():
                        for r_key in poll_results_to_save_map.get(
                                c_key, dict()):
                            obj_to_create = poll_results_to_save_map.get(
                                c_key, dict()).get(r_key, None)
                            if obj_to_create is not None:
                                new_poll_results.append(obj_to_create)

                    PollResult.objects.bulk_create(new_poll_results)

                    print "Processed fetch of %d - %d runs for poll #%d on org #%d" % (
                        num_synced - len(fetch), num_synced, poll.pk, org.pk)
                    fetch_start = time.time()
                    print "=" * 40

                    if num_synced >= Poll.POLL_RESULTS_MAX_SYNC_RUNS:
                        poll.rebuild_poll_results_counts()
                        cursor = fetches.get_cursor()

                        cache.set(
                            Poll.POLL_RESULTS_LAST_PULL_CURSOR %
                            (org.pk, poll.flow_uuid), cursor, None)

                        cache.set(
                            Poll.POLL_RESULTS_CURSOR_AFTER_CACHE_KEY %
                            (org.pk, poll.flow_uuid), after, None)

                        cache.set(
                            Poll.POLL_RESULTS_CURSOR_BEFORE_CACHE_KEY %
                            (org.pk, poll.flow_uuid), before, None)

                        cache.set(
                            Poll.POLL_RESULTS_BATCHES_LATEST_CACHE_KEY %
                            (org.pk, poll.flow_uuid), batches_latest, None)

                        print "Break pull results for poll #%d on org #%d in %ds, "\
                              " Times: after= %s, before= %s, batch_latest= %s, sync_latest= %s"\
                              " Objects: created %d, updated %d, ignored %d. " \
                              "Before cursor %s" % (poll.pk, org.pk, time.time() - start, after, before, batches_latest,
                                                    latest_synced_obj_time, num_val_created, num_val_updated,
                                                    num_val_ignored, cursor)

                        return (num_val_created, num_val_updated,
                                num_val_ignored, num_path_created,
                                num_path_updated, num_path_ignored)

                if batches_latest is not None and (
                        latest_synced_obj_time is None
                        or json_date_to_datetime(latest_synced_obj_time) <=
                        json_date_to_datetime(batches_latest)):
                    latest_synced_obj_time = batches_latest

                # update the time for this poll from which we fetch next time
                cache.set(
                    Poll.POLL_RESULTS_LAST_PULL_CACHE_KEY %
                    (org.pk, poll.flow_uuid), latest_synced_obj_time, None)

                # clear the saved cursor
                cache.delete(Poll.POLL_RESULTS_LAST_PULL_CURSOR %
                             (org.pk, poll.flow_uuid))

                # from django.db import connection as db_connection, reset_queries
                # slowest_queries = sorted(db_connection.queries, key=lambda q: q['time'], reverse=True)[:10]
                # for q in slowest_queries:
                #     print "=" * 60
                #     print "\n\n\n"
                #     print "%s -- %s" % (q['time'], q['sql'])
                # reset_queries()

                print "Finished pulling results for poll #%d on org #%d runs in %ds, " \
                      "Times: sync_latest= %s," \
                      "Objects: created %d, updated %d, ignored %d" % (poll.pk, org.pk, time.time() - start,
                                                                       latest_synced_obj_time,
                                                                       num_val_created, num_val_updated,
                                                                       num_val_ignored)
        return num_val_created, num_val_updated, num_val_ignored, num_path_created, num_path_updated, num_path_ignored
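
The pause/resume bookkeeping above is done with four raw cache.set calls before the early return. In the later examples the same work sits behind self._mark_poll_results_sync_paused; the following is only a minimal sketch of such a helper, reassembled from the cache keys used above (the standalone function form and the import paths are assumptions, not taken from these listings):

# assumed imports; the cache key constants live on the Poll model as in the examples
from django.core.cache import cache
from ureport.polls.models import Poll  # assumed module path


def _mark_poll_results_sync_paused(org, poll, cursor, after, before, batches_latest):
    # sketch: persist the resume cursor and the fetch window bounds so the next
    # pull_results run can continue this fetch instead of restarting from scratch
    cache.set(Poll.POLL_RESULTS_LAST_PULL_CURSOR % (org.pk, poll.flow_uuid), cursor, None)
    cache.set(Poll.POLL_RESULTS_CURSOR_AFTER_CACHE_KEY % (org.pk, poll.flow_uuid), after, None)
    cache.set(Poll.POLL_RESULTS_CURSOR_BEFORE_CACHE_KEY % (org.pk, poll.flow_uuid), before, None)
    cache.set(Poll.POLL_RESULTS_BATCHES_LATEST_CACHE_KEY % (org.pk, poll.flow_uuid), batches_latest, None)
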
Example #19
def pull_contacts(org, ignored_since, ignored_until):
    """
    Fetches updated contacts from RapidPro and updates local contacts accordingly
    """
    from ureport.contacts.models import ReportersCounter

    results = dict()

    backends = org.backends.filter(is_active=True)
    for backend_obj in backends:
        backend = org.get_backend(backend_slug=backend_obj.slug)

        last_fetch_date_key = Contact.CONTACT_LAST_FETCHED_CACHE_KEY % (org.pk, backend_obj.slug)

        until = datetime_to_json_date(timezone.now())
        since = cache.get(last_fetch_date_key, None)

        if not since:
            logger.info("First time run for org #%d. Will sync all contacts" % org.pk)

        start = time.time()

        fields_created, fields_updated, fields_deleted, ignored = backend.pull_fields(org)

        logger.info(
            "Fetched contact fields for org #%d. "
            "Created %s, Updated %s, Deleted %d, Ignored %d"
            % (org.pk, fields_created, fields_updated, fields_deleted, ignored)
        )
        logger.info("Fetch fields for org #%d took %ss" % (org.pk, time.time() - start))

        start_boundaries = time.time()

        boundaries_created, boundaries_updated, boundaries_deleted, ignored = backend.pull_boundaries(org)

        logger.info(
            "Fetched boundaries for org #%d. "
            "Created %s, Updated %s, Deleted %d, Ignored %d"
            % (org.pk, boundaries_created, boundaries_updated, boundaries_deleted, ignored)
        )

        logger.info("Fetch boundaries for org #%d took %ss" % (org.pk, time.time() - start_boundaries))
        start_contacts = time.time()

        contacts_created, contacts_updated, contacts_deleted, ignored = backend.pull_contacts(org, since, until)

        cache.set(last_fetch_date_key, until, None)

        logger.info(
            "Fetched contacts for org #%d. "
            "Created %s, Updated %s, Deleted %d, Ignored %d"
            % (org.pk, contacts_created, contacts_updated, contacts_deleted, ignored)
        )

        logger.info("Fetch contacts for org #%d took %ss" % (org.pk, time.time() - start_contacts))

        # Squash reporters counts
        ReportersCounter.squash_counts()

        results[backend_obj.slug] = {
            "fields": {"created": fields_created, "updated": fields_updated, "deleted": fields_deleted},
            "boundaries": {
                "created": boundaries_created,
                "updated": boundaries_updated,
                "deleted": boundaries_deleted,
            },
            "contacts": {"created": contacts_created, "updated": contacts_updated, "deleted": contacts_deleted},
        }

    return results
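
pull_contacts returns one nested dict of counts per backend slug. A minimal usage sketch (the wrapper function name is hypothetical; it only consumes the structure shown above) could total the contact counts across backends like this:

def summarize_contact_sync(org):
    # hypothetical wrapper: pull_contacts ignores its since/until arguments and
    # reads its own last-fetch time from the cache, so None is passed for both
    results = pull_contacts(org, None, None)
    totals = {"created": 0, "updated": 0, "deleted": 0}
    for backend_slug, counts in results.items():
        for key in totals:
            totals[key] += counts["contacts"][key]
    return totals
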
Example #20
    def pull_results(self, poll, modified_after, modified_before, progress_callback=None):
        org = poll.org
        r = get_redis_connection()
        key = Poll.POLL_PULL_RESULTS_TASK_LOCK % (org.pk, poll.flow_uuid)

        stats_dict = dict(
            num_val_created=0,
            num_val_updated=0,
            num_val_ignored=0,
            num_path_created=0,
            num_path_updated=0,
            num_path_ignored=0,
            num_synced=0,
        )

        if r.get(key):
            logger.info("Skipping pulling results for poll #%d on org #%d as it is still running" % (poll.pk, org.pk))
        else:
            with r.lock(key, timeout=Poll.POLL_SYNC_LOCK_TIMEOUT):
                lock_expiration = time.time() + 0.8 * Poll.POLL_SYNC_LOCK_TIMEOUT
                client = self._get_client(org, 2)

                questions_uuids = poll.get_question_uuids()

                # ignore the TaskState time and use the time we stored in redis
                (
                    after,
                    before,
                    latest_synced_obj_time,
                    batches_latest,
                    resume_cursor,
                    pull_after_delete,
                ) = poll.get_pull_cached_params()

                if pull_after_delete is not None:
                    after = None
                    latest_synced_obj_time = None
                    batches_latest = None
                    resume_cursor = None
                    poll.delete_poll_results()
                    pull_refresh_from_archives.apply_async((poll.pk,), queue="sync")

                if resume_cursor is None:
                    before = datetime_to_json_date(timezone.now())
                    after = latest_synced_obj_time

                start = time.time()
                logger.info("Start fetching runs for poll #%d on org #%d" % (poll.pk, org.pk))

                poll_runs_query = client.get_runs(flow=poll.flow_uuid, after=after, before=before)
                fetches = poll_runs_query.iterfetches(retry_on_rate_exceed=True, resume_cursor=resume_cursor)

                try:
                    fetch_start = time.time()
                    for fetch in fetches:

                        logger.info(
                            "RapidPro API fetch for poll #%d "
                            "on org #%d %d - %d took %ds"
                            % (
                                poll.pk,
                                org.pk,
                                stats_dict["num_synced"],
                                stats_dict["num_synced"] + len(fetch),
                                time.time() - fetch_start,
                            )
                        )

                        contacts_map, poll_results_map, poll_results_to_save_map = self._initiate_lookup_maps(
                            fetch, org, poll
                        )

                        for temba_run in fetch:

                            if batches_latest is None or temba_run.modified_on > json_date_to_datetime(batches_latest):
                                batches_latest = datetime_to_json_date(temba_run.modified_on.replace(tzinfo=pytz.utc))

                            contact_obj = contacts_map.get(temba_run.contact.uuid, None)
                            self._process_run_poll_results(
                                org,
                                questions_uuids,
                                temba_run,
                                contact_obj,
                                poll_results_map,
                                poll_results_to_save_map,
                                stats_dict,
                            )

                        stats_dict["num_synced"] += len(fetch)
                        if progress_callback:
                            progress_callback(stats_dict["num_synced"])

                        self._save_new_poll_results_to_database(poll_results_to_save_map)

                        logger.info(
                            "Processed fetch of %d - %d "
                            "runs for poll #%d on org #%d"
                            % (stats_dict["num_synced"] - len(fetch), stats_dict["num_synced"], poll.pk, org.pk)
                        )
                        fetch_start = time.time()
                        logger.info("=" * 40)

                        if (
                            stats_dict["num_synced"] >= Poll.POLL_RESULTS_MAX_SYNC_RUNS
                            or time.time() > lock_expiration
                        ):
                            poll.rebuild_poll_results_counts()

                            cursor = fetches.get_cursor()
                            self._mark_poll_results_sync_paused(org, poll, cursor, after, before, batches_latest)

                            logger.info(
                                "Break pull results for poll #%d on org #%d in %ds, "
                                " Times: after= %s, before= %s, batch_latest= %s, sync_latest= %s"
                                " Objects: created %d, updated %d, ignored %d. "
                                "Before cursor %s"
                                % (
                                    poll.pk,
                                    org.pk,
                                    time.time() - start,
                                    after,
                                    before,
                                    batches_latest,
                                    latest_synced_obj_time,
                                    stats_dict["num_val_created"],
                                    stats_dict["num_val_updated"],
                                    stats_dict["num_val_ignored"],
                                    cursor,
                                )
                            )

                            return (
                                stats_dict["num_val_created"],
                                stats_dict["num_val_updated"],
                                stats_dict["num_val_ignored"],
                                stats_dict["num_path_created"],
                                stats_dict["num_path_updated"],
                                stats_dict["num_path_ignored"],
                            )
                except TembaRateExceededError:
                    poll.rebuild_poll_results_counts()

                    cursor = fetches.get_cursor()
                    self._mark_poll_results_sync_paused(org, poll, cursor, after, before, batches_latest)

                    logger.info(
                        "Break pull results for poll #%d on org #%d in %ds, "
                        " Times: after= %s, before= %s, batch_latest= %s, sync_latest= %s"
                        " Objects: created %d, updated %d, ignored %d. "
                        "Before cursor %s"
                        % (
                            poll.pk,
                            org.pk,
                            time.time() - start,
                            after,
                            before,
                            batches_latest,
                            latest_synced_obj_time,
                            stats_dict["num_val_created"],
                            stats_dict["num_val_updated"],
                            stats_dict["num_val_ignored"],
                            cursor,
                        )
                    )

                    return (
                        stats_dict["num_val_created"],
                        stats_dict["num_val_updated"],
                        stats_dict["num_val_ignored"],
                        stats_dict["num_path_created"],
                        stats_dict["num_path_updated"],
                        stats_dict["num_path_ignored"],
                    )

                if batches_latest is not None and (
                    latest_synced_obj_time is None
                    or json_date_to_datetime(latest_synced_obj_time) <= json_date_to_datetime(batches_latest)
                ):
                    latest_synced_obj_time = batches_latest

                self._mark_poll_results_sync_completed(poll, org, latest_synced_obj_time)

                # from django.db import connection as db_connection, reset_queries
                # slowest_queries = sorted(db_connection.queries, key=lambda q: q['time'], reverse=True)[:10]
                # for q in slowest_queries:
                #     print "=" * 60
                #     print "\n\n\n"
                #     print "%s -- %s" % (q['time'], q['sql'])
                # reset_queries()

                logger.info(
                    "Finished pulling results for poll #%d on org #%d runs in %ds, "
                    "Times: sync_latest= %s,"
                    "Objects: created %d, updated %d, ignored %d"
                    % (
                        poll.pk,
                        org.pk,
                        time.time() - start,
                        latest_synced_obj_time,
                        stats_dict["num_val_created"],
                        stats_dict["num_val_updated"],
                        stats_dict["num_val_ignored"],
                    )
                )
        return (
            stats_dict["num_val_created"],
            stats_dict["num_val_updated"],
            stats_dict["num_val_ignored"],
            stats_dict["num_path_created"],
            stats_dict["num_path_updated"],
            stats_dict["num_path_ignored"],
        )
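
Example #20 holds a redis lock for the whole sync and stops work at 80% of the lock timeout so the lock cannot expire while results are still being written. A standalone sketch of that pattern under assumed names (the key, timeout value, and function are placeholders, not ureport constants):

import time

from django_redis import get_redis_connection

LOCK_TIMEOUT = 3600  # placeholder; the examples use Poll.POLL_SYNC_LOCK_TIMEOUT


def run_with_bounded_lock(key, work_batches):
    # work_batches is any iterable of callables, each doing one bounded chunk of work
    r = get_redis_connection()
    if r.get(key):
        return "skipped"  # another worker still holds the lock
    with r.lock(key, timeout=LOCK_TIMEOUT):
        deadline = time.time() + 0.8 * LOCK_TIMEOUT
        for batch in work_batches:
            batch()
            if time.time() > deadline:
                return "paused"  # break out before the lock can expire underneath us
    return "completed"
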
Example #21
    def pull_results(self,
                     poll,
                     modified_after,
                     modified_before,
                     progress_callback=None):
        org = poll.org
        r = get_redis_connection()
        key = Poll.POLL_PULL_RESULTS_TASK_LOCK % (org.pk, poll.flow_uuid)

        stats_dict = dict(
            num_val_created=0,
            num_val_updated=0,
            num_val_ignored=0,
            num_path_created=0,
            num_path_updated=0,
            num_path_ignored=0,
            num_synced=0,
        )

        if poll.stopped_syncing:
            return (
                stats_dict["num_val_created"],
                stats_dict["num_val_updated"],
                stats_dict["num_val_ignored"],
                stats_dict["num_path_created"],
                stats_dict["num_path_updated"],
                stats_dict["num_path_ignored"],
            )

        if r.get(key):
            logger.info(
                "Skipping pulling results for poll #%d on org #%d as it is still running"
                % (poll.pk, org.pk))
        else:
            with r.lock(key, timeout=Poll.POLL_SYNC_LOCK_TIMEOUT):
                lock_expiration = time.time() + 0.8 * Poll.POLL_SYNC_LOCK_TIMEOUT
                client = self._get_client(org, 2)

                questions_uuids = poll.get_question_uuids()

                # ignore the TaskState time and use the time we stored in redis
                (
                    after,
                    before,
                    latest_synced_obj_time,
                    batches_latest,
                    resume_cursor,
                    pull_after_delete,
                ) = poll.get_pull_cached_params()

                if pull_after_delete is not None:
                    after = None
                    latest_synced_obj_time = None
                    batches_latest = None
                    resume_cursor = None
                    poll.delete_poll_results()
                    pull_refresh_from_archives.apply_async((poll.pk, ),
                                                           queue="sync")

                if resume_cursor is None:
                    before = datetime_to_json_date(timezone.now())
                    after = latest_synced_obj_time

                start = time.time()
                logger.info("Start fetching runs for poll #%d on org #%d" %
                            (poll.pk, org.pk))

                poll_runs_query = client.get_runs(flow=poll.flow_uuid,
                                                  after=after,
                                                  before=before)
                fetches = poll_runs_query.iterfetches(
                    retry_on_rate_exceed=True, resume_cursor=resume_cursor)

                try:
                    fetch_start = time.time()
                    for fetch in fetches:

                        logger.info("RapidPro API fetch for poll #%d "
                                    "on org #%d %d - %d took %ds" % (
                                        poll.pk,
                                        org.pk,
                                        stats_dict["num_synced"],
                                        stats_dict["num_synced"] + len(fetch),
                                        time.time() - fetch_start,
                                    ))

                        contacts_map, poll_results_map, poll_results_to_save_map = self._initiate_lookup_maps(
                            fetch, org, poll)

                        for temba_run in fetch:

                            if batches_latest is None or temba_run.modified_on > json_date_to_datetime(
                                    batches_latest):
                                batches_latest = datetime_to_json_date(
                                    temba_run.modified_on.replace(
                                        tzinfo=pytz.utc))

                            contact_obj = contacts_map.get(
                                temba_run.contact.uuid, None)
                            self._process_run_poll_results(
                                org,
                                questions_uuids,
                                temba_run,
                                contact_obj,
                                poll_results_map,
                                poll_results_to_save_map,
                                stats_dict,
                            )

                        stats_dict["num_synced"] += len(fetch)
                        if progress_callback:
                            progress_callback(stats_dict["num_synced"])

                        self._save_new_poll_results_to_database(
                            poll_results_to_save_map)

                        logger.info(
                            "Processed fetch of %d - %d "
                            "runs for poll #%d on org #%d" %
                            (stats_dict["num_synced"] - len(fetch),
                             stats_dict["num_synced"], poll.pk, org.pk))
                        fetch_start = time.time()
                        logger.info("=" * 40)

                        if (stats_dict["num_synced"] >=
                                Poll.POLL_RESULTS_MAX_SYNC_RUNS
                                or time.time() > lock_expiration):
                            poll.rebuild_poll_results_counts()

                            cursor = fetches.get_cursor()
                            self._mark_poll_results_sync_paused(
                                org, poll, cursor, after, before,
                                batches_latest)

                            logger.info(
                                "Break pull results for poll #%d on org #%d in %ds, "
                                " Times: after= %s, before= %s, batch_latest= %s, sync_latest= %s"
                                " Objects: created %d, updated %d, ignored %d. "
                                "Before cursor %s" % (
                                    poll.pk,
                                    org.pk,
                                    time.time() - start,
                                    after,
                                    before,
                                    batches_latest,
                                    latest_synced_obj_time,
                                    stats_dict["num_val_created"],
                                    stats_dict["num_val_updated"],
                                    stats_dict["num_val_ignored"],
                                    cursor,
                                ))

                            return (
                                stats_dict["num_val_created"],
                                stats_dict["num_val_updated"],
                                stats_dict["num_val_ignored"],
                                stats_dict["num_path_created"],
                                stats_dict["num_path_updated"],
                                stats_dict["num_path_ignored"],
                            )
                except TembaRateExceededError:
                    poll.rebuild_poll_results_counts()

                    cursor = fetches.get_cursor()
                    self._mark_poll_results_sync_paused(
                        org, poll, cursor, after, before, batches_latest)

                    logger.info(
                        "Break pull results for poll #%d on org #%d in %ds, "
                        " Times: after= %s, before= %s, batch_latest= %s, sync_latest= %s"
                        " Objects: created %d, updated %d, ignored %d. "
                        "Before cursor %s" % (
                            poll.pk,
                            org.pk,
                            time.time() - start,
                            after,
                            before,
                            batches_latest,
                            latest_synced_obj_time,
                            stats_dict["num_val_created"],
                            stats_dict["num_val_updated"],
                            stats_dict["num_val_ignored"],
                            cursor,
                        ))

                    return (
                        stats_dict["num_val_created"],
                        stats_dict["num_val_updated"],
                        stats_dict["num_val_ignored"],
                        stats_dict["num_path_created"],
                        stats_dict["num_path_updated"],
                        stats_dict["num_path_ignored"],
                    )

                if batches_latest is not None and (
                        latest_synced_obj_time is None
                        or json_date_to_datetime(latest_synced_obj_time) <=
                        json_date_to_datetime(batches_latest)):
                    latest_synced_obj_time = batches_latest

                self._mark_poll_results_sync_completed(poll, org,
                                                       latest_synced_obj_time)

                # from django.db import connection as db_connection, reset_queries
                # slowest_queries = sorted(db_connection.queries, key=lambda q: q['time'], reverse=True)[:10]
                # for q in slowest_queries:
                #     print "=" * 60
                #     print "\n\n\n"
                #     print "%s -- %s" % (q['time'], q['sql'])
                # reset_queries()

                logger.info(
                    "Finished pulling results for poll #%d on org #%d runs in %ds, "
                    "Times: sync_latest= %s,"
                    "Objects: created %d, updated %d, ignored %d" % (
                        poll.pk,
                        org.pk,
                        time.time() - start,
                        latest_synced_obj_time,
                        stats_dict["num_val_created"],
                        stats_dict["num_val_updated"],
                        stats_dict["num_val_ignored"],
                    ))
        return (
            stats_dict["num_val_created"],
            stats_dict["num_val_updated"],
            stats_dict["num_val_ignored"],
            stats_dict["num_path_created"],
            stats_dict["num_path_updated"],
            stats_dict["num_path_ignored"],
        )
Example #22
    def pull_results(self,
                     poll,
                     modified_after,
                     modified_before,
                     progress_callback=None):
        org = poll.org
        r = get_redis_connection()
        key = Poll.POLL_PULL_RESULTS_TASK_LOCK % (org.pk, poll.pk)

        num_created = 0
        num_updated = 0
        num_ignored = 0
        num_synced = 0

        if r.get(key):
            print "Skipping pulling results for poll #%d on org #%d as it is still running" % (
                poll.pk, org.pk)
        else:
            with r.lock(key):
                client = self._get_client(org, 2)

                # ignore the TaskState time and use the time we stored in redis
                now = timezone.now()
                after = cache.get(
                    PollResult.POLL_RESULTS_LAST_PULL_CACHE_KEY %
                    (org.pk, poll.pk), None)

                pull_after_delete = cache.get(
                    Poll.POLL_PULL_ALL_RESULTS_AFTER_DELETE_FLAG %
                    (org.pk, poll.pk), None)
                if pull_after_delete is not None:
                    after = None
                    poll.delete_poll_results()

                start = time.time()
                print "Start fetching runs for poll #%d on org #%d" % (poll.pk,
                                                                       org.pk)

                poll_runs_query = client.get_runs(flow=poll.flow_uuid,
                                                  responded=True,
                                                  after=after,
                                                  before=now)
                fetches = poll_runs_query.iterfetches(
                    retry_on_rate_exceed=True)

                existing_poll_results = PollResult.objects.filter(
                    flow=poll.flow_uuid, org=poll.org_id)

                poll_results_map = defaultdict(dict)
                for res in existing_poll_results:
                    poll_results_map[res.contact][res.ruleset] = res

                poll_results_to_save_map = defaultdict(dict)

                fetch_start = time.time()
                for fetch in fetches:

                    print "RapidPro API fetch for poll #%d on org #%d %d - %d took %ds" % (
                        poll.pk, org.pk, num_synced, num_synced + len(fetch),
                        time.time() - fetch_start)

                    contact_uuids = [run.contact.uuid for run in fetch]
                    contacts = Contact.objects.filter(org=org,
                                                      uuid__in=contact_uuids)
                    contacts_map = {c.uuid: c for c in contacts}

                    for temba_run in fetch:
                        flow_uuid = temba_run.flow.uuid
                        contact_uuid = temba_run.contact.uuid
                        completed = temba_run.exit_type == 'completed'

                        contact_obj = contacts_map.get(contact_uuid, None)

                        state = ''
                        district = ''
                        ward = ''
                        if contact_obj is not None:
                            state = contact_obj.state
                            district = contact_obj.district
                            ward = contact_obj.ward

                        for temba_step in temba_run.steps:
                            ruleset_uuid = temba_step.node
                            category = temba_step.category
                            text = temba_step.text

                            existing_poll_result = poll_results_map.get(
                                contact_uuid, dict()).get(ruleset_uuid, None)

                            poll_result_to_save = poll_results_to_save_map.get(
                                contact_uuid, dict()).get(ruleset_uuid, None)

                            if existing_poll_result is not None:

                                update_required = existing_poll_result.category != category or existing_poll_result.text != text
                                update_required = update_required or existing_poll_result.state != state
                                update_required = update_required or existing_poll_result.district != district
                                update_required = update_required or existing_poll_result.ward != ward
                                update_required = update_required or existing_poll_result.completed != completed

                                # if the reporter answered the step, check if this is a newer run
                                if existing_poll_result.date is not None:
                                    update_required = update_required and (
                                        temba_step.left_on is None
                                        or temba_step.arrived_on >
                                        existing_poll_result.date)

                                if update_required:
                                    PollResult.objects.filter(
                                        pk=existing_poll_result.pk).update(
                                            category=category,
                                            text=text,
                                            state=state,
                                            district=district,
                                            ward=ward,
                                            date=temba_step.left_on,
                                            completed=completed)

                                    num_updated += 1
                                else:
                                    num_ignored += 1

                            elif poll_result_to_save is not None:

                                replace_save_map = poll_result_to_save.category != category or poll_result_to_save.text != text
                                replace_save_map = replace_save_map or poll_result_to_save.state != state
                                replace_save_map = replace_save_map or poll_result_to_save.district != district
                                replace_save_map = replace_save_map or poll_result_to_save.ward != ward
                                replace_save_map = replace_save_map or poll_result_to_save.completed != completed

                                # replace if the step is newer
                                if poll_result_to_save.date is not None:
                                    replace_save_map = replace_save_map and (
                                        temba_step.left_on is None
                                        or temba_step.arrived_on >
                                        poll_result_to_save.date)

                                if replace_save_map:
                                    result_obj = PollResult(
                                        org=org,
                                        flow=flow_uuid,
                                        ruleset=ruleset_uuid,
                                        contact=contact_uuid,
                                        category=category,
                                        text=text,
                                        state=state,
                                        district=district,
                                        ward=ward,
                                        date=temba_step.left_on,
                                        completed=completed)

                                    poll_results_to_save_map[contact_uuid][
                                        ruleset_uuid] = result_obj

                                num_ignored += 1
                            else:

                                result_obj = PollResult(
                                    org=org,
                                    flow=flow_uuid,
                                    ruleset=ruleset_uuid,
                                    contact=contact_uuid,
                                    category=category,
                                    text=text,
                                    state=state,
                                    district=district,
                                    ward=ward,
                                    date=temba_step.left_on,
                                    completed=completed)

                                poll_results_to_save_map[contact_uuid][
                                    ruleset_uuid] = result_obj

                                num_created += 1

                    num_synced += len(fetch)
                    if progress_callback:
                        progress_callback(num_synced)

                    print "Processed fetch of %d - %d runs for poll #%d on org #%d" % (
                        num_synced - len(fetch), num_synced, poll.pk, org.pk)
                    fetch_start = time.time()
                    print "=" * 40

                new_poll_results = []

                for c_key in poll_results_to_save_map.keys():
                    for r_key in poll_results_to_save_map.get(c_key, dict()):
                        obj_to_create = poll_results_to_save_map.get(
                            c_key, dict()).get(r_key, None)
                        if obj_to_create is not None:
                            new_poll_results.append(obj_to_create)

                PollResult.objects.bulk_create(new_poll_results)

                # update the time for this poll from which we fetch next time
                cache.set(
                    PollResult.POLL_RESULTS_LAST_PULL_CACHE_KEY %
                    (org.pk, poll.pk),
                    datetime_to_json_date(now.replace(tzinfo=pytz.utc)), None)

                # from django.db import connection as db_connection, reset_queries
                # slowest_queries = sorted(db_connection.queries, key=lambda q: q['time'], reverse=True)[:10]
                # for q in slowest_queries:
                #     print "=" * 60
                #     print "\n\n\n"
                #     print "%s -- %s" % (q['time'], q['sql'])
                # reset_queries()

                print "Finished pulling results for poll #%d on org #%d runs in %ds, " \
                      "created %d, updated %d, ignored %d" % (poll.pk, org.pk, time.time() - start, num_created,
                                                              num_updated, num_ignored)
        return num_created, num_updated, num_ignored
Example #23
    def pull_results(self, poll, modified_after, modified_before, progress_callback=None):
        org = poll.org
        r = get_redis_connection()
        key = Poll.POLL_PULL_RESULTS_TASK_LOCK % (org.pk, poll.pk)

        num_created = 0
        num_updated = 0
        num_ignored = 0
        num_synced = 0

        if r.get(key):
            print "Skipping pulling results for poll #%d on org #%d as it is still running" % (poll.pk, org.pk)
        else:
            with r.lock(key):
                client = self._get_client(org, 2)

                # ignore the TaskState time and use the time we stored in redis
                now = timezone.now()
                after = cache.get(PollResult.POLL_RESULTS_LAST_PULL_CACHE_KEY % (org.pk, poll.pk), None)

                pull_after_delete = cache.get(Poll.POLL_PULL_ALL_RESULTS_AFTER_DELETE_FLAG % (org.pk, poll.pk), None)
                if pull_after_delete is not None:
                    after = None
                    poll.delete_poll_results()

                start = time.time()
                print "Start fetching runs for poll #%d on org #%d" % (poll.pk, org.pk)

                poll_runs_query = client.get_runs(flow=poll.flow_uuid, responded=True, after=after, before=now)
                fetches = poll_runs_query.iterfetches(retry_on_rate_exceed=True)

                existing_poll_results = PollResult.objects.filter(flow=poll.flow_uuid, org=poll.org_id)

                poll_results_map = defaultdict(dict)
                for res in existing_poll_results:
                    poll_results_map[res.contact][res.ruleset] = res

                poll_results_to_save_map = defaultdict(dict)

                fetch_start = time.time()
                for fetch in fetches:

                    print "RapidPro API fetch for poll #%d on org #%d %d - %d took %ds" % (
                        poll.pk,
                        org.pk,
                        num_synced,
                        num_synced + len(fetch),
                        time.time() - fetch_start,
                    )

                    contact_uuids = [run.contact.uuid for run in fetch]
                    contacts = Contact.objects.filter(org=org, uuid__in=contact_uuids)
                    contacts_map = {c.uuid: c for c in contacts}

                    for temba_run in fetch:
                        flow_uuid = temba_run.flow.uuid
                        contact_uuid = temba_run.contact.uuid
                        completed = temba_run.exit_type == "completed"

                        contact_obj = contacts_map.get(contact_uuid, None)

                        state = ""
                        district = ""
                        ward = ""
                        if contact_obj is not None:
                            state = contact_obj.state
                            district = contact_obj.district
                            ward = contact_obj.ward

                        for temba_step in temba_run.steps:
                            ruleset_uuid = temba_step.node
                            category = temba_step.category
                            text = temba_step.text

                            existing_poll_result = poll_results_map.get(contact_uuid, dict()).get(ruleset_uuid, None)

                            poll_result_to_save = poll_results_to_save_map.get(contact_uuid, dict()).get(
                                ruleset_uuid, None
                            )

                            if existing_poll_result is not None:

                                update_required = (
                                    existing_poll_result.category != category or existing_poll_result.text != text
                                )
                                update_required = update_required or existing_poll_result.state != state
                                update_required = update_required or existing_poll_result.district != district
                                update_required = update_required or existing_poll_result.ward != ward
                                update_required = update_required or existing_poll_result.completed != completed

                                # if the reporter answered the step, check if this is a newer run
                                if existing_poll_result.date is not None:
                                    update_required = update_required and (
                                        temba_step.left_on is None or temba_step.arrived_on > existing_poll_result.date
                                    )

                                if update_required:
                                    PollResult.objects.filter(pk=existing_poll_result.pk).update(
                                        category=category,
                                        text=text,
                                        state=state,
                                        district=district,
                                        ward=ward,
                                        date=temba_step.left_on,
                                        completed=completed,
                                    )

                                    num_updated += 1
                                else:
                                    num_ignored += 1

                            elif poll_result_to_save is not None:

                                replace_save_map = (
                                    poll_result_to_save.category != category or poll_result_to_save.text != text
                                )
                                replace_save_map = replace_save_map or poll_result_to_save.state != state
                                replace_save_map = replace_save_map or poll_result_to_save.district != district
                                replace_save_map = replace_save_map or poll_result_to_save.ward != ward
                                replace_save_map = replace_save_map or poll_result_to_save.completed != completed

                                # replace if the step is newer
                                if poll_result_to_save.date is not None:
                                    replace_save_map = replace_save_map and (
                                        temba_step.left_on is None or temba_step.arrived_on > poll_result_to_save.date
                                    )

                                if replace_save_map:
                                    result_obj = PollResult(
                                        org=org,
                                        flow=flow_uuid,
                                        ruleset=ruleset_uuid,
                                        contact=contact_uuid,
                                        category=category,
                                        text=text,
                                        state=state,
                                        district=district,
                                        ward=ward,
                                        date=temba_step.left_on,
                                        completed=completed,
                                    )

                                    poll_results_to_save_map[contact_uuid][ruleset_uuid] = result_obj

                                num_ignored += 1
                            else:

                                result_obj = PollResult(
                                    org=org,
                                    flow=flow_uuid,
                                    ruleset=ruleset_uuid,
                                    contact=contact_uuid,
                                    category=category,
                                    text=text,
                                    state=state,
                                    district=district,
                                    ward=ward,
                                    date=temba_step.left_on,
                                    completed=completed,
                                )

                                poll_results_to_save_map[contact_uuid][ruleset_uuid] = result_obj

                                num_created += 1

                    num_synced += len(fetch)
                    if progress_callback:
                        progress_callback(num_synced)

                    print "Processed fetch of %d - %d runs for poll #%d on org #%d" % (
                        num_synced - len(fetch),
                        num_synced,
                        poll.pk,
                        org.pk,
                    )
                    fetch_start = time.time()
                    print "=" * 40

                new_poll_results = []

                for c_key in poll_results_to_save_map.keys():
                    for r_key in poll_results_to_save_map.get(c_key, dict()):
                        obj_to_create = poll_results_to_save_map.get(c_key, dict()).get(r_key, None)
                        if obj_to_create is not None:
                            new_poll_results.append(obj_to_create)

                PollResult.objects.bulk_create(new_poll_results)

                # update the time for this poll from which we fetch next time
                cache.set(
                    PollResult.POLL_RESULTS_LAST_PULL_CACHE_KEY % (org.pk, poll.pk),
                    datetime_to_json_date(now.replace(tzinfo=pytz.utc)),
                    None,
                )

                # from django.db import connection as db_connection, reset_queries
                # slowest_queries = sorted(db_connection.queries, key=lambda q: q['time'], reverse=True)[:10]
                # for q in slowest_queries:
                #     print "=" * 60
                #     print "\n\n\n"
                #     print "%s -- %s" % (q['time'], q['sql'])
                # reset_queries()

                print "Finished pulling results for poll #%d on org #%d runs in %ds, " "created %d, updated %d, ignored %d" % (
                    poll.pk,
                    org.pk,
                    time.time() - start,
                    num_created,
                    num_updated,
                    num_ignored,
                )
        return num_created, num_updated, num_ignored
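
Both of the last two examples repeat the same field-by-field comparison to decide whether a stored PollResult should be updated or replaced. A hypothetical helper capturing that check (the name and standalone form are illustrative, not from the project) would read:

def poll_result_needs_update(existing, category, text, state, district, ward, completed, arrived_on, left_on):
    # any tracked field differing from the incoming run step counts as a change
    changed = (
        existing.category != category
        or existing.text != text
        or existing.state != state
        or existing.district != district
        or existing.ward != ward
        or existing.completed != completed
    )
    # mirror the inline guard: once the stored result has a date, only keep the change
    # when the step has not been left yet (left_on is None) or it arrived after that date
    if existing.date is not None:
        changed = changed and (left_on is None or arrived_on > existing.date)
    return changed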