Example #1
def build_form_multimedia_zip(
        domain,
        export_id,
        datespan,
        user_types,
        download_id,
        owner_id,
):
    from corehq.apps.export.models import FormExportInstance
    export = FormExportInstance.get(export_id)
    form_ids = get_form_ids_having_multimedia(
        domain, export.app_id, export.xmlns, datespan, user_types
    )
    forms_info = _get_form_attachment_info(domain, form_ids, export)

    num_forms = len(forms_info)
    DownloadBase.set_progress(build_form_multimedia_zip, 0, num_forms)

    all_case_ids = set.union(*(info['case_ids'] for info in forms_info)) if forms_info else set()
    case_id_to_name = _get_case_names(domain, all_case_ids)

    with TransientTempfile() as temp_path:
        with open(temp_path, 'wb') as f:
            _write_attachments_to_file(temp_path, num_forms, forms_info, case_id_to_name)
        with open(temp_path, 'rb') as f:
            zip_name = 'multimedia-{}'.format(unidecode(export.name))
            _save_and_expose_zip(f, zip_name, domain, download_id, owner_id)

    DownloadBase.set_progress(build_form_multimedia_zip, num_forms, num_forms)
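Every example on this page relies on the same contract: TransientTempfile yields a filesystem path, not a file object, so the path can be opened several times inside the block (once for writing, once for reading) and the underlying file is cleaned up when the block exits. The implementation itself is not shown here; a minimal sketch of that contract, assuming a mkstemp-backed context manager rather than the real class, could look like this:

import os
import tempfile
from contextlib import contextmanager

@contextmanager
def transient_tempfile():
    # Sketch only: yield a path rather than a file object, so callers
    # can reopen it themselves, e.g. 'wb' to write and then 'rb' to read.
    fd, path = tempfile.mkstemp()
    try:
        os.close(fd)  # callers open the path on their own
        yield path
    finally:
        os.remove(path)  # remove the file when the block exits

The examples below all follow the resulting write-close-reopen pattern before handing the payload to the blob store or a download.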
Example #2
def _reconcile_es_data(data_type, metric, blob_parent_id, start=None, end=None, republish=True):
    today = date.today()
    if not start:
        two_days_ago = today - timedelta(days=2)
        start = two_days_ago.isoformat()
    with TransientTempfile() as file_path:
        with open(file_path, 'w') as output_file:
            call_command('stale_data_in_es', data_type, start=start, end=end, stdout=output_file)
        with open(file_path, 'r') as f:
            reader = csv.reader(f, delimiter='\t')
            # ignore the headers
            next(reader)
            counts_by_domain = defaultdict(int)
            for line in reader:
                domain = line[3]
                counts_by_domain[domain] += 1
            if counts_by_domain:
                for domain, count in counts_by_domain.items():
                    metrics_counter(metric, count, tags={'domain': domain})
            else:
                metrics_counter(metric, 0)
        if republish:
            call_command('republish_doc_changes', file_path, skip_domains=True)
        with open(file_path, 'rb') as f:
            blob_db = get_blob_db()
            key = f'{blob_parent_id}_{today.isoformat()}'
            six_years = 60 * 24 * 365 * 6  # six years in minutes; the blob timeout appears to be minutes-based (cf. Example #6)
            blob_db.put(
                f,
                type_code=CODES.tempfile,
                domain='<unknown>',
                parent_id=blob_parent_id,
                key=key,
                timeout=six_years
            )
Example #3
def assert_instance_gives_results(docs, export_instance, expected_result):
    with TransientTempfile() as temp_path:
        writer = get_export_writer([export_instance], temp_path)
        with writer.open([export_instance]):
            write_export_instance(writer, export_instance, docs)

        with ExportFile(writer.path, writer.format) as export:
            assert json.loads(export.read()) == expected_result
Example #4
    def build_export_json(self, query_master=False):
        with TransientTempfile() as temp_path:
            with open(temp_path, 'w+b') as f:
                self._write_data(f, query_master)
                f.seek(0)
                blob_ref, _ = IcdsFile.objects.get_or_create(blob_id=self._blob_id(), data_type='disha_dumps')
                blob_ref.store_file_in_blobdb(f, expired=DISHA_DUMP_EXPIRY)
                blob_ref.save()
Example #5
def rebuild_export(export_instance, filters=None):
    """
    Rebuild the given daily saved ExportInstance
    """
    filters = filters or export_instance.get_filters()
    with TransientTempfile() as temp_path:
        export_file = get_export_file([export_instance], filters or [], temp_path)
        with export_file as payload:
            save_export_payload(export_instance, payload)
Example #6
def populate_export_download_task(export_instances,
                                  filters,
                                  download_id,
                                  filename=None,
                                  expiry=10 * 60):
    """
    :param expiry: Time period, in minutes, for the export to be available for download
    """
    domain = export_instances[0].domain
    with TransientTempfile() as temp_path, datadog_track_errors(
            'populate_export_download_task'):
        export_file = get_export_file(
            export_instances,
            filters,
            temp_path,
            # We don't have a great way to calculate progress if it's a bulk download,
            # so only track the progress for single instance exports.
            progress_tracker=populate_export_download_task
            if len(export_instances) == 1 else None)

        file_format = Format.from_format(export_file.format)
        filename = filename or export_instances[0].name

        with export_file as file_:
            db = get_blob_db()
            db.put(
                file_,
                domain=domain,
                parent_id=domain,
                type_code=CODES.data_export,
                key=download_id,
                timeout=expiry,
            )

            expose_blob_download(
                download_id,
                expiry=expiry * 60,
                mimetype=file_format.mimetype,
                content_disposition=safe_filename_header(
                    filename, file_format.extension),
                download_id=download_id,
            )

    email_requests = EmailExportWhenDoneRequest.objects.filter(
        domain=domain, download_id=download_id)
    for email_request in email_requests:
        try:
            couch_user = CouchUser.get_by_user_id(email_request.user_id,
                                                  domain=domain)
        except CouchUser.AccountTypeError:
            pass
        else:
            if couch_user is not None:
                process_email_request(domain, download_id,
                                      couch_user.get_email())
    email_requests.delete()
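A unit detail worth flagging in the example above: the docstring and db.put(timeout=expiry) both deal in minutes, while expose_blob_download receives expiry * 60, which suggests it expects seconds. Examples #11 and #16 apply the same conversion.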
Example #7
def rebuild_export(export_instance, progress_tracker):
    """
    Rebuild the given daily saved ExportInstance
    """
    filters = export_instance.get_filters() or []
    es_filters = [f.to_es_filter() for f in filters]
    with TransientTempfile() as temp_path:
        export_file = get_export_file([export_instance], es_filters, temp_path, progress_tracker)
        with export_file as payload:
            save_export_payload(export_instance, payload)
Example #8
    def iter_export_docs():
        with TransientTempfile() as temp_path:
            with open(temp_path, 'w', encoding='utf-8') as f:
                for doc_id in scroll_result:
                    f.write(doc_id + '\n')

            # Stream doc ids from disk and fetch documents from ES in chunks
            with open(temp_path, 'r', encoding='utf-8') as f:
                doc_ids = (doc_id.strip() for doc_id in f)
                for doc in iter_es_docs(query.index, doc_ids):
                    yield doc
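Example #8 spools doc ids to a temp file so that neither the full id list nor the fetched documents are ever held in memory at once. A sketch of the chunking idiom that a helper like iter_es_docs presumably builds on (iter_in_chunks is illustrative and not part of the original API):

from itertools import islice

def iter_in_chunks(iterable, chunk_size=100):
    # Pull a bounded slice of ids at a time; each chunk would then back
    # one batched fetch against the ES index.
    iterator = iter(iterable)
    while True:
        chunk = list(islice(iterator, chunk_size))
        if not chunk:
            return
        yield chunk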
Example #9
def _generate_incremental_export(incremental_export):
    export_instance = incremental_export.export_instance
    export_instance.export_format = Format.UNZIPPED_CSV  # force to unzipped CSV
    checkpoint = incremental_export.last_valid_checkpoint

    # Remove the date period from the ExportInstance, since this is added automatically by Daily Saved exports
    export_instance.filters.date_period = None
    filters = export_instance.get_filters()
    if checkpoint:
        filters.append(
            ServerModifiedOnRangeFilter(gt=checkpoint.last_doc_date))

    class LastDocTracker:
        def __init__(self, doc_iterator):
            self.doc_iterator = doc_iterator
            self.last_doc = None
            self.doc_count = 0

        def __iter__(self):
            for doc in self.doc_iterator:
                self.last_doc = doc
                self.doc_count += 1
                yield doc

    with TransientTempfile() as temp_path, metrics_track_errors(
            'generate_incremental_exports'):
        writer = get_export_writer([export_instance],
                                   temp_path,
                                   allow_pagination=False)
        with writer.open([export_instance]):
            query = _get_export_query(export_instance, filters)
            query = query.sort('server_modified_on')  # reset sort to this instead of opened_on
            docs = LastDocTracker(query.run().hits)
            write_export_instance(writer, export_instance, docs)

        export_file = ExportFile(writer.path, writer.format)

        if docs.doc_count <= 0:
            return

        new_checkpoint = incremental_export.checkpoint(
            docs.doc_count, docs.last_doc.get('server_modified_on'))

        with export_file as file_:
            db = get_blob_db()
            db.put(file_,
                   domain=incremental_export.domain,
                   parent_id=new_checkpoint.blob_parent_id,
                   type_code=CODES.data_export,
                   key=str(new_checkpoint.blob_key),
                   timeout=24 * 60)
    return new_checkpoint
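The LastDocTracker wrapper above lets the checkpoint capture two things in a single pass: how many documents were written and the server_modified_on of the last one. Because the query's sort is reset to server_modified_on, that last timestamp is also the newest one seen, and it becomes the gt bound (via ServerModifiedOnRangeFilter) for the next incremental run.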
Example #10
def _generate_form_multimedia_zipfile(domain, export, form_ids, download_id, owner_id, task_name):
    forms_info = _get_form_attachment_info(domain, form_ids, export)

    num_forms = len(forms_info)
    DownloadBase.set_progress(task_name, 0, num_forms)

    all_case_ids = set.union(*(info['case_ids'] for info in forms_info)) if forms_info else set()
    case_id_to_name = _get_case_names(domain, all_case_ids)

    with TransientTempfile() as temp_path:
        with open(temp_path, 'wb') as f:
            _write_attachments_to_file(temp_path, num_forms, forms_info, case_id_to_name)
        with open(temp_path, 'rb') as f:
            zip_name = 'multimedia-{}'.format(unidecode(export.name))
            _save_and_expose_zip(f, zip_name, domain, download_id, owner_id)

    DownloadBase.set_progress(task_name, num_forms, num_forms)
Example #11
def generate_toggle_csv_download(self, tag, download_id, username):
    toggles = _get_toggles_with_tag(tag)
    total = _get_toggle_item_count(toggles)
    current_progress = [0]

    def increment_progress():
        current_progress[0] += 1
        DownloadBase.set_progress(self, current_progress[0], total)

    timeout_mins = 24 * 60
    with TransientTempfile() as temp_path:
        _write_toggle_data(temp_path, toggles, increment_progress)

        with open(temp_path, 'rb') as file:
            db = get_blob_db()
            meta = db.put(
                file,
                domain="__system__",
                parent_id="__system__",
                type_code=CODES.tempfile,
                key=download_id,
                timeout=timeout_mins,
            )

    now = datetime.utcnow().strftime("%Y-%m-%d-%H-%M-%S")
    filename = f'{settings.SERVER_ENVIRONMENT}_toggle_export_{now}'
    expose_blob_download(
        download_id,
        expiry=timeout_mins * 60,
        content_disposition=safe_filename_header(filename, ".csv"),
        download_id=download_id,
    )

    user = CouchUser.get_by_username(username)
    if user:
        url = absolute_reverse("retrieve_download", args=[download_id])
        url += "?get_file"
        valid_until = meta.expires_on.replace(
            tzinfo=pytz.UTC).strftime(USER_DATETIME_FORMAT)
        send_HTML_email("Feature Flag download ready",
                        user.get_email(),
                        html_content=inspect.cleandoc(f"""
        Download URL: {url}
        Download Valid until: {valid_until}
        """))
    def test_multi_table_order(self):
        tables = [
            TableConfiguration(
                label="My table {}".format(i),
                selected=True,
                path=[],
                columns=[
                    ExportColumn(
                        label="Q{}".format(i),
                        item=ScalarItem(
                            path=[PathNode(name='form'), PathNode(name='q{}'.format(i))],
                        ),
                        selected=True,
                    ),
                ]
            )
            for i in range(10)
        ]
        export_instance = FormExportInstance(
            export_format=Format.HTML,
            tables=tables
        )

        docs = [
            {
                'domain': 'my-domain',
                '_id': '1234',
                "form": {'q{}'.format(i): 'value {}'.format(i) for i in range(10)}
            }
        ]

        with TransientTempfile() as temp_path:
            writer = get_export_writer([export_instance], temp_path)
            with writer.open([export_instance]):
                write_export_instance(writer, export_instance, docs)
            with ExportFile(writer.path, writer.format) as export:
                exported_tables = re.findall('<table>', export.read())

        expected_tables = [t.label for t in tables]
        self.assertEqual(len(expected_tables), len(exported_tables))
Example #13
    def test_file_content(self, disha_get_rows_mock):
        class CountableList(list):
            # used as the mock's return value in place of a QuerySet, which needs a count() method
            def count(self, *args, **kwargs):
                return len(self)

        data = [['a'], ['b'], ["d\xc3\xa9f"]]
        disha_get_rows_mock.return_value = CountableList(data)

        month = date(2018, 8, 1)
        state = 'Andhra pradesh'
        with TransientTempfile() as temp_path:
            dump = DishaDump(state, month)
            with open(temp_path, 'w+b') as f:
                dump._write_data_in_chunks(f)
            with open(temp_path, 'r', encoding='utf-8') as f:
                expected_json = {
                    'month': str(month),
                    'state_name': state,
                    'column_names': dump._get_columns(),
                    'rows': data
                }
                self.assertEqual(json.loads(f.read()), expected_json)
Example #14
def get_export_json(export_instance):
    with TransientTempfile() as temp_path:
        export_file = get_export_file([export_instance], [], temp_path)

        with export_file as export:
            return json.loads(export.read())
Example #15
    def test_simple_bulk_export(self, export_save):

        with TransientTempfile() as temp_path:
            export_file = get_export_file(
                [
                    CaseExportInstance(
                        export_format=Format.JSON,
                        domain=DOMAIN,
                        case_type=DEFAULT_CASE_TYPE,
                        tables=[
                            TableConfiguration(
                                selected=True,
                                label="My table",
                                path=MAIN_TABLE,
                                columns=[
                                    ExportColumn(
                                        label="Foo column",
                                        item=ExportItem(
                                            path=[PathNode(name="foo")]),
                                        selected=True,
                                    ),
                                ])
                        ]),
                    CaseExportInstance(
                        export_format=Format.JSON,
                        domain=DOMAIN,
                        case_type=DEFAULT_CASE_TYPE,
                        tables=[
                            TableConfiguration(
                                label="My table",
                                selected=True,
                                path=MAIN_TABLE,
                                columns=[
                                    ExportColumn(
                                        label="Bar column",
                                        item=ExportItem(
                                            path=[PathNode(name="bar")]),
                                        selected=True,
                                    )
                                ])
                        ]),
                ],
                [],  # No filters
                temp_path,
            )

            expected = {
                'Export1-My table': {
                    "A1": "Foo column",
                    "A2": "apple",
                    "A3": "apple",
                    "A4": "apple",
                },
                "Export2-My table": {
                    "A1": "Bar column",
                    "A2": "banana",
                    "A3": "banana",
                    "A4": "banana",
                },
            }

            with export_file as export:
                wb = load_workbook(export)
                self.assertEqual(wb.get_sheet_names(),
                                 ["Export1-My table", "Export2-My table"])

                for sheet in expected.keys():
                    for cell in expected[sheet].keys():
                        self.assertEqual(
                            wb[sheet][cell].value, expected[sheet][cell],
                            'AssertionError: Sheet "{}", cell "{}" expected: "{}", got "{}"'
                            .format(sheet, cell, expected[sheet][cell],
                                    wb[sheet][cell].value))
        self.assertTrue(export_save.called)
Example #16
def populate_export_download_task(domain,
                                  export_ids,
                                  exports_type,
                                  username,
                                  es_filters,
                                  download_id,
                                  owner_id,
                                  filename=None,
                                  expiry=10 * 60):
    """
    :param expiry: Time period, in minutes, for the export to be available for download
    """

    email_requests = EmailExportWhenDoneRequest.objects.filter(
        domain=domain, download_id=download_id)

    if settings.STALE_EXPORT_THRESHOLD is not None and not email_requests.count():
        delay = get_task_time_to_start(
            populate_export_download_task.request.id)
        if delay.total_seconds() > settings.STALE_EXPORT_THRESHOLD:
            metrics_counter('commcare.exports.rejected_unfresh_export')
            raise RejectedStaleExport()

    export_instances = [
        get_export(exports_type, domain, export_id, username)
        for export_id in export_ids
    ]
    with TransientTempfile() as temp_path, metrics_track_errors(
            'populate_export_download_task'):
        export_file = get_export_file(
            export_instances,
            es_filters,
            temp_path,
            # We don't have a great way to calculate progress if it's a bulk download,
            # so only track the progress for single instance exports.
            progress_tracker=populate_export_download_task
            if len(export_instances) == 1 else None)

        file_format = Format.from_format(export_file.format)
        filename = filename or export_instances[0].name

        with export_file as file_:
            db = get_blob_db()
            db.put(
                file_,
                domain=domain,
                parent_id=domain,
                type_code=CODES.data_export,
                key=download_id,
                timeout=expiry,
            )

            expose_blob_download(
                download_id,
                expiry=expiry * 60,
                mimetype=file_format.mimetype,
                content_disposition=safe_filename_header(
                    filename, file_format.extension),
                download_id=download_id,
                owner_ids=[owner_id],
            )

    for email_request in email_requests:
        try:
            couch_user = CouchUser.get_by_user_id(email_request.user_id,
                                                  domain=domain)
        except CouchUser.AccountTypeError:
            pass
        else:
            if couch_user is not None:
                process_email_request(domain, download_id,
                                      couch_user.get_email())
    email_requests.delete()
Example #17
    def test_multiple_write_export_instance_calls(self, export_save):
        """
        Confirm that calling _write_export_instance() multiple times
        (as part of a bulk export) works as expected.
        """
        export_instances = [
            FormExportInstance(tables=[
                TableConfiguration(label="My table",
                                   selected=True,
                                   path=[],
                                   columns=[
                                       ExportColumn(
                                           label="Q3",
                                           item=ScalarItem(path=[
                                               PathNode(name='form'),
                                               PathNode(name='q3')
                                           ], ),
                                           selected=True,
                                       ),
                                   ]),
            ]),
            FormExportInstance(tables=[
                TableConfiguration(label="My other table",
                                   selected=True,
                                   path=[
                                       PathNode(name="form", is_repeat=False),
                                       PathNode(name="q2", is_repeat=False)
                                   ],
                                   columns=[
                                       ExportColumn(
                                           label="Q4",
                                           item=ScalarItem(path=[
                                               PathNode(name='form'),
                                               PathNode(name='q2'),
                                               PathNode(name='q4')
                                           ], ),
                                           selected=True,
                                       ),
                                   ])
            ]),
            FormExportInstance(tables=[
                TableConfiguration(label="My other table",
                                   selected=True,
                                   path=[
                                       PathNode(name="form", is_repeat=False),
                                       PathNode(name="q2", is_repeat=False)
                                   ],
                                   columns=[
                                       ExportColumn(
                                           label="Q4",
                                           item=ScalarItem(path=[
                                               PathNode(name='form'),
                                               PathNode(name='q2'),
                                               PathNode(name='q4')
                                           ], ),
                                           selected=True,
                                       ),
                                   ])
            ])
        ]

        with TransientTempfile() as temp_path:
            writer = _ExportWriter(get_writer(Format.JSON), temp_path)
            with writer.open(export_instances):
                write_export_instance(writer, export_instances[0], self.docs)
                write_export_instance(writer, export_instances[1], self.docs)
                write_export_instance(writer, export_instances[2], self.docs)

            with ExportFile(writer.path, writer.format) as export:
                self.assertEqual(
                    json.loads(export.read()), {
                        'My table': {
                            'headers': ['Q3'],
                            'rows': [['baz'], ['bop']],
                        },
                        'Export2-My other table': {
                            'headers': ['Q4'],
                            'rows': [['bar'], ['boop']],
                        },
                        'Export3-My other table': {
                            'headers': ['Q4'],
                            'rows': [['bar'], ['boop']],
                        },
                    })
        self.assertTrue(export_save.called)