Exemplo n.º 1
0
    def test_submission_xls_to_csv(self):
        """Check that submission_xls_to_csv turns an XLS file into CSV.

        The converted file is parsed next to the reference CSV fixture and
        the column name found at index 10 of each header row is compared.
        """
        converted_file = csv_import.submission_xls_to_csv(self.good_xls)

        # Move back to the start of the converted file before parsing it.
        converted_file.seek(0)
        converted_reader = ucsv.DictReader(converted_file,
                                           encoding='utf-8-sig')
        reference_reader = ucsv.DictReader(self.good_csv,
                                           encoding='utf-8-sig')

        # Only one header column (index 10) is checked by this test.
        expected_column = reference_reader.fieldnames[10]
        actual_column = converted_reader.fieldnames[10]
        self.assertEqual(expected_column, actual_column)
Exemplo n.º 2
0
    def test_submission_xls_to_csv(self):
        """Verify the XLS-to-CSV conversion against the reference CSV.

        Both the converted output and ``self.good_csv`` are read with
        ``ucsv.DictReader``; the test passes when the header entry at
        position 10 is the same in both files.
        """
        xls_as_csv = csv_import.submission_xls_to_csv(self.good_xls)
        # Reset the read position so the reader starts at the first byte.
        xls_as_csv.seek(0)

        reader_options = {'encoding': 'utf-8-sig'}
        actual_reader = ucsv.DictReader(xls_as_csv, **reader_options)
        expected_reader = ucsv.DictReader(self.good_csv, **reader_options)

        column_index = 10
        self.assertEqual(expected_reader.fieldnames[column_index],
                         actual_reader.fieldnames[column_index])
Exemplo n.º 3
0
    def data_import(self, request, *args, **kwargs):
        """Endpoint for CSV and XLS data imports.

        GET requests poll the progress of an asynchronous import: the
        ``job_uuid`` query param is passed to
        :py:func:`onadata.libs.utils.csv_import.get_async_csv_submission_status`
        and the result is merged into the response payload.

        Any other request (POST) imports an uploaded file:

        * ``request.FILES.get('csv_file')`` is imported with
          :py:func:`onadata.libs.utils.csv_import.submit_csv`;
        * ``request.FILES.get('xls_file')``, when provided, is first
          converted with
          :py:func:`onadata.libs.utils.csv_import.submission_xls_to_csv`
          and the converted file is imported instead of ``csv_file``.

        Files whose size reaches
        ``settings.CSV_FILESIZE_IMPORT_ASYNC_THRESHOLD`` are saved to the
        default storage and handed to ``submit_csv_async``; the response
        then carries the ``task_id`` of the queued task. The ``overwrite``
        query param enables overwriting when it equals ``true``
        (case-insensitive).

        :param request: the incoming request.
        :returns: a ``Response`` with the result payload; the status is
            400 when the payload holds an ``error`` entry, 200 otherwise.
        :raises ParseError: when the job status lookup raises
            ``ValueError`` or when no task could be queued.
        """
        self.object = self.get_object()
        resp = {}
        if request.method == 'GET':
            try:
                resp.update(
                    get_async_csv_submission_status(
                        request.query_params.get('job_uuid')))
                self.last_modified_date = timezone.now()
            except ValueError:
                raise ParseError(('The instance of the result is not a '
                                  'basestring; the job_uuid variable might '
                                  'be incorrect'))
        else:
            csv_file = request.FILES.get('csv_file', None)
            xls_file = request.FILES.get('xls_file', None)

            if csv_file is None and xls_file is None:
                resp.update({u'error': u'csv_file and xls_file field empty'})

            elif xls_file and \
                    xls_file.name.split('.')[-1] not in XLS_EXTENSIONS:
                resp.update({u'error': u'xls_file not an excel file'})

            elif csv_file and csv_file.name.split('.')[-1] != CSV_EXTENSION:
                resp.update({u'error': u'csv_file not a csv file'})

            else:
                # The extension was already validated by the branch above,
                # so a truthy xls_file is known to be an excel upload here.
                if xls_file:
                    csv_file = submission_xls_to_csv(xls_file)

                overwrite_param = request.query_params.get('overwrite')
                overwrite = bool(overwrite_param) and \
                    overwrite_param.lower() == 'true'

                size_threshold = settings.CSV_FILESIZE_IMPORT_ASYNC_THRESHOLD
                try:
                    csv_size = csv_file.size
                except AttributeError:
                    # File objects without a ``size`` attribute (presumably
                    # the result of the XLS conversion - confirm) are
                    # measured by content length. ``__sizeof__`` would give
                    # the memory footprint of the Python object instead,
                    # which says nothing about the amount of data.
                    position = csv_file.tell()
                    csv_file.seek(0, os.SEEK_END)
                    csv_size = csv_file.tell()
                    # Restore the read position for the code that follows.
                    csv_file.seek(position)

                if csv_size < size_threshold:
                    resp.update(
                        submit_csv(request.user.username, self.object,
                                   csv_file, overwrite))
                else:
                    csv_file.seek(0)
                    # Keep only the final path component: os.path.join
                    # discards the preceding components when a later one is
                    # absolute, which would drop the per-user prefix.
                    upload_to = os.path.join(
                        request.user.username, 'csv_imports',
                        os.path.basename(csv_file.name))
                    file_name = default_storage.save(upload_to, csv_file)
                    task = submit_csv_async.delay(request.user.username,
                                                  self.object.pk, file_name,
                                                  overwrite)
                    if task is None:
                        raise ParseError('Task not found')
                    resp.update({u'task_id': task.task_id})

        if resp.get('error') is None:
            status_code = status.HTTP_200_OK
        else:
            status_code = status.HTTP_400_BAD_REQUEST
        return Response(data=resp, status=status_code)
Exemplo n.º 4
0
    def data_import(self, request, *args, **kwargs):
        """Endpoint for CSV and XLS data imports.

        For GET requests the ``job_uuid`` query param is passed to
        :py:func:`onadata.libs.utils.csv_import.get_async_csv_submission_status`
        so that clients can poll the progress of an import job.

        For other requests (POST) an upload is imported:

        * ``request.FILES.get('csv_file')`` goes to
          :py:func:`onadata.libs.utils.csv_import.submit_csv`;
        * ``request.FILES.get('xls_file')``, if provided, is converted with
          :py:func:`onadata.libs.utils.csv_import.submission_xls_to_csv`
          first and the converted file replaces ``csv_file``.

        When the file size is not below
        ``settings.CSV_FILESIZE_IMPORT_ASYNC_THRESHOLD`` the file is stored
        through ``default_storage`` and ``submit_csv_async`` is queued; the
        ``task_id`` of that task is returned in the payload. Overwriting is
        requested with the query param ``overwrite=true`` (any letter case).

        :param request: the incoming request.
        :returns: a ``Response`` wrapping the result payload, with status
            400 if the payload contains an ``error`` entry and 200 if not.
        :raises ParseError: if the job status lookup raises ``ValueError``
            or if queuing the asynchronous task yields ``None``.
        """
        self.object = self.get_object()
        resp = {}
        if request.method == 'GET':
            try:
                resp.update(get_async_csv_submission_status(
                    request.query_params.get('job_uuid')))
                self.last_modified_date = timezone.now()
            except ValueError:
                raise ParseError(('The instance of the result is not a '
                                  'basestring; the job_uuid variable might '
                                  'be incorrect'))
        else:
            csv_file = request.FILES.get('csv_file', None)
            xls_file = request.FILES.get('xls_file', None)

            if csv_file is None and xls_file is None:
                resp.update({u'error': u'csv_file and xls_file field empty'})

            elif xls_file and \
                    xls_file.name.split('.')[-1] not in XLS_EXTENSIONS:
                resp.update({u'error': u'xls_file not an excel file'})

            elif csv_file and csv_file.name.split('.')[-1] != CSV_EXTENSION:
                resp.update({u'error': u'csv_file not a csv file'})

            else:
                # A truthy xls_file reaching this branch has already passed
                # the extension check above, so no second check is needed.
                if xls_file:
                    csv_file = submission_xls_to_csv(xls_file)

                requested_overwrite = request.query_params.get('overwrite')
                overwrite = bool(requested_overwrite) and \
                    requested_overwrite.lower() == 'true'

                size_threshold = settings.CSV_FILESIZE_IMPORT_ASYNC_THRESHOLD
                try:
                    csv_size = csv_file.size
                except AttributeError:
                    # No ``size`` attribute (presumably a converted XLS
                    # file - confirm): measure the content by seeking to
                    # the end. ``__sizeof__`` reports the in-memory size of
                    # the Python object, not the length of the data.
                    current_position = csv_file.tell()
                    csv_file.seek(0, os.SEEK_END)
                    csv_size = csv_file.tell()
                    # Put the read position back where it was.
                    csv_file.seek(current_position)

                if csv_size < size_threshold:
                    resp.update(submit_csv(request.user.username,
                                           self.object, csv_file, overwrite))
                else:
                    csv_file.seek(0)
                    # Use the base name only; an absolute csv_file.name
                    # would make os.path.join drop the per-user directory
                    # components that precede it.
                    upload_to = os.path.join(
                        request.user.username, 'csv_imports',
                        os.path.basename(csv_file.name))
                    file_name = default_storage.save(upload_to, csv_file)
                    task = submit_csv_async.delay(request.user.username,
                                                  self.object.pk, file_name,
                                                  overwrite)
                    if task is None:
                        raise ParseError('Task not found')
                    resp.update({u'task_id': task.task_id})

        has_error = resp.get('error') is not None
        return Response(
            data=resp,
            status=status.HTTP_400_BAD_REQUEST if has_error else
            status.HTTP_200_OK)