Exemplo n.º 1
0
def convert_csv_to_xls(csv_repr):
    """
    Build an XLS workbook from a "sheeted" CSV representation.

    The CSV is parsed with ``xls2json_backends.csv_to_dict``; every resulting
    sheet whose name does not end in ``_header`` is written to a new ``xlwt``
    workbook. The saved workbook is returned as a ``StringIO`` object rewound
    to position 0.

    This method should be moved into pyxform
    """
    def _fill_sheet(sheet, rows):
        # Gather the column names in first-seen order across all rows
        headers = []
        for record in rows:
            for name in record:
                if name not in headers:
                    headers.append(name)
        # Row 0 holds the header names
        for col_index, header in enumerate(headers):
            sheet.write(0, col_index, header)
        # Data rows start at row 1; falsy values leave the cell unwritten
        for row_index, record in enumerate(rows, 1):
            for col_index, header in enumerate(headers):
                cell = record.get(header)
                if not cell:
                    continue
                sheet.write(row_index, col_index, cell)

    utf8_csv = csv_repr.decode("utf-8").encode("utf-8")
    sheets = xls2json_backends.csv_to_dict(StringIO.StringIO(utf8_csv))
    workbook = xlwt.Workbook()
    for title, rows in sheets.items():
        # pyxform.xls2json_backends adds "_header" items for each sheet;
        # those are not real sheets, so leave them out of the workbook
        if re.match(r".*_header$", title):
            continue
        _fill_sheet(workbook.add_sheet(title), rows)
    # TODO: As XLS files are binary, I believe this should be `io.BytesIO()`.
    output = StringIO.StringIO()
    workbook.save(output)
    output.seek(0)
    return output
Exemplo n.º 2
0
def convert_csv_to_xls(csv_repr):
    """
    Turn a "sheeted" CSV string into an in-memory XLS workbook.

    Sheets are obtained from ``xls2json_backends.csv_to_dict``; entries whose
    name ends with ``_header`` are skipped. Returns a ``StringIO`` holding the
    saved workbook, positioned at the start.

    This method should be moved into pyxform
    """
    def _write_rows(sheet, records):
        # Column order is the order in which keys are first encountered
        column_names = []
        for record in records:
            for key in record:
                if key not in column_names:
                    column_names.append(key)

        for position, column_name in enumerate(column_names):
            sheet.write(0, position, column_name)

        for offset, record in enumerate(records):
            for position, column_name in enumerate(column_names):
                value = record.get(column_name)
                # Missing or empty values are simply not written
                if value:
                    sheet.write(offset + 1, position, value)

    csv_stream = StringIO.StringIO(csv_repr.decode("utf-8").encode("utf-8"))
    parsed_sheets = xls2json_backends.csv_to_dict(csv_stream)
    book = xlwt.Workbook()
    for name in parsed_sheets:
        # pyxform.xls2json_backends adds "_header" items for each sheet.....
        if re.match(r".*_header$", name) is None:
            _write_rows(book.add_sheet(name), parsed_sheets[name])
    buffer_ = StringIO.StringIO()
    book.save(buffer_)
    buffer_.seek(0)
    return buffer_
Exemplo n.º 3
0
def csv_to_xls(csv_repr):
    """
    Convert a "sheeted" CSV representation into an in-memory XLS workbook.

    Blank lines (empty, whitespace-only, or consisting only of double quotes)
    are dropped before parsing. The remaining text is parsed with
    ``xls2json_backends.csv_to_dict`` and every sheet whose name does not end
    in ``_header`` is written to an ``xlwt`` workbook.

    ``csv_repr`` may be a string or an iterable of lines. Returns a
    ``StringIO`` containing the saved workbook, rewound to position 0.
    """
    # Iterating over a string yields single characters, not lines, which
    # would strip every whitespace and quote character from the CSV. Split
    # strings into lines (keeping line endings) first; any other iterable is
    # assumed to already yield lines.
    if hasattr(csv_repr, 'splitlines'):
        lines = csv_repr.splitlines(True)
    else:
        lines = csv_repr
    csv_repr = ''.join([
        line for line in lines if line.strip().strip('"')
    ])

    def _add_contents_to_sheet(sheet, contents):
        # Collect the column names in first-seen order across all rows
        cols = []
        for row in contents:
            for key in row.keys():
                if key not in cols:
                    cols.append(key)
        # Header row
        for ci, col in enumerate(cols):
            sheet.write(0, ci, col)
        # Data rows start at row 1; falsy values leave the cell unwritten
        for ri, row in enumerate(contents):
            for ci, col in enumerate(cols):
                val = row.get(col, None)
                if val:
                    sheet.write(ri + 1, ci, val)

    encoded_csv = csv_repr.decode("utf-8").encode("utf-8")
    dict_repr = xls2json_backends.csv_to_dict(StringIO.StringIO(encoded_csv))
    workbook = xlwt.Workbook()
    for sheet_name in dict_repr.keys():
        # pyxform.xls2json_backends adds "_header" items for each sheet.....
        if not re.match(r".*_header$", sheet_name):
            cur_sheet = workbook.add_sheet(sheet_name)
            _add_contents_to_sheet(cur_sheet, dict_repr[sheet_name])
    # TODO: As XLS files are binary, I believe this should be `io.BytesIO()`.
    string_io = StringIO.StringIO()
    workbook.save(string_io)
    string_io.seek(0)
    return string_io
Exemplo n.º 4
0
 def test_a_unicode_csv_works(self):
     """
     Check that csv_to_dict copes with a CSV fixture containing unicode.

     The parsed "utf_csv.csv" fixture is dumped to JSON and the dump must
     contain the escaped code unit (backslash, "u", "d83c").
     """
     parsed = csv_to_dict(utils.path_to_text_fixture("utf_csv.csv"))
     serialized = json.dumps(parsed)
     self.assertTrue("\\ud83c" in serialized)
Exemplo n.º 5
0
 def test_a_unicode_csv_works(self):
     """
     Simply tests that xls2json_backends.csv_to_dict does not have a problem
     with a csv with unicode characters
     """
     utf_csv_path = utils.path_to_text_fixture("utf_csv.csv")
     dict_value = csv_to_dict(utf_csv_path)
     # json.dumps escapes non-ASCII characters by default, so the dump holds
     # the literal six characters backslash-u-d-8-3-c. The backslash is
     # escaped explicitly so the literal means the same thing whether or not
     # string literals are unicode (an unescaped form would become a lone
     # surrogate character and never match).
     self.assertTrue("\\ud83c" in json.dumps(dict_value))
Exemplo n.º 6
0
def convert_csv_to_ss_structure(csv_repr):
    """
    Parse a "sheeted" CSV string into a plain dict of sheets.

    The text is encoded as UTF-8 and parsed with
    ``xls2json_backends.csv_to_dict``. Entries whose key ends in ``_header``
    are left out of the result.

    Returns a new ``dict`` mapping each remaining sheet name to its parsed
    contents.
    """
    parsed = xls2json_backends.csv_to_dict(
        StringIO.StringIO(csv_repr.encode("utf-8")))
    # Build a filtered copy instead of deleting keys while iterating over
    # them; deleting during iteration fails when `keys()` is a live view.
    return {
        key: sheet
        for key, sheet in parsed.items()
        if not re.match('.*_header$', key)
    }
Exemplo n.º 7
0
def create_survey_from_csv_text(
    csv_text,
    default_name='KoBoFormSurvey',
    default_language=u'default',
    warnings=None,
):
    """
    Create a survey from the CSV text of a form.

    ``csv_text`` is encoded as UTF-8, parsed into a workbook dict by
    ``xls2json_backends.csv_to_dict`` and handed, together with the remaining
    arguments, to ``create_survey_from_ss_struct``, whose result is returned.
    """
    csv_stream = StringIO.StringIO(csv_text.encode("utf-8"))
    parsed_workbook = xls2json_backends.csv_to_dict(csv_stream)
    return create_survey_from_ss_struct(
        parsed_workbook,
        default_name,
        default_language,
        warnings,
    )
Exemplo n.º 8
0
 def test_equivalency(self):
     """
     Check that each fixture parses to the same dict from XLS and from CSV.
     """
     # The 'gps' fixture is currently excluded from the comparison.
     fixture_names = (
         'group',
         'loop',
         'specify_other',
         'include',
         'text_and_integer',
         'include_json',
         'yes_or_no_question',
     )
     for name in fixture_names:
         xls_path = utils.path_to_text_fixture("%s.xls" % name)
         csv_path = utils.path_to_text_fixture("%s.csv" % name)
         from_xls = xls_to_dict(xls_path)
         from_csv = csv_to_dict(csv_path)
         self.assertEqual(from_csv, from_xls)
Exemplo n.º 9
0
 def _parse_input(self):
     """
     Load the workbook at ``self._path`` and run the clean-up passes.

     ``self._dict`` is populated by ``xls_to_dict`` or ``csv_to_dict``
     depending on ``self.filetype``; for any other file type it is left
     untouched. The sheet names are then recorded and the normalisation
     helpers are run in a fixed order.
     """
     kind = self.filetype
     if kind in ("xls", "csv"):
         reader = xls_to_dict if kind == "xls" else csv_to_dict
         self._dict = reader(self._path)
     self._sheet_names = self._dict.keys()
     # The order of these passes is kept as-is
     for step in (
         self._set_choices_and_columns_sheet_name,
         self._strip_unicode_values,
         self._fix_int_values,
         self._group_dictionaries,
     ):
         step()
Exemplo n.º 10
0
 def test_equivalency(self):
     """
     Assert that the XLS and CSV versions of each fixture parse identically.

     ``maxDiff`` is cleared before each comparison so that a failure shows
     the complete diff.
     """
     # 'gps' is currently not part of the compared fixtures.
     names = [
         'group',
         'loop',
         'specify_other',
         'include',
         'text_and_integer',
         'include_json',
         'yes_or_no_question',
     ]
     for base_name in names:
         path_for_xls = utils.path_to_text_fixture("%s.xls" % base_name)
         path_for_csv = utils.path_to_text_fixture("%s.csv" % base_name)
         parsed_xls = xls_to_dict(path_for_xls)
         parsed_csv = csv_to_dict(path_for_csv)
         self.maxDiff = None
         self.assertEqual(parsed_csv, parsed_xls)
Exemplo n.º 11
0
def convert_csv_to_xls(csv_repr):
    """
    Convert a "sheeted" CSV string into an in-memory XLS workbook.

    The UTF-8 encoded text is parsed with ``xls2json_backends.csv_to_dict``;
    each sheet whose name does not end in ``_header`` is added to an ``xlwt``
    workbook and filled by ``_add_contents_to_sheet``. Returns a ``StringIO``
    holding the saved workbook, rewound to position 0.
    """
    csv_stream = StringIO.StringIO(csv_repr.encode("utf-8"))
    sheets = xls2json_backends.csv_to_dict(csv_stream)
    book = xlwt.Workbook()
    for title, rows in sheets.items():
        # pyxform.xls2json_backends adds "_header" items for each sheet;
        # they are not written out as sheets
        if re.match(r".*_header$", title):
            continue
        _add_contents_to_sheet(book.add_sheet(title), rows)
    output = StringIO.StringIO()
    book.save(output)
    output.seek(0)
    return output
Exemplo n.º 12
0
    def test_xls_to_dict(self):
        """
        Check that converting CSV to XLS does not change the parsed result.

        ``pyxform_utils.convert_csv_to_xls`` performs (CSV -> XLS). The
        assertion compares
          (CSV) -> dict_representation
          (CSV -> XLS) -> dict_representation
        both obtained through pyxform's ``xls2json_backends``.
        """
        # (CSV -> XLS) -> dict, going through our converter first
        converted_workbook = pyxform_utils.convert_csv_to_xls(simple_yn)
        via_xls = xls2json_backends.xls_to_dict(converted_workbook)
        # (CSV) -> dict, straight from the original text
        via_csv = xls2json_backends.csv_to_dict(StringIO(simple_yn))
        self.assertEqual(via_csv, via_xls)
Exemplo n.º 13
0
    def publish(self, user, id_string=None):
        """
        Store the submitted XLSForm and publish it.

        The form source is chosen in this order: pasted CSV text
        (``text_xls_form``), the ``xls_file`` field, then a remote file
        downloaded from ``xls_url`` or, if that is blank,
        ``dropbox_xls_url``.

        :param user: ``user.username`` is passed to ``upload_to`` and ``user``
            itself to ``publish_xls_form``.
        :param id_string: passed through unchanged to ``publish_xls_form``.
        :returns: the result of ``publish_xls_form``, or ``None`` when the
            form is not valid.
        :raises ValueError: if the CSV text has no first ``settings`` row
            providing ``id_string`` or ``form_id``.
        """
        # Local import: only needed to close the HTTP response below
        from contextlib import closing

        if not self.is_valid():
            return None

        # If a text (csv) representation of the xlsform is present,
        # this will save the file and pass it instead of the 'xls_file'
        # field.
        if 'text_xls_form' in self.cleaned_data\
           and self.cleaned_data['text_xls_form'].strip():
            csv_data = self.cleaned_data['text_xls_form']
            # "Note that any text-based field - such as CharField or
            # EmailField - always cleans the input into a Unicode string"
            # (https://docs.djangoproject.com/en/1.8/ref/forms/api/#django.forms.Form.cleaned_data).
            csv_data = csv_data.encode('utf-8')
            # requires that csv forms have a settings with an id_string or
            # form_id
            _sheets = csv_to_dict(StringIO(csv_data))
            try:
                _settings = _sheets['settings'][0]
                if 'id_string' in _settings:
                    _name = '%s.csv' % _settings['id_string']
                else:
                    _name = '%s.csv' % _settings['form_id']
            except (KeyError, IndexError):
                raise ValueError('CSV XLSForms must have a settings sheet'
                                 ' and id_string or form_id')

            cleaned_xls_file = \
                default_storage.save(
                    upload_to(None, _name, user.username),
                    ContentFile(csv_data))
        else:
            cleaned_xls_file = self.cleaned_data['xls_file']

        if not cleaned_xls_file:
            # No text and no file: fall back to downloading from a URL
            cleaned_url = self.cleaned_data['xls_url']
            if cleaned_url.strip() == '':
                cleaned_url = self.cleaned_data['dropbox_xls_url']
            # Derive the stored file name from the last two path segments
            cleaned_xls_file = urlparse(cleaned_url)
            cleaned_xls_file = \
                '_'.join(cleaned_xls_file.path.split('/')[-2:])
            if not cleaned_xls_file.endswith('.xls'):
                cleaned_xls_file += '.xls'
            cleaned_xls_file = \
                upload_to(None, cleaned_xls_file, user.username)
            self.validate(cleaned_url)
            # Close the HTTP response explicitly once its body has been read
            with closing(urllib2.urlopen(cleaned_url)) as response:
                xls_data = ContentFile(response.read())
            cleaned_xls_file = \
                default_storage.save(cleaned_xls_file, xls_data)
        # publish the xls
        return publish_xls_form(cleaned_xls_file, user, id_string)
Exemplo n.º 14
0
    def publish(self, user, id_string=None):
        """
        Save the XLSForm supplied through this form and publish it.

        Sources are tried in this order: CSV text pasted into
        ``text_xls_form``, the ``xls_file`` field, and finally a download
        from ``xls_url`` (or ``dropbox_xls_url`` when ``xls_url`` is blank).

        :param user: ``user.username`` is passed to ``upload_to`` and ``user``
            itself to ``publish_xls_form``.
        :param id_string: forwarded unchanged to ``publish_xls_form``.
        :returns: whatever ``publish_xls_form`` returns, or ``None`` if the
            form does not validate.
        :raises ValueError: when the CSV text lacks a first ``settings`` row
            with ``id_string`` or ``form_id``.
        """
        # Local import: only needed to close the HTTP response below
        from contextlib import closing

        if not self.is_valid():
            return None

        # If a text (csv) representation of the xlsform is present,
        # this will save the file and pass it instead of the 'xls_file'
        # field.
        if 'text_xls_form' in self.cleaned_data\
           and self.cleaned_data['text_xls_form'].strip():
            csv_data = self.cleaned_data['text_xls_form']
            # "Note that any text-based field - such as CharField or
            # EmailField - always cleans the input into a Unicode string"
            # (https://docs.djangoproject.com/en/1.8/ref/forms/api/#django.forms.Form.cleaned_data).
            csv_data = csv_data.encode('utf-8')
            # requires that csv forms have a settings with an id_string or
            # form_id
            _sheets = csv_to_dict(StringIO(csv_data))
            try:
                _settings = _sheets['settings'][0]
                if 'id_string' in _settings:
                    _name = '%s.csv' % _settings['id_string']
                else:
                    _name = '%s.csv' % _settings['form_id']
            except (KeyError, IndexError):
                raise ValueError('CSV XLSForms must have a settings sheet'
                                 ' and id_string or form_id')

            cleaned_xls_file = \
                default_storage.save(
                    upload_to(None, _name, user.username),
                    ContentFile(csv_data))
        else:
            cleaned_xls_file = self.cleaned_data['xls_file']

        if not cleaned_xls_file:
            # Neither text nor file was provided: download from a URL
            cleaned_url = self.cleaned_data['xls_url']
            if cleaned_url.strip() == u'':
                cleaned_url = self.cleaned_data['dropbox_xls_url']
            # The stored name is built from the last two URL path segments
            cleaned_xls_file = urlparse(cleaned_url)
            cleaned_xls_file = \
                '_'.join(cleaned_xls_file.path.split('/')[-2:])
            if not cleaned_xls_file.endswith('.xls'):
                cleaned_xls_file += '.xls'
            cleaned_xls_file = \
                upload_to(None, cleaned_xls_file, user.username)
            self.validate(cleaned_url)
            # Make sure the HTTP response is closed after reading its body
            with closing(urllib2.urlopen(cleaned_url)) as response:
                xls_data = ContentFile(response.read())
            cleaned_xls_file = \
                default_storage.save(cleaned_xls_file, xls_data)
        # publish the xls
        return publish_xls_form(cleaned_xls_file, user, id_string)
Exemplo n.º 15
0
 def test_equivalency(self):
     """
     Verify that the XLS and CSV fixtures produce equal parsed dicts.

     ``maxDiff`` is reset before every comparison so that a mismatch is
     reported in full.
     """
     # The "gps" fixture is currently left out of this list.
     fixture_names = (
         "group",
         "loop",
         "specify_other",
         "include",
         "text_and_integer",
         "include_json",
         "yes_or_no_question",
     )
     for fixture_name in fixture_names:
         xls_fixture = utils.path_to_text_fixture("%s.xls" % fixture_name)
         csv_fixture = utils.path_to_text_fixture("%s.csv" % fixture_name)
         expected = xls_to_dict(xls_fixture)
         actual = csv_to_dict(csv_fixture)
         self.maxDiff = None
         self.assertEqual(actual, expected)
Exemplo n.º 16
0
 def test_equivalency(self):
     """
     Compare the parsed XLS and CSV form of every listed fixture.

     For each fixture name both files are resolved through
     ``utils.path_to_text_fixture``, parsed, and asserted equal with an
     unlimited diff size.
     """
     # "gps" is not included in the comparison at the moment.
     stems = [
         "group",
         "loop",
         "specify_other",
         "include",
         "text_and_integer",
         "include_json",
         "yes_or_no_question",
     ]
     for stem in stems:
         xls_file = utils.path_to_text_fixture("%s.xls" % stem)
         csv_file = utils.path_to_text_fixture("%s.csv" % stem)
         xls_result = xls_to_dict(xls_file)
         csv_result = csv_to_dict(csv_file)
         self.maxDiff = None
         self.assertEqual(csv_result, xls_result)
Exemplo n.º 17
0
def _xform_to_asset_content(xform):
    """
    Fetch the XLSForm behind ``xform`` and return it as KPI asset content.

    The form is requested through ``_kc_forms_api_request`` using the auth
    token of ``xform.user``. A 404 response raises ``SyncKCXFormsWarning``;
    any other non-200 response raises ``SyncKCXFormsError``. When the stored
    file name ends in ``.csv`` the payload is parsed as CSV and converted to
    XLS before being passed to ``_xlsform_to_kpi_content_schema``.
    """
    # Load the xlsform from the KC API to avoid having to deal
    # with S3 credentials, etc.
    response = _kc_forms_api_request(
        xform.user.auth_token, xform.pk, xlsform=True)
    status = response.status_code
    if status != 200:
        message = u'unable to load xls ({})'.format(status)
        if status == 404:
            raise SyncKCXFormsWarning(message)
        raise SyncKCXFormsError(message)
    # Convert the xlsform to KPI JSON
    workbook_io = io.BytesIO(response.content)
    if xform.xls.name.endswith('.csv'):
        workbook_io = _convert_dict_to_xls(
            xls2json_backends.csv_to_dict(workbook_io))
    return _xlsform_to_kpi_content_schema(workbook_io)
Exemplo n.º 18
0
def _xform_to_asset_content(xform):
    """
    Download the XLSForm for ``xform`` and convert it to KPI content.

    Raises ``SyncKCXFormsWarning`` when the API answers 404 and
    ``SyncKCXFormsError`` for every other status except 200. CSV forms
    (stored name ending in ``.csv``) are parsed and rebuilt as XLS first.
    Returns the result of ``_xlsform_to_kpi_content_schema``.
    """
    # Load the xlsform from the KC API to avoid having to deal
    # with S3 credentials, etc.
    owner = xform.user
    api_response = _kc_forms_api_request(
        owner.auth_token, xform.pk, xlsform=True
    )
    if api_response.status_code != 200:
        if api_response.status_code == 404:
            failure = SyncKCXFormsWarning
        else:
            failure = SyncKCXFormsError
        raise failure(
            u'unable to load xls ({})'.format(api_response.status_code)
        )
    # Convert the xlsform to KPI JSON
    form_io = io.BytesIO(api_response.content)
    is_csv = xform.xls.name.endswith('.csv')
    if is_csv:
        sheets = xls2json_backends.csv_to_dict(form_io)
        form_io = _convert_dict_to_xls(sheets)
    return _xlsform_to_kpi_content_schema(form_io)
Exemplo n.º 19
0
def parse_file_to_workbook_dict(path, file_object=None):
    """
    Parse an xls, xlsx or csv workbook into a python workbook_dict.

    The parser is chosen from the extension of ``path``. When ``file_object``
    is given it is what gets parsed; otherwise ``path`` itself is passed to
    the parser.

    workbook_dicts are organized as follows:
    {sheetname : [{column_header : column_value_in_array_indexed_row}]}

    Raises PyXFormError when ``path`` has no file name, has no extension, or
    has an extension other than ``.xls``, ``.xlsx`` or ``.csv``.
    """
    filename = os.path.basename(path)
    if not filename:
        raise PyXFormError("No filename.")
    extension = os.path.splitext(filename)[1]
    if not extension:
        raise PyXFormError("No extension.")

    source = path if file_object is None else file_object
    if extension in (".xls", ".xlsx"):
        return xls_to_dict(source)
    if extension == ".csv":
        return csv_to_dict(source)
    raise PyXFormError("File was not recognized")
Exemplo n.º 20
0
def convert_csv_to_xls(csv_repr):
    """
    Convert a "sheeted" CSV representation into an in-memory XLS workbook.

    Blank lines are removed, the text is parsed with
    ``xls2json_backends.csv_to_dict`` and every sheet whose name does not end
    in ``_header`` is written to an ``xlwt`` workbook. Returns an
    ``io.BytesIO`` holding the saved workbook, rewound to position 0.

    This method should be moved into pyxform
    """
    # There should not be any blank lines in the "sheeted" CSV representation,
    # but often times there are. Strip them out before any further processing;
    # otherwise, `convert_csv_to_xls()` will raise an
    # `invalid worksheet name ''` exception
    kept_lines = []
    for raw_line in csv_repr.splitlines(True):
        if raw_line.strip().strip('"'):
            kept_lines.append(raw_line)
    csv_repr = ''.join(kept_lines)

    def _write_rows(sheet, rows):
        # Column names, in the order they are first seen across the rows
        headers = []
        for record in rows:
            for name in record:
                if name not in headers:
                    headers.append(name)
        for col_index, header in enumerate(headers):
            sheet.write(0, col_index, header)
        # Data starts on row 1; falsy values leave the cell unwritten
        for row_index, record in enumerate(rows, 1):
            for col_index, header in enumerate(headers):
                cell = record.get(header)
                if cell:
                    sheet.write(row_index, col_index, cell)

    sheets = xls2json_backends.csv_to_dict(io.BytesIO(csv_repr))

    workbook = xlwt.Workbook()
    for title in sheets:
        # pyxform.xls2json_backends adds "_header" items for each sheet.....
        if re.match(r".*_header$", title) is None:
            _write_rows(workbook.add_sheet(title), sheets[title])

    output = io.BytesIO()
    workbook.save(output)
    output.seek(0)
    return output
Exemplo n.º 21
0
def convert_csv_to_xls(csv_repr):
    """
    Turn a "sheeted" CSV string into an XLS workbook held in memory.

    After dropping blank lines, the CSV is parsed with
    ``xls2json_backends.csv_to_dict``. Sheets whose name ends in ``_header``
    are skipped; the rest are written to an ``xlwt`` workbook, which is
    returned as a ``StringIO`` rewound to position 0.

    This method should be moved into pyxform
    """
    # There should not be any blank lines in the "sheeted" CSV representation,
    # but often times there are. Strip them out before any further processing;
    # otherwise, `convert_csv_to_xls()` will raise an
    # `invalid worksheet name u''` exception
    non_blank = [
        text for text in csv_repr.splitlines(True) if text.strip().strip('"')
    ]
    csv_repr = ''.join(non_blank)

    def _populate(sheet, records):
        # First pass: discover the columns in first-seen order
        columns = []
        for record in records:
            for key in record:
                if key not in columns:
                    columns.append(key)
        # Header row, then one row per record; falsy values are skipped
        for index, column in enumerate(columns):
            sheet.write(0, index, column)
        for row_number, record in enumerate(records, 1):
            for index, column in enumerate(columns):
                value = record.get(column)
                if not value:
                    continue
                sheet.write(row_number, index, value)

    utf8_csv = csv_repr.decode("utf-8").encode("utf-8")
    parsed = xls2json_backends.csv_to_dict(StringIO.StringIO(utf8_csv))
    book = xlwt.Workbook()
    for sheet_title, sheet_rows in parsed.items():
        # pyxform.xls2json_backends adds "_header" items for each sheet.....
        if re.match(r".*_header$", sheet_title):
            continue
        _populate(book.add_sheet(sheet_title), sheet_rows)
    # TODO: As XLS files are binary, I believe this should be `io.BytesIO()`.
    result = StringIO.StringIO()
    book.save(result)
    result.seek(0)
    return result
Exemplo n.º 22
0
def convert_csv_to_xls(csv_repr):
    """
    Convert CSV text to XLS.

    The text is encoded as UTF-8, parsed with
    ``xls2json_backends.csv_to_dict`` and the resulting dict is passed to
    ``convert_dict_to_xls``, whose result is returned.
    """
    utf8_stream = StringIO.StringIO(csv_repr.encode("utf-8"))
    parsed = xls2json_backends.csv_to_dict(utf8_stream)
    return convert_dict_to_xls(parsed)
Exemplo n.º 23
0
def _csv_to_dict(content):
    """
    Parse CSV text with ``csv_to_dict`` and drop the ``*_header`` entries.

    ``content`` is encoded as UTF-8 before parsing. Returns a new dict holding
    every parsed entry whose key does not end in ``_header``.
    """
    sheets = csv_to_dict(StringIO(content.encode('utf-8')))
    return {
        name: rows
        for name, rows in sheets.items()
        if re.search(r'_header$', name) is None
    }
Exemplo n.º 24
0
def create_survey_from_csv_text(
    csv_text,
    default_name='KoBoFormSurvey',
    default_language=u'default',
    warnings=None,
):
    """
    Build a survey from CSV text.

    The text is encoded as UTF-8 and parsed by
    ``xls2json_backends.csv_to_dict``; the parsed workbook and the remaining
    arguments are forwarded to ``create_survey_from_ss_struct``, whose result
    is returned.
    """
    encoded_stream = StringIO.StringIO(csv_text.encode("utf-8"))
    sheets = xls2json_backends.csv_to_dict(encoded_stream)
    return create_survey_from_ss_struct(
        sheets, default_name, default_language, warnings)
Exemplo n.º 25
0
def create_survey_from_csv_text(
    csv_text,
    default_name='KoBoFormSurvey',
    default_language=u'default',
    warnings=None,
):
    """
    Build a survey element from CSV text.

    The UTF-8 encoded text is parsed by ``xls2json_backends.csv_to_dict`` and
    converted with ``xls2json.workbook_to_json``. The resulting dict's
    ``name`` is overwritten with its ``id_string`` before it is passed to
    ``builder.create_survey_element_from_dict``, whose result is returned.
    """
    csv_stream = StringIO.StringIO(csv_text.encode("utf-8"))
    sheets = xls2json_backends.csv_to_dict(csv_stream)
    survey_json = xls2json.workbook_to_json(
        sheets, default_name, default_language, warnings)
    # The survey is named after its id_string
    survey_json[u'name'] = survey_json[u'id_string']
    return builder.create_survey_element_from_dict(survey_json)
Exemplo n.º 26
0
    def handle(self, *args, **options):
        """
        Create or update KPI assets so that they match the users' KC XForms.

        Options read from ``options``:
          * ``quiet``: when truthy, status messages are not printed.
          * ``username``: when set, only that user is processed.
          * ``all_users``: when falsy, only users whose form builder
            preference is KPI (or who have no preference) are processed.

        For every selected user, each of the user's XForms is either skipped
        (``NOOP``), used to update the matching asset (``UPDATE``) or used to
        create a new asset (``CREATE``). Any exception raised while handling
        a single XForm is reported as ``FAIL``, logged, and does not stop the
        loop.

        Raises ImproperlyConfigured if ``settings.KOBOCAT_URL`` or
        ``settings.KOBOCAT_INTERNAL_URL`` is not set.
        """
        if not settings.KOBOCAT_URL or not settings.KOBOCAT_INTERNAL_URL:
            raise ImproperlyConfigured(
                'Both KOBOCAT_URL and KOBOCAT_INTERNAL_URL must be '
                'configured before using this command'
            )
        if options.get('quiet'):
            # Do not output anything
            def print_str(string): pass
        else:
            # Output status messages
            def print_str(string): print string

        # Print the arguments as a single tab-separated line
        def print_tabular(*args):
            print_str(u'\t'.join(map(lambda x: u'{}'.format(x), args)))

        users = User.objects.all()
        print_str('%d total users' % users.count())
        # A specific user or everyone?
        if options.get('username'):
            users = User.objects.filter(username=options.get('username'))
        print_str('%d users selected' % users.count())
        # Only users who prefer KPI or all users?
        if not options.get('all_users'):
            users = users.filter(
                models.Q(formbuilderpreference__preferred_builder=
                    FormBuilderPreference.KPI) |
                models.Q(formbuilderpreference=None) # KPI is the default now
            )
            print_str('%d of selected users prefer KPI' % users.count())

        # We'll be copying the date fields from KC, so don't auto-update them
        # NOTE(review): the matching re-enable calls at the end of this
        # method are not inside a `finally`, so an exception raised outside
        # the per-xform `try` would skip them -- confirm whether that matters.
        _set_auto_field_update(Asset, "date_created", False)
        _set_auto_field_update(Asset, "date_modified", False)

        for user in users:
            (token, created) = Token.objects.get_or_create(user=user)
            existing_surveys = user.assets.filter(asset_type='survey')

            # Each asset that the user has already deployed to KC should have a
            # form uuid stored in its deployment data
            # Maps KC form uuid -> primary key of the corresponding asset
            kpi_deployed_uuids = {}
            for existing_survey in existing_surveys:
                dd = existing_survey._deployment_data
                if 'backend_response' in dd:
                    kpi_deployed_uuids[dd['backend_response']['uuid']] = \
                        existing_survey.pk
            # Use our stub model to access KC's XForm objects
            xforms = user.xforms.all()
            for xform in xforms:
                try:
                    update_existing = False
                    if xform.uuid in kpi_deployed_uuids:
                        # This KC form already has a corresponding KPI asset,
                        # but the user may have directly updated the form on KC
                        # after deploying from KPI. If so, then the KPI asset
                        # must be updated with the contents of the KC form
                        asset = user.assets.get(
                            pk=kpi_deployed_uuids[xform.uuid])
                        time_diff = xform.date_modified - asset.date_modified
                        # Format the timedelta in a sane way, per
                        # http://stackoverflow.com/a/8408947
                        if time_diff < datetime.timedelta(0):
                            time_diff_str = '-{}'.format(-time_diff)
                        else:
                            time_diff_str = '+{}'.format(time_diff)
                        # If KC timestamp is not sufficiently ahead of the KPI
                        # timestamp, we assume the KC form content was not
                        # updated since the last KPI deployment
                        if time_diff <= TIMESTAMP_DIFFERENCE_TOLERANCE:
                            print_tabular(
                                'NOOP',
                                user.username,
                                xform.id_string,
                                asset.uid,
                                time_diff_str
                            )
                            continue
                        else:
                            update_existing = True
                    # Load the xlsform from the KC API to avoid having to deal
                    # with S3 credentials, etc.
                    response = kc_forms_api_request(
                        token, xform.pk, xlsform=True)
                    if response.status_code != 200:
                        error_information = [
                            'FAIL',
                            user.username,
                            xform.id_string,
                            u'unable to load xls ({})'.format(
                                response.status_code)
                        ]
                        print_tabular(*error_information)
                        logging.warning(u'sync_kobocat_xforms: {}'.format(
                            u', '.join(error_information)))
                        continue
                    # Convert the xlsform to KPI JSON
                    xls_io = io.BytesIO(response.content)
                    # CSV forms are parsed and rebuilt as XLS first
                    if xform.xls.name.endswith('.csv'):
                        dict_repr = xls2json_backends.csv_to_dict(xls_io)
                        xls_io = convert_dict_to_xls(dict_repr)
                    asset_content = xlsform_to_kpi_content_schema(xls_io)
                    # Get the form data from KC
                    response = kc_forms_api_request(token, xform.pk)
                    if response.status_code != 200:
                        error_information = [
                            'FAIL',
                            user.username,
                            xform.id_string,
                            'unable to load form data ({})'.format(
                                response.status_code)
                        ]
                        print_tabular(*error_information)
                        logging.error(u'sync_kobocat_xforms: {}'.format(
                            u', '.join(error_information)))
                        continue
                    deployment_data = response.json()
                    # Asset creation/update and the deployment data are saved
                    # inside a single transaction
                    with transaction.atomic():
                        if not update_existing:
                            # This is an orphaned KC form. Build a new asset to
                            # match it
                            asset = Asset()
                            asset.asset_type = 'survey'
                            asset.owner = user
                            asset.date_created = dateutil.parser.parse(
                                deployment_data['date_created'])
                        # Update the asset's modification date and content
                        # regardless of whether it's a new asset or an existing
                        # one being updated
                        asset.date_modified = dateutil.parser.parse(
                            deployment_data['date_modified'])
                        asset.content = asset_content
                        asset.save()
                        # If this user already has an identically-named asset,
                        # append `xform.id_string` in parentheses for
                        # clarification
                        # NOTE(review): this query runs after `asset.save()`
                        # and does not exclude `asset` itself, so it
                        # presumably matches the asset just saved -- confirm
                        # whether the rename is meant to be unconditional.
                        if Asset.objects.filter(
                                owner=user, name=asset.name).exists():
                            asset.name = u'{} ({})'.format(
                                asset.name, xform.id_string)
                            # `store_data()` handles saving the asset
                        # Copy the deployment-related data
                        kc_deployment = KobocatDeploymentBackend(asset)
                        kc_deployment.store_data({
                            'backend': 'kobocat',
                            'identifier': kc_deployment.make_identifier(
                                user.username, xform.id_string),
                            'active': xform.downloadable,
                            'backend_response': deployment_data,
                            'version': asset.version_id
                        })
                        if update_existing:
                            print_tabular(
                                'UPDATE',
                                user.username,
                                xform.id_string,
                                asset.uid,
                                time_diff_str
                            )
                        else:
                            print_tabular(
                                'CREATE',
                                user.username,
                                xform.id_string,
                                asset.uid,
                            )
                except Exception as e:
                    # Report and log the failure, then move on to the next
                    # xform instead of aborting the whole run
                    error_information = [
                        'FAIL',
                        user.username,
                        xform.id_string,
                        repr(e)
                    ]
                    print_tabular(*error_information)
                    logging.exception(u'sync_kobocat_xforms: {}'.format(
                        u', '.join(error_information)))

        # Re-enable automatic updates of the date fields
        _set_auto_field_update(Asset, "date_created", True)
        _set_auto_field_update(Asset, "date_modified", True)
Exemplo n.º 27
0
    def handle(self, *args, **options):
        """
        Synchronize KC (KoBoCAT) forms into KPI survey assets.

        For each selected user, every form in `user.xforms` is matched by
        uuid against the user's survey assets that have a
        `backend_response` in their deployment data:

        * matched, content unchanged: the asset is left alone, except that
          a missing hash is copied from KC and the name is re-synced with
          the form title when they differ;
        * matched, content changed: the asset content is replaced with the
          XLSForm downloaded from KC;
        * unmatched: a new survey asset is created for the form.

        Options read from `options`:

        * `quiet`: suppress all status output;
        * `username`: restrict the run to a single user;
        * `all_users`: include users regardless of form builder preference.

        One tab-separated status line is emitted per form, starting with
        one of `HASH`, `NOOP`, `NAME`, `UPDATE`, `CREATE` or `FAIL`.

        Raises `ImproperlyConfigured` when either `KOBOCAT_URL` or
        `KOBOCAT_INTERNAL_URL` is not set. Exceptions raised while
        processing an individual form are caught, reported as `FAIL` and
        logged, and processing continues with the next form.
        """
        if not settings.KOBOCAT_URL or not settings.KOBOCAT_INTERNAL_URL:
            raise ImproperlyConfigured(
                'Both KOBOCAT_URL and KOBOCAT_INTERNAL_URL must be '
                'configured before using this command')
        if options.get('quiet'):
            # Do not output anything
            def print_str(string):
                pass
        else:
            # Output status messages
            def print_str(string):
                print string

        # Emit the arguments as a single tab-separated line (through
        # `print_str`, so `quiet` is honored)
        def print_tabular(*args):
            print_str(u'\t'.join(map(lambda x: u'{}'.format(x), args)))

        users = User.objects.all()
        print_str('%d total users' % users.count())
        # A specific user or everyone?
        if options.get('username'):
            users = User.objects.filter(username=options.get('username'))
        print_str('%d users selected' % users.count())
        # Only users who prefer KPI or all users?
        if not options.get('all_users'):
            users = users.filter(
                models.Q(formbuilderpreference__preferred_builder=
                         FormBuilderPreference.KPI) |
                models.Q(formbuilderpreference=None)  # KPI is the default now
            )
            print_str('%d of selected users prefer KPI' % users.count())

        # We'll be copying the date fields from KC, so don't auto-update them
        # NOTE(review): auto-updating is switched back on only by the last
        # two statements of this method, which are not inside a `finally`.
        # An exception raised outside the per-form `try` below (e.g. while
        # fetching the token) would skip them -- confirm whether that
        # matters for callers.
        _set_auto_field_update(Asset, "date_created", False)
        _set_auto_field_update(Asset, "date_modified", False)

        for user in users:
            # The token is passed to `kc_forms_api_request()` below
            (token, created) = Token.objects.get_or_create(user=user)
            existing_surveys = user.assets.filter(asset_type='survey')

            # Each asset that the user has already deployed to KC should have a
            # form uuid stored in its deployment data
            # Maps KC form uuid -> primary key of the matching KPI asset
            kpi_deployed_uuids = {}
            for existing_survey in existing_surveys:
                dd = existing_survey._deployment_data
                if 'backend_response' in dd:
                    kpi_deployed_uuids[dd['backend_response']['uuid']] = \
                        existing_survey.pk
            # Use our stub model to access KC's XForm objects
            xforms = user.xforms.all()
            for xform in xforms:
                # Any failure for a single form is reported and logged by the
                # `except` clause at the bottom; the loop then moves on
                try:
                    if xform.uuid in kpi_deployed_uuids:
                        # This KC form already has a corresponding KPI asset,
                        # but the user may have directly updated the form on KC
                        # after deploying from KPI. If so, then the KPI asset
                        # must be updated with the contents of the KC form
                        asset = user.assets.get(
                            pk=kpi_deployed_uuids[xform.uuid])
                        # Status label printed when the content needs no
                        # update; becomes 'NAME' if only the name is changed
                        non_content_operation = 'NOOP'
                        # First, compare hashes to see if the KC form content
                        # has changed since the last deployment
                        backend_response = asset._deployment_data[
                            'backend_response']
                        if 'hash' in backend_response:
                            update_existing = backend_response['hash'] \
                                != xform.prefixed_hash
                            diff_str = 'hashes {}'.format(
                                'differ' if update_existing else 'match')
                        else:
                            # KC's `date_modified` is nearly useless, because
                            # every new submission changes it to the current
                            # time, and when there are no submissions, merely
                            # loading the projects list does the same (see
                            # https://github.com/kobotoolbox/kpi/issues/661#issuecomment-218073765).
                            # Still, in cases where KPI does not yet know the
                            # hash, comparing timestamps can sometimes save us
                            # from creating duplicate asset versions
                            time_diff = xform.date_modified - asset.date_modified
                            # Format the timedelta in a sane way, per
                            # http://stackoverflow.com/a/8408947
                            if time_diff < datetime.timedelta(0):
                                diff_str = '-{}'.format(-time_diff)
                            else:
                                diff_str = '+{}'.format(time_diff)
                            # If KC timestamp is sufficiently ahead of the KPI
                            # timestamp, we assume the KC form content was
                            # updated since the last KPI deployment
                            if time_diff > TIMESTAMP_DIFFERENCE_TOLERANCE:
                                update_existing = True
                            else:
                                update_existing = False
                                # We don't need an update, but we should copy
                                # the hash from KC to KPI for future reference
                                backend_response['hash'] = xform.prefixed_hash
                                asset.save(adjust_content=False)
                                print_tabular('HASH', user.username,
                                              xform.id_string, asset.uid,
                                              diff_str)

                        if not update_existing:
                            # Check to see if the asset name matches the xform
                            # title. Per #857, the xform title takes priority.
                            # The first check is a cheap one:
                            if asset.name != xform.title:
                                # Now do a full check of the name
                                desired_name = make_name_for_asset(
                                    asset, xform)
                                if asset.name != desired_name:
                                    asset.name = desired_name
                                    asset.save(adjust_content=False)
                                    non_content_operation = 'NAME'
                            # No further update needed. Skip to the next form
                            print_tabular(non_content_operation, user.username,
                                          xform.id_string, asset.uid, diff_str)
                            continue
                    else:
                        # No KPI asset references this form; a new asset is
                        # built inside the transaction below
                        update_existing = False
                    # Load the xlsform from the KC API to avoid having to deal
                    # with S3 credentials, etc.
                    response = kc_forms_api_request(token,
                                                    xform.pk,
                                                    xlsform=True)
                    if response.status_code != 200:
                        error_information = [
                            'FAIL', user.username, xform.id_string,
                            u'unable to load xls ({})'.format(
                                response.status_code)
                        ]
                        print_tabular(*error_information)
                        logging.warning(u'sync_kobocat_xforms: {}'.format(
                            u', '.join(error_information)))
                        continue
                    # Convert the xlsform to KPI JSON
                    xls_io = io.BytesIO(response.content)
                    # Forms uploaded as CSV are converted to XLS first, since
                    # the content conversion below is given an XLS stream
                    if xform.xls.name.endswith('.csv'):
                        dict_repr = xls2json_backends.csv_to_dict(xls_io)
                        xls_io = convert_dict_to_xls(dict_repr)
                    asset_content = xlsform_to_kpi_content_schema(xls_io)
                    # Get the form data from KC
                    response = kc_forms_api_request(token, xform.pk)
                    if response.status_code != 200:
                        error_information = [
                            'FAIL', user.username, xform.id_string,
                            'unable to load form data ({})'.format(
                                response.status_code)
                        ]
                        print_tabular(*error_information)
                        # Don't spam the log when KC responds with 404, which
                        # indicates that the form's XLS is missing from S3
                        if response.status_code != 404:
                            logging.error(u'sync_kobocat_xforms: {}'.format(
                                u', '.join(error_information)))
                        continue
                    deployment_data = response.json()
                    # Create/update the asset and store its deployment data
                    # atomically
                    with transaction.atomic():
                        if not update_existing:
                            # This is an orphaned KC form. Build a new asset to
                            # match it
                            asset = Asset(asset_type='survey', owner=user)
                            asset.date_created = dateutil.parser.parse(
                                deployment_data['date_created'])
                        # Update the asset's modification date and content
                        # regardless of whether it's a new asset or an existing
                        # one being updated
                        asset.date_modified = dateutil.parser.parse(
                            deployment_data['date_modified'])
                        # We may want to standardize the content (by calling
                        # `asset._standardize(asset_content)`), but this also
                        # could cause errors on unexpected forms so we can
                        # defer this until later.
                        asset.content = asset_content
                        asset.save(adjust_content=False)
                        # The name is assigned after the first save and is
                        # persisted by the `asset.save()` call further down
                        asset.name = make_name_for_asset(asset, xform)
                        # Copy the deployment-related data
                        kc_deployment = KobocatDeploymentBackend(asset)
                        kc_deployment.store_data({
                            'backend':
                            'kobocat',
                            'identifier':
                            kc_deployment.make_identifier(
                                user.username, xform.id_string),
                            'active':
                            xform.downloadable,
                            'backend_response':
                            deployment_data,
                            'version':
                            asset.version_id
                        })
                        asset._mark_latest_version_as_deployed()
                        asset.save()
                        if update_existing:
                            print_tabular('UPDATE', user.username,
                                          xform.id_string, asset.uid, diff_str)
                        else:
                            print_tabular(
                                'CREATE',
                                user.username,
                                xform.id_string,
                                asset.uid,
                            )
                except Exception as e:
                    # Report the failure, log it with a traceback, and carry
                    # on with the next form
                    error_information = [
                        'FAIL', user.username, xform.id_string,
                        repr(e)
                    ]
                    print_tabular(*error_information)
                    logging.exception(u'sync_kobocat_xforms: {}'.format(
                        u', '.join(error_information)))

        # Restore automatic updating of the date fields
        _set_auto_field_update(Asset, "date_created", True)
        _set_auto_field_update(Asset, "date_modified", True)
Exemplo n.º 28
0
def convert_csv_to_xls(csv_repr):
    """
    Build an XLS representation of the CSV text in `csv_repr`.

    The text is encoded as UTF-8, parsed with
    `xls2json_backends.csv_to_dict()`, and the parsed dict is handed to
    `convert_dict_to_xls()`, whose return value is passed back unchanged.
    """
    utf8_stream = StringIO.StringIO(csv_repr.encode("utf-8"))
    parsed_sheets = xls2json_backends.csv_to_dict(utf8_stream)
    return convert_dict_to_xls(parsed_sheets)
def _csv_to_dict(content):
    """
    Parse CSV text into a plain dict of sheets.

    `content` is encoded as UTF-8 and parsed with `csv_to_dict()`. Entries
    whose key ends in `_header` are left out of the returned dict.
    """
    parsed = csv_to_dict(StringIO(content.encode('utf-8')))
    return {
        name: rows
        for (name, rows) in parsed.items()
        if re.search(r'_header$', name) is None
    }
Exemplo n.º 30
0
 def test_order_of_dict_values(self):
     """
     Check that `csv_to_dict()` keeps sheets and columns in source order.

     The first sheet parsed from `sample_for_ordered_columns` must be
     "survey", and the first survey row must list its columns as
     name, type, label, required.
     """
     csv_dict = xls2json_backends.csv_to_dict(StringIO(sample_for_ordered_columns))
     # `keys()` is materialized with `list()` because a Python 3 key view
     # can neither be indexed nor compare equal to a list
     self.assertEqual(list(csv_dict.keys())[0], "survey")
     survey = csv_dict.get("survey")
     self.assertEqual(list(survey[0].keys()), ["name", "type", "label", "required"])
Exemplo n.º 31
0
    def handle(self, *args, **options):
        """
        Synchronize KC (KoBoCAT) forms into KPI survey assets.

        For each selected user, every form in `user.xforms` is matched by
        uuid against the user's survey assets that have a
        `backend_response` in their deployment data:

        * matched, and the KC modification time is not more than
          `TIMESTAMP_DIFFERENCE_TOLERANCE` ahead of the asset's: nothing is
          changed (`NOOP`);
        * matched, and the KC form is newer than that: the asset content is
          replaced with the XLSForm downloaded from KC (`UPDATE`);
        * unmatched: a new survey asset is created for the form (`CREATE`).

        Options read from `options`:

        * `quiet`: suppress all status output;
        * `username`: restrict the run to a single user;
        * `all_users`: include users regardless of form builder preference.

        One tab-separated status line is emitted per form.

        Raises `ImproperlyConfigured` when either `KOBOCAT_URL` or
        `KOBOCAT_INTERNAL_URL` is not set. Exceptions raised while
        processing an individual form are caught, reported as `FAIL` and
        logged, and processing continues with the next form.
        """
        if not settings.KOBOCAT_URL or not settings.KOBOCAT_INTERNAL_URL:
            raise ImproperlyConfigured(
                'Both KOBOCAT_URL and KOBOCAT_INTERNAL_URL must be '
                'configured before using this command')
        if options.get('quiet'):
            # Do not output anything
            def print_str(string):
                pass
        else:
            # Output status messages
            def print_str(string):
                print string

        # Emit the arguments as a single tab-separated line (through
        # `print_str`, so `quiet` is honored)
        def print_tabular(*args):
            print_str(u'\t'.join(map(lambda x: u'{}'.format(x), args)))

        users = User.objects.all()
        print_str('%d total users' % users.count())
        # A specific user or everyone?
        if options.get('username'):
            users = User.objects.filter(username=options.get('username'))
        print_str('%d users selected' % users.count())
        # Only users who prefer KPI or all users?
        if not options.get('all_users'):
            users = users.filter(
                models.Q(formbuilderpreference__preferred_builder=
                         FormBuilderPreference.KPI) |
                models.Q(formbuilderpreference=None)  # KPI is the default now
            )
            print_str('%d of selected users prefer KPI' % users.count())

        # We'll be copying the date fields from KC, so don't auto-update them
        # NOTE(review): auto-updating is switched back on only by the last
        # two statements of this method, which are not inside a `finally`.
        # An exception raised outside the per-form `try` below (e.g. while
        # fetching the token) would skip them -- confirm whether that
        # matters for callers.
        _set_auto_field_update(Asset, "date_created", False)
        _set_auto_field_update(Asset, "date_modified", False)

        for user in users:
            # The token is passed to `kc_forms_api_request()` below
            (token, created) = Token.objects.get_or_create(user=user)
            existing_surveys = user.assets.filter(asset_type='survey')

            # Each asset that the user has already deployed to KC should have a
            # form uuid stored in its deployment data
            # Maps KC form uuid -> primary key of the matching KPI asset
            kpi_deployed_uuids = {}
            for existing_survey in existing_surveys:
                dd = existing_survey._deployment_data
                if 'backend_response' in dd:
                    kpi_deployed_uuids[dd['backend_response']['uuid']] = \
                        existing_survey.pk
            # Use our stub model to access KC's XForm objects
            xforms = user.xforms.all()
            for xform in xforms:
                # Any failure for a single form is reported and logged by the
                # `except` clause at the bottom; the loop then moves on
                try:
                    update_existing = False
                    if xform.uuid in kpi_deployed_uuids:
                        # This KC form already has a corresponding KPI asset,
                        # but the user may have directly updated the form on KC
                        # after deploying from KPI. If so, then the KPI asset
                        # must be updated with the contents of the KC form
                        asset = user.assets.get(
                            pk=kpi_deployed_uuids[xform.uuid])
                        time_diff = xform.date_modified - asset.date_modified
                        # Format the timedelta in a sane way, per
                        # http://stackoverflow.com/a/8408947
                        if time_diff < datetime.timedelta(0):
                            time_diff_str = '-{}'.format(-time_diff)
                        else:
                            time_diff_str = '+{}'.format(time_diff)
                        # If KC timestamp is not sufficiently ahead of the KPI
                        # timestamp, we assume the KC form content was not
                        # updated since the last KPI deployment
                        if time_diff <= TIMESTAMP_DIFFERENCE_TOLERANCE:
                            print_tabular('NOOP', user.username,
                                          xform.id_string, asset.uid,
                                          time_diff_str)
                            continue
                        else:
                            update_existing = True
                    # Load the xlsform from the KC API to avoid having to deal
                    # with S3 credentials, etc.
                    response = kc_forms_api_request(token,
                                                    xform.pk,
                                                    xlsform=True)
                    if response.status_code != 200:
                        error_information = [
                            'FAIL', user.username, xform.id_string,
                            u'unable to load xls ({})'.format(
                                response.status_code)
                        ]
                        print_tabular(*error_information)
                        logging.warning(u'sync_kobocat_xforms: {}'.format(
                            u', '.join(error_information)))
                        continue
                    # Convert the xlsform to KPI JSON
                    xls_io = io.BytesIO(response.content)
                    # Forms uploaded as CSV are converted to XLS first, since
                    # the content conversion below is given an XLS stream
                    if xform.xls.name.endswith('.csv'):
                        dict_repr = xls2json_backends.csv_to_dict(xls_io)
                        xls_io = convert_dict_to_xls(dict_repr)
                    asset_content = xlsform_to_kpi_content_schema(xls_io)
                    # Get the form data from KC
                    response = kc_forms_api_request(token, xform.pk)
                    if response.status_code != 200:
                        error_information = [
                            'FAIL', user.username, xform.id_string,
                            'unable to load form data ({})'.format(
                                response.status_code)
                        ]
                        print_tabular(*error_information)
                        logging.error(u'sync_kobocat_xforms: {}'.format(
                            u', '.join(error_information)))
                        continue
                    deployment_data = response.json()
                    # Create/update the asset and store its deployment data
                    # atomically
                    with transaction.atomic():
                        if not update_existing:
                            # This is an orphaned KC form. Build a new asset to
                            # match it
                            asset = Asset()
                            asset.asset_type = 'survey'
                            asset.owner = user
                            asset.date_created = dateutil.parser.parse(
                                deployment_data['date_created'])
                        # Update the asset's modification date and content
                        # regardless of whether it's a new asset or an existing
                        # one being updated
                        asset.date_modified = dateutil.parser.parse(
                            deployment_data['date_modified'])
                        asset.content = asset_content
                        asset.save()
                        # If this user already has an identically-named asset,
                        # append `xform.id_string` in parentheses for
                        # clarification
                        # NOTE(review): this query runs after `asset.save()`
                        # and does not exclude `asset.pk`, so it looks like
                        # the asset just saved could match itself and always
                        # get the suffix -- confirm against `Asset.save()`
                        if Asset.objects.filter(owner=user,
                                                name=asset.name).exists():
                            asset.name = u'{} ({})'.format(
                                asset.name, xform.id_string)
                            # `store_data()` handles saving the asset
                            # NOTE(review): the comment above applies to the
                            # `store_data()` call below, not only to this
                            # branch; the renamed asset is not saved
                            # explicitly here -- presumably `store_data()`
                            # persists it, verify
                        # Copy the deployment-related data
                        kc_deployment = KobocatDeploymentBackend(asset)
                        kc_deployment.store_data({
                            'backend':
                            'kobocat',
                            'identifier':
                            kc_deployment.make_identifier(
                                user.username, xform.id_string),
                            'active':
                            xform.downloadable,
                            'backend_response':
                            deployment_data,
                            'version':
                            asset.version_id
                        })
                        if update_existing:
                            print_tabular('UPDATE', user.username,
                                          xform.id_string, asset.uid,
                                          time_diff_str)
                        else:
                            print_tabular(
                                'CREATE',
                                user.username,
                                xform.id_string,
                                asset.uid,
                            )
                except Exception as e:
                    # Report the failure, log it with a traceback, and carry
                    # on with the next form
                    error_information = [
                        'FAIL', user.username, xform.id_string,
                        repr(e)
                    ]
                    print_tabular(*error_information)
                    logging.exception(u'sync_kobocat_xforms: {}'.format(
                        u', '.join(error_information)))

        # Restore automatic updating of the date fields
        _set_auto_field_update(Asset, "date_created", True)
        _set_auto_field_update(Asset, "date_modified", True)
Exemplo n.º 32
0
def convert_csv_to_ss_structure(csv_repr):
    """
    Parse CSV text into a plain dict of sheets without `*_header` entries.

    `csv_repr` is encoded as UTF-8 and parsed with
    `xls2json_backends.csv_to_dict()`. Every key matching `.*_header$` is
    left out of the returned dict.
    """
    parsed = dict(xls2json_backends.csv_to_dict(
        StringIO.StringIO(csv_repr.encode("utf-8"))))
    # Build a filtered copy rather than deleting keys while looping over
    # them: deleting during iteration only works when `keys()` returns a
    # list snapshot and raises RuntimeError on dict views
    return {
        key: sheet
        for (key, sheet) in parsed.items()
        if not re.match(r'.*_header$', key)
    }