Example #1
File: asset.py Project: kobotoolbox/kpi
    def hash(self, request):
        """
        Creates a hash of the `version_id`s of all assets accessible to the user.
        Useful for detecting changes between requests.

        :param request:
        :return: JSON
        """
        user = self.request.user
        if user.is_anonymous:
            raise exceptions.NotAuthenticated()
        else:
            accessible_assets = (get_objects_for_user(
                user, 'view_asset',
                Asset).filter(asset_type=ASSET_TYPE_SURVEY).order_by("uid"))

            assets_version_ids = [
                asset.version_id for asset in accessible_assets
                if asset.version_id is not None
            ]
            # Sort alphabetically
            assets_version_ids.sort()

            if len(assets_version_ids) > 0:
                hash_ = calculate_hash(''.join(assets_version_ids),
                                       algorithm='md5')
            else:
                hash_ = ''

            return Response({'hash': hash_})
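All of the examples in this listing revolve around the project helper `calculate_hash`. Judging only from the call sites shown here (a str or bytes source, an optional algorithm name such as 'md5' or 'sha1', and an optional `prefix` flag), a minimal compatible sketch might look like the following; this is an inference from usage, not the actual kobotoolbox/kpi implementation.

import hashlib

def calculate_hash(source, algorithm='md5', prefix=False):
    # Hypothetical sketch: accept both str and bytes, as call sites pass either.
    if isinstance(source, str):
        source = source.encode()
    digest = hashlib.new(algorithm, source).hexdigest()
    # `prefix=True` is assumed to tag the digest with the algorithm name,
    # e.g. 'md5:<hexdigest>'; the real helper's prefix format may differ.
    return f'{algorithm}:{digest}' if prefix else digest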
Example #2
    def test_version_content_hash(self):
        _content = {
            'survey': [{
                'type': 'note',
                'label': 'Read me',
                'name': 'n1'
            }],
        }
        new_asset = Asset.objects.create(asset_type='survey', content=_content)
        expected_hash = calculate_hash(
            json.dumps(new_asset.content, sort_keys=True), 'sha1')
        self.assertEqual(new_asset.latest_version.content_hash, expected_hash)
        return new_asset
Example #3
    def set_md5_hash(self, md5_hash: Optional[str] = None):
        """
        Calculate the MD5 hash and store it in the `metadata` field if it does
        not exist or is empty.
        The value can also be set with the optional `md5_hash` parameter.
        If `md5_hash` is an empty string, the hash is recalculated.
        """
        if md5_hash is not None:
            self.metadata['hash'] = md5_hash

        if not self.metadata.get('hash'):
            if self.is_remote_url:
                md5_hash = calculate_hash(self.metadata['redirect_url'],
                                          prefix=True)
            else:
                try:
                    md5_hash = calculate_hash(self.content.file.read(),
                                              prefix=True)
                except ValueError:
                    md5_hash = None

            self.metadata['hash'] = md5_hash
Example #4
    def md5_hash(self):
        """
        Implements:
         - `OpenRosaManifestInterface.md5_hash()`
         - `SyncBackendMediaInterface.md5_hash()`
        """
        if self.asset_file:
            # If an AssetFile object is attached to this object, return its hash
            return self.asset_file.md5_hash
        else:
            # Fall back on this custom hash, which does NOT represent the real
            # content but changes every time to force synchronization with the
            # deployment back end.
            # An AssetFile object will be created on a call to the
            # 'xml-external' endpoint.
            return calculate_hash(
                f'{str(time.time())}.{self.backend_media_id}', prefix=True
            ) + '-time'
Example #5
    def content_hash(self):
        # used to determine changes in the content from version to version
        # not saved, only compared with other asset_versions
        _json_string = json.dumps(self.version_content, sort_keys=True)
        return calculate_hash(_json_string, 'sha1')
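Examples #2 and #5 both hash a `json.dumps(..., sort_keys=True)` dump so that two logically equal dicts produce the same digest regardless of key insertion order. A quick standard-library illustration of that property, independent of the kpi codebase:

import hashlib
import json

a = {'survey': [{'type': 'note', 'name': 'n1'}]}
b = {'survey': [{'name': 'n1', 'type': 'note'}]}  # same data, different key order

def sha1_of(obj):
    # Canonicalize with sort_keys=True before hashing.
    return hashlib.sha1(json.dumps(obj, sort_keys=True).encode()).hexdigest()

assert sha1_of(a) == sha1_of(b)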
Example #6
def sluggify(_str, _opts):
    """
    This method is ported over from the CoffeeScript implementation in
    jsapp/xlform/src/model.utils.coffee
    """
    _initial = _str
    if _str == '':
        return ''
    opts = dict(DEFAULT_OPTS, **_opts)

    if opts['lrstrip']:
        _str = _str.strip()
    elif opts['lstrip']:
        _str = _str.lstrip()
    elif opts['rstrip']:
        _str = _str.rstrip()

    if opts['lowerCase']:
        _str = _str.lower()

    if opts['underscores']:
        _str = re.sub(r'\s', '_', _str)
        # .replace(/[_]+/g, "_") <- replaces duplicates?

    if opts['replaceNonWordCharacters']:
        if opts['nonWordCharsExceptions']:
            regex = r'[^a-zA-Z0-9_{}]'.format(opts['nonWordCharsExceptions'])
        else:
            regex = r'[^a-zA-Z0-9_]+'  # Cannot use `\W`. Different behaviour with Python 2 & 3

        _str = re.sub(regex, '_', _str)
        if _str != '_' and re.search('_$', _str):
            _str = re.sub('_$', '', _str)

    if opts['characterLimit']:
        _limit = opts['characterLimit']
        if opts['characterLimit_shorten_method'] == 'ends':
            _str = _shorten_long_name(_str, _limit, join_with='_')
        else:
            _str = _str[0:opts['characterLimit']]

    if opts['validXmlTag']:
        if re.search(r'^\d', _str):
            _str = '_' + _str

    if opts['preventDuplicateUnderscores']:
        while re.search('__', _str):
            _str = re.sub('__', '_', _str)

    names = opts.get('other_names', opts['preventDuplicates'])
    if isinstance(names, list):
        names_lc = [name.lower() for name in names]
        attempt_base = _str
        if len(attempt_base) == 0:
            # The slug ended up empty (e.g. the original name was entirely
            # Arabic/Cyrillic characters), so fall back to a hash-based name
            _str = 'h{}'.format(calculate_hash(_initial[0:7])[0:7])
        attempt = attempt_base
        incremented = 0
        while attempt.lower() in names_lc:
            incremented += 1
            attempt = "{0}_{1:03d}".format(attempt_base, incremented)
        _str = attempt

    return _str
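With the common options enabled, the transformation above amounts to a handful of regex passes: trim, lowercase, replace whitespace with underscores, strip other non-word characters, collapse duplicate underscores, and prepend an underscore when the result starts with a digit. A simplified standalone sketch for illustration only (it does not reproduce kpi's DEFAULT_OPTS behavior):

import re

def simple_sluggify(text):
    slug = text.strip().lower()
    slug = re.sub(r'\s', '_', slug)              # whitespace -> underscores
    slug = re.sub(r'[^a-zA-Z0-9_]+', '_', slug)  # drop other non-word characters
    slug = re.sub(r'_{2,}', '_', slug)           # collapse duplicate underscores
    slug = re.sub(r'_$', '', slug)               # strip a trailing underscore
    if re.search(r'^\d', slug):                  # XML tags cannot start with a digit
        slug = '_' + slug
    return slug

# simple_sluggify('What is your  name?') -> 'what_is_your_name'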
Example #7
    def md5_hash(self):
        return calculate_hash(self.xml)
Example #8
def gravatar_url(email, https=True):
    return "%s://www.gravatar.com/avatar/%s?%s" % (
        'https' if https else 'http',
        calculate_hash(email.lower()),
        urlencode({'s': '40'}),
        )
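Gravatar URLs are built from an MD5 hex digest of the lower-cased email address. A self-contained, standard-library equivalent of the function above, assuming `calculate_hash` defaults here to an un-prefixed MD5 hex digest (the name `gravatar_url_sketch` is illustrative):

import hashlib
from urllib.parse import urlencode

def gravatar_url_sketch(email, https=True):
    # MD5 hex digest of the normalized email, per Gravatar's documented scheme.
    digest = hashlib.md5(email.strip().lower().encode('utf-8')).hexdigest()
    scheme = 'https' if https else 'http'
    return f"{scheme}://www.gravatar.com/avatar/{digest}?{urlencode({'s': '40'})}"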
Example #9
    def external(self, request, paired_data_uid, **kwargs):
        """
        Returns an XML document containing the data submitted to the paired asset.
        Creates the endpoints:
        - /api/v2/assets/<parent_lookup_asset>/paired-data/<paired_data_uid>/external/
        - /api/v2/assets/<parent_lookup_asset>/paired-data/<paired_data_uid>/external.xml/
        """
        paired_data = self.get_object()

        # Retrieve the source if it exists
        source_asset = paired_data.get_source()

        if not source_asset:
            # We can enter this condition when source data sharing has been
            # deactivated after it was paired with the current form.
            # We don't want to keep zombie files in storage.
            try:
                asset_file = self.asset.asset_files.get(uid=paired_data_uid)
            except AssetFile.DoesNotExist:
                pass
            else:
                asset_file.delete()

            raise Http404

        if not source_asset.has_deployment or not self.asset.has_deployment:
            raise Http404

        old_hash = None
        # Retrieve data from related asset file.
        # If data has already been fetched once, an `AssetFile` should exist.
        # Otherwise, we create one to store the generated XML.
        try:
            asset_file = self.asset.asset_files.get(uid=paired_data_uid)
        except AssetFile.DoesNotExist:
            asset_file = AssetFile(
                uid=paired_data_uid,
                asset=self.asset,
                file_type=AssetFile.PAIRED_DATA,
                user=self.asset.owner,
            )
            # When the asset file is new, we consider its content expired to
            # force its creation below.
            has_expired = True
        else:
            if not asset_file.content:
                # If `asset_file` exists but does not have any content, it means
                # `paired_data` has changed since the last time this endpoint was
                # called, e.g. the project owner has changed the questions they
                # want to include in the `xml-external` file.
                has_expired = True
            else:
                old_hash = asset_file.md5_hash
                timedelta = timezone.now() - asset_file.date_modified
                has_expired = (timedelta.total_seconds() >
                               settings.PAIRED_DATA_EXPIRATION)

        # TODO: evaluate adding caching headers and an HTTP 304 status code
        if not has_expired:
            return Response(asset_file.content.file.read().decode())

        # If the content of `asset_file` has expired, regenerate the XML
        submissions = source_asset.deployment.get_submissions(
            self.asset.owner, format_type=SUBMISSION_FORMAT_TYPE_XML)
        parsed_submissions = []

        for submission in submissions:
            # Use `rename_root_node_to='data'` to rename the root node of each
            # submission to `data` so that form authors do not have to rewrite
            # their `xml-external` formulas any time the asset UID changes,
            # e.g. when cloning a form or creating a project from a template.
            # Set `use_xpath=True` because `paired_data.fields` uses full group
            # hierarchies, not just question names.
            parsed_submissions.append(
                strip_nodes(
                    submission,
                    paired_data.allowed_fields,
                    use_xpath=True,
                    rename_root_node_to='data',
                ))

        filename = paired_data.filename
        parsed_submissions_to_str = ''.join(parsed_submissions)
        root_tag_name = SubmissionXMLRenderer.root_tag_name
        xml_ = add_xml_declaration(f'<{root_tag_name}>'
                                   f'{parsed_submissions_to_str}'
                                   f'</{root_tag_name}>')

        if not parsed_submissions:
            # We do not want to cache an empty file
            return Response(xml_)

        # We need to delete the current file (if it exists) when the filename
        # has changed. Otherwise, it would leave an orphaned file in storage.
        if asset_file.pk and asset_file.content.name != filename:
            asset_file.content.delete()

        asset_file.content = ContentFile(xml_.encode(), name=filename)

        # `xml_` is already in memory; use its content to compute the hash and
        # store it in the `asset_file` metadata.
        asset_file.set_md5_hash(calculate_hash(xml_, prefix=True))
        asset_file.save()
        if old_hash != asset_file.md5_hash:
            # resync paired data to the deployment backend
            self.asset.deployment.sync_media_files(AssetFile.PAIRED_DATA)

        return Response(xml_)
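The caching decision above boils down to: regenerate the stored XML when the `AssetFile` is new, has no content, or is older than the configured expiration window. A standalone sketch of that check, with illustrative names only (not part of the kpi codebase):

from django.utils import timezone

def paired_data_has_expired(asset_file, expiration_seconds):
    # New (unsaved) or empty files are always treated as expired.
    if not asset_file.pk or not asset_file.content:
        return True
    age = (timezone.now() - asset_file.date_modified).total_seconds()
    return age > expiration_seconds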