Пример #1
0
    def get_citation_from_dataset_schema_or_None(self):
        """
        Look up the citation text held in the dataset's schema info.

        On success the wrapped value is
        self.dataset.dataset_schema_info['citation'][0]['text'], or None when
        the schema info has no "citation" key at all.

        :return: ok_resp(...) wrapping the citation text (or None);
            err_resp(...) when this object is already in an error state, the
            schema info is empty, or the "citation" entry is malformed.
        """
        if self.has_error():
            # Not expected in practice; hand the existing error back
            return err_resp(self.get_err_msg())

        schema_info = self.dataset.dataset_schema_info
        if not schema_info:
            return err_resp('".dataset_schema_info" is empty')

        # A missing "citation" key is not treated as an error
        if 'citation' not in schema_info:
            return ok_resp(None)

        # The key exists, so it must be a non-empty list
        citations = schema_info['citation']
        if not (citations and isinstance(citations, list)):
            return err_resp(
                '"citation" within ".dataset_schema_info" is empty or not a list'
            )

        first_citation = citations[0]
        if 'text' not in first_citation:
            return err_resp(
                '"[\'citation\'][0][\'text\']" not found in ".dataset_schema_info"'
            )

        return ok_resp(first_citation['text'])
Пример #2
0
    def get_file_specific_schema_info(full_schema_info,
                                      file_id=None,
                                      file_persistent_id=None):
        """
        Navigate the JSON-LD schema.org info to retrieve file specific info
       "distribution":[
          {
             "@type":"DataDownload",
             "name":"Crisis.PDF",
             "fileFormat":"application/pdf",
             "contentSize":677112,
             "description":"Article related to this study: The Supreme Court During Crisis: How War Affects Only Nonwar Cases",
             "@id":"https://doi.org/10.7910/DVN/OLD7MB/PZPDJF",
             "identifier":"https://doi.org/10.7910/DVN/OLD7MB/PZPDJF",
             "contentUrl":"https://dataverse.harvard.edu/api/access/datafile/101646"
          },
          (etc)
        ]

        :param full_schema_info: dict that must contain the
            dv_static.SCHEMA_KEY_DISTRIBUTION list shown above
        :param file_id: optional file id, matched against the end of each
            entry's content url ("/{file_id}")
        :param file_persistent_id: optional persistent id; the part after the
            last ":" is matched against the end of each entry's identifier
        :return: ok_resp(file_info) for the first matching entry, otherwise
            err_resp(...) with a message naming what was searched for
        """
        print('get_file_specific_schema_info', file_id, file_persistent_id)
        if not isinstance(full_schema_info, dict):
            return err_resp('"full_schema_info" must be a Python dict')

        if dv_static.SCHEMA_KEY_DISTRIBUTION not in full_schema_info:
            return err_resp(
                f'"{dv_static.SCHEMA_KEY_DISTRIBUTION}" not found in the schema'
            )

        # Only build a url suffix when a file id was actually supplied;
        # otherwise the suffix would be the meaningless "/None"
        url_ending_1 = f'/{file_id}' if file_id is not None else None
        file_doi = file_persistent_id.split(
            ':')[-1] if file_persistent_id else None
        print('file_doi', file_doi)
        for file_info in full_schema_info[dv_static.SCHEMA_KEY_DISTRIBUTION]:

            # Try to match the /{fileId} id to the end of the contentURL
            #   example "contentUrl": https://dataverse.harvard.edu/api/access/datafile/101646"
            #
            if url_ending_1 and dv_static.SCHEMA_KEY_CONTENTURL in file_info:
                content_url = file_info[dv_static.SCHEMA_KEY_CONTENTURL]
                if content_url and content_url.endswith(url_ending_1):
                    return ok_resp(file_info)

            # If there's a file DOI, try to match it with the identifier
            #
            #   example "identifier": "https://doi.org/10.7910/DVN/B7DHBK/BSNYLQ"
            #
            if file_doi and dv_static.SCHEMA_KEY_IDENTIFIER in file_info:
                identifier = file_info[dv_static.SCHEMA_KEY_IDENTIFIER]
                if identifier and identifier.endswith(file_doi):
                    return ok_resp(file_info)

        # Nothing matched. Build the message from the search criteria, not from
        # the loop variable, which is unbound when the distribution is empty.
        if file_id:
            user_msg = f'Did not find fileId "{file_id}"'
        elif file_doi:
            user_msg = f'Did not find file DOI "{file_doi}"'
        else:
            user_msg = ''

        return err_resp(f'Info for file not found in the schema. {user_msg}')
Пример #3
0
    def save(self, **kwargs):
        """
        Run validation over the requested release statistics.

        Nothing is written to the database here; the name "save" is kept
        because this is invoked for a POST.

        Expected shape of the validated request data:
        {
            "analysis_plan_id": abcd-1234,
            "dp_statistics": [{
                 "error": "",
                 "label": "EyeHeight",
                 "locked": false,
                 "epsilon": 0.0625,
                 "variable": "eyeHeight",
                 "statistic": "mean",
                 "fixed_value": "5",
                 "handle_as_fixed": true,
                 "missing_values_handling": "insert_fixed"
                 },
                {
                 "error": "",
                 "label": "EyeHeight",
                 "locked": false,
                 "epsilon": 0.0625,
                 "variable": "eyeHeight",
                 "statistic": "count",
                 "fixed_value": "5",
                 "handle_as_fixed": true,
                 "missing_values_handling": "insert_fixed"
                 }
            ]
         }

        :param kwargs: must include "opendp_user", an instance of the
            model returned by get_user_model()
        :return: err_resp(...) if the user is not valid or the validation
            utility reports an error; otherwise ok_resp(...) wrapping the
            utility's "validation_info"
        """
        requesting_user = kwargs.get('opendp_user')
        if not isinstance(requesting_user, get_user_model()):
            return err_resp('Not an OpenDP User')

        plan_id = self.validated_data['analysis_plan_id']
        requested_stats = self.validated_data['dp_statistics']

        validator = ValidateReleaseUtil.validate_mode(requesting_user,
                                                      plan_id,
                                                      requested_stats)

        # An error at this level concerns the request as a whole, so it is
        # reported before any per-statistic results are returned
        if not validator.has_error():
            return ok_resp(validator.validation_info)

        return err_resp(validator.get_err_msg())
Пример #4
0
    def create_plan(
        dataset_object_id: str,
        opendp_user: get_user_model()) -> BasicResponse:
        """
        Build and save a new AnalysisPlan for a dataset.

        The new plan starts with:
            analyst - the given opendp_user
            name - "Plan " followed by 7 random alphanumeric characters
            dataset - the DataSetInfo found by object_id and creator
            is_complete - False
            variable_info - copied from the dataset's depositor setup info
            user_step - AnalystSteps.STEP_0700_VARIABLES_CONFIRMED

        :param dataset_object_id: DataSetInfo.object_id
        :param opendp_user: the user creating the plan; must also be the
            creator of the DataSetInfo
        :return: ok_resp(plan) on success; err_resp(...) carrying an HTTP
            status code in "data" on failure
        """
        # ---- Check the inputs ----
        if not dataset_object_id:
            return err_resp(astatic.ERR_MSG_DATASET_ID_REQUIRED,
                            data=status.HTTP_400_BAD_REQUEST)

        if not isinstance(opendp_user, get_user_model()):
            return err_resp(astatic.ERR_MSG_USER_REQUIRED,
                            data=status.HTTP_400_BAD_REQUEST)

        # ---- Find the dataset owned by this user ----
        try:
            dataset_info = DataSetInfo.objects.get(
                object_id=dataset_object_id,
                creator=opendp_user)
        except DataSetInfo.DoesNotExist:
            return err_resp(astatic.ERR_MSG_NO_DATASET,
                            data=status.HTTP_400_BAD_REQUEST)

        # ---- The depositor setup must be finished first ----
        setup_info = dataset_info.depositor_setup_info
        if not setup_info.is_complete:
            return err_resp(astatic.ERR_MSG_SETUP_INCOMPLETE,
                            data=status.HTTP_422_UNPROCESSABLE_ENTITY)

        # ---- Assemble and save the plan ----
        plan_settings = dict(
            analyst=opendp_user,
            name=f'Plan {get_rand_alphanumeric(7)}',  # placeholder name
            dataset=dataset_info,
            is_complete=False,
            variable_info=setup_info.variable_info,
            user_step=AnalysisPlan.AnalystSteps.STEP_0700_VARIABLES_CONFIRMED,
        )
        new_plan = AnalysisPlan(**plan_settings)
        new_plan.save()

        return ok_resp(new_plan, message='Plan created!')
Пример #5
0
    def get_dataset_size(self) -> BasicResponse:
        """
        Read data_profile['dataset']['rowCount'].

        :return: ok_resp(row count) when present and not None; otherwise
            err_resp(...) describing which part of the profile is missing
        """
        profile = self.data_profile
        if not profile:
            return err_resp('Data profile not available')

        if 'dataset' not in profile:
            return err_resp('Dataset information not available in profile')

        dataset_section = profile['dataset']
        if 'rowCount' not in dataset_section:
            return err_resp('"rowCount" information not available in profile.')

        num_rows = dataset_section['rowCount']
        if num_rows is None:
            # The key exists but carries no value
            return err_resp(
                '"rowCount" information not available in profile (id:2')

        return ok_resp(num_rows)
Пример #6
0
 def get_dataset_export_json(self, doi, format_type):
     """
     Ask the native API for a dataset export.

     :param doi: passed through to native_api.get_dataset_export
     :param format_type: passed through to native_api.get_dataset_export
     :return: whatever the native API call returns, or err_resp(...) if a
         ConnectionError is raised while making the call
     """
     try:
         return self.native_api.get_dataset_export(doi, format_type)
     except ConnectionError as conn_err:
         return err_resp(f'Failed to connect. {conn_err}')
Пример #7
0
    def get_dataset_info(self) -> BasicResponse:
        """
        Fetch the DataSetInfo whose object_id is in the validated data.

        Must only be called after ".is_valid()" has returned True.

        :return: ok_resp(DataSetInfo) or err_resp(...) when no row matches
        """
        assert self.is_valid(), \
            'Do not call this method before checking ".is_valid()"'

        lookup = dict(object_id=self.validated_data.get('object_id'))
        try:
            dataset_info = DataSetInfo.objects.get(**lookup)
        except DataSetInfo.DoesNotExist:
            return err_resp(dstatic.ERR_MSG_DATASET_INFO_NOT_FOUND)
        else:
            return ok_resp(dataset_info)
Пример #8
0
    def get_name_from_dataset_schema(self) -> BasicResponse:
        """
        Look up the "name" value held in the dataset's schema info,
        i.e. self.dataset.dataset_schema_info['name'].

        :return: ok_resp(name) when a non-empty name is present; err_resp(...)
            when this object is already in an error state, the schema info is
            empty, or the name is missing/empty
        """
        if self.has_error():
            # Not expected in practice; hand the existing error back
            return err_resp(self.get_err_msg())

        schema_info = self.dataset.dataset_schema_info
        if not schema_info:
            return err_resp('".dataset_schema_info" is empty')

        if 'name' not in schema_info:
            return err_resp(
                '"name" not found in ".dataset_schema_info" not found')

        dataset_name = schema_info['name']
        if dataset_name:
            return ok_resp(dataset_name)

        return err_resp('"name" within ".dataset_schema_info" is empty')
Пример #9
0
    def get_variable_order(self, as_indices=False) -> BasicResponse:
        """
        Read data_profile['dataset']['variableOrder'].

         Example data structure:
          {"dataset":{
              "rowCount":6610,
              "variableCount":20,
              "variableOrder":[
                 [0, "ccode"],
                 [1, "country"],
                 [2, "cname" ],
                ]
            }
            etc
          }

        :param as_indices: when True, return only the first element of each
            pair, e.g. [0, 1, 2]; when False, return the list unchanged
        :return: ok_resp(list) or err_resp(...) when the profile lacks the
            information or the pairs cannot be unpacked
        """
        profile = self.data_profile
        if not profile:
            return err_resp('Data profile not available')

        if 'dataset' not in profile:
            return err_resp('Dataset information not available in profile')

        dataset_section = profile['dataset']
        if 'variableOrder' not in dataset_section:
            return err_resp(
                '"variableOrder" information not available in profile (id:2')

        variable_order = dataset_section['variableOrder']

        if not as_indices:
            return ok_resp(variable_order)

        try:
            # Each entry is expected to unpack into (index, variable name)
            indices = []
            for position, _name in variable_order:
                indices.append(position)
            return ok_resp(indices)
        except Exception as ex_obj:
            user_msg = (
                f'"variableOrder" information not in proper format: {variable_order}'
                f' (exception: {ex_obj}')
            return err_resp(user_msg)
Пример #10
0
    def get_file_info(self):
        """
        Summarize self.dataset.file_schema_info as a small dict.

        Ideal result:
        {
            "name": "crisis.tab"
            "identifier": "https://doi.org/10.7910/DVN/OLD7MB/ZI4N3J",
            "fileFormat": "text/tab-separated-values",
        }

        "name" is required; "identifier" and "fileFormat" are set to None
        when they are absent from the schema info.

        :return: ok_resp(dict) or err_resp(...) when this object is already in
            an error state, the schema info is empty, or "name" is missing
        """
        if self.has_error():
            # Not expected in practice; hand the existing error back
            return err_resp(self.get_err_msg())

        schema_info = self.dataset.file_schema_info
        if not schema_info:
            return err_resp('".file_schema_info" is empty')

        if 'name' not in schema_info:
            return err_resp(
                '"name" not found in ".file_schema_info" not found')

        summary = {'name': schema_info['name']}

        # Optional keys fall back to None
        for optional_key in ('identifier', 'fileFormat'):
            summary[optional_key] = (schema_info[optional_key]
                                     if optional_key in schema_info
                                     else None)

        return ok_resp(summary)
Пример #11
0
    def get_dataset_info_with_user_check(
        self, user: get_user_model()) -> BasicResponse:
        """
        Fetch the DataSetInfo whose object_id is in the validated data and
        whose creator is the given user.

        Must only be called after ".is_valid()" has returned True.

        :param user: the row must have this user as its creator
        :return: ok_resp(DataSetInfo) or err_resp(...) when no row matches
        """
        assert self.is_valid(), \
            'Do not call this method before checking ".is_valid()"'

        lookup = dict(object_id=self.validated_data.get('object_id'),
                      creator=user)
        try:
            dataset_info = DataSetInfo.objects.get(**lookup)
        except DataSetInfo.DoesNotExist:
            return err_resp(
                dstatic.ERR_MSG_DATASET_INFO_NOT_FOUND_CURRENT_USER)
        else:
            return ok_resp(dataset_info)
Пример #12
0
    def retrieve_analysis(
        analysis_object_id: str,
        opendp_user: get_user_model()) -> BasicResponse:
        """
        Retrieve an existing AnalysisPlan object by its object_id and analyst

        :param analysis_object_id: AnalysisPlan.object_id to look up
        :param opendp_user: the plan must have this user as its analyst
        :return: ok_resp(plan) on success; err_resp(...) carrying an HTTP
            status code in "data" when an argument is missing/invalid or no
            matching plan exists
        """
        if not analysis_object_id:
            return err_resp(astatic.ERR_MSG_ANALYSIS_ID_REQUIRED,
                            data=status.HTTP_400_BAD_REQUEST)
        if not isinstance(opendp_user, get_user_model()):
            return err_resp(astatic.ERR_MSG_USER_REQUIRED,
                            data=status.HTTP_400_BAD_REQUEST)

        # -------------------------------
        # Retrieve AnalysisPlan object
        # -------------------------------
        try:
            plan = AnalysisPlan.objects.get(object_id=analysis_object_id,
                                            analyst=opendp_user)
        except AnalysisPlan.DoesNotExist:
            return err_resp(astatic.ERR_MSG_NO_ANALYSIS_PLAN,
                            data=status.HTTP_400_BAD_REQUEST)

        # Nothing is created here, so the message says "retrieved"
        return ok_resp(plan, message='Plan retrieved!')
Пример #13
0
    def make_test_handoff_object(self):
        """
        For unit tests, make a DataverseHandoff object with the same params

        The site url is dropped from the params before the object is built;
        it is used instead to look up the RegisteredDataverse that becomes
        the handoff's "dv_installation".

        :return: ok_resp(data=DataverseHandoff) after saving it, or
            err_resp(...) when no RegisteredDataverse matches self.site_url
        """
        params = self.as_dict()
        # The site url is not passed to the DataverseHandoff constructor
        params.pop(dv_static.DV_PARAM_SITE_URL, None)

        dv_handoff = DataverseHandoff(**params)

        reg_dv = RegisteredDataverse.get_registered_dataverse(self.site_url)
        if not reg_dv:
            # f-string so the actual url appears in the message
            return err_resp(
                f'No RegisteredDataverse for site_url {self.site_url}')

        dv_handoff.dv_installation = reg_dv
        dv_handoff.save()

        return ok_resp(data=dv_handoff)
Пример #14
0
def profile_dataset_info(dataset_object_id: DataSetInfo.object_id, websocket_id=None) -> BasicResponse:
    """
    Using the DataSetInfo object_id, download and profile a dataset.
    If the "websocket_id" is defined, send back websocket messages

    Assumes: if websocket_id is None, then assume this is being called w/o celery
        and can return complex objects such as the DownloadAndProfileUtil.

        If websocket_id is defined, this function returns a dict:
         {'success': True/False, 'message': "A user message"}

    :param dataset_object_id: DataSetInfo.object_id of the dataset to profile
    :param websocket_id: optional; when truthy a plain dict is returned
        instead of a BasicResponse
    :return: without websocket_id: ok_resp(DownloadAndProfileUtil) or
        err_resp(error message); with websocket_id: the dict described above
    """
    dp_util = DownloadAndProfileUtil(dataset_object_id, websocket_id)
    if dp_util.has_error():
        if websocket_id:
            # Key is "message", matching the documented return shape
            return dict(success=False, message=dp_util.get_err_msg())
        return err_resp(dp_util.get_err_msg())  # direct error `message`

    if websocket_id:
        return dict(success=True, message='Profile in process')

    return ok_resp(dp_util)
Пример #15
0
    def update_dataverse_user(self):
        """
        Copy this handler's user details onto the matching DataverseUser row.

        The row is looked up by (opendp_user, registered_dataverse); its
        persistent_id, first_name, last_name and email are then overwritten
        and the row is saved.

        :return: ok_resp(DataverseUser) or err_resp(...) when no row matches
        """
        try:
            dv_user = DataverseUser.objects.get(
                user=self.opendp_user,
                dv_installation=self.registered_dataverse)
        except DataverseUser.DoesNotExist:
            return err_resp('Dataverse user does not exist')

        # (model field, new value) pairs to apply
        new_values = (
            ('persistent_id', self.dataverse_persistent_id),
            ('first_name', self.first_name),
            ('last_name', self.last_name),
            ('email', self.email),
        )
        for field_name, field_value in new_values:
            setattr(dv_user, field_name, field_value)

        dv_user.save()

        return ok_resp(dv_user)
Пример #16
0
    def get_user_info(self, user_api_token=None):
        """
        Placeholder until pyDataverse API is updated

        Calls "{host}/api/v1/users/:me" with the API token in the
        "X-Dataverse-key" header.

        :param user_api_token: token to use for the call; falls back to
            self.api_token when not given
        :return: ok_resp(parsed JSON) when Dataverse answers HTTP 200 with a
            non-error status; otherwise err_resp(...) describing the
            connection failure, the Dataverse error, or the HTTP status
        """
        api_token = user_api_token if user_api_token else self.api_token

        # Normalize the host url via format_dv_url
        ye_host = RegisteredDataverse.format_dv_url(self._host)

        # format url
        dv_url = f'{ye_host}/api/v1/users/:me'

        # make the request
        headers = {'X-Dataverse-key': api_token}
        try:
            response = requests.get(dv_url, headers=headers)
        except (ConnectionError,
                requests.exceptions.ConnectionError) as err_obj:
            # requests raises its own ConnectionError class; name it
            # explicitly so the handler works whichever ConnectionError the
            # bare name refers to in this module
            return err_resp(f'Failed to connect. {err_obj}')

        if response.status_code == 200:
            if not response.content:
                # In this instance the response content is an empty string or None -- shouldn't happen...
                #
                return err_resp("Dataverse returned an HTTP 200 status code but failed to return a response.")

            # Parse once; a body that is not JSON is reported, not raised
            try:
                resp_json = response.json()
            except ValueError:
                return err_resp('Dataverse returned an HTTP 200 status code'
                                ' but the response was not valid JSON.')

            # Only a JSON object can carry a status
            dv_status = (resp_json.get(dv_static.DV_KEY_STATUS)
                         if isinstance(resp_json, dict) else None)
            if not dv_status:
                return err_resp("Dataverse response failed to return a 'status'.")

            if dv_status == dv_static.STATUS_VAL_ERROR:
                user_msg = resp_json.get(dv_static.DV_KEY_MESSAGE,
                                         '(No message from Dataverse)')
                return err_resp(f"Dataverse error: {user_msg}")

            return ok_resp(resp_json)

        # Non-200: prefer the JSON "message" if there is one
        try:
            json_resp = response.json()
        except ValueError:
            json_resp = None

        if isinstance(json_resp, dict) and 'message' in json_resp:
            return err_resp(json_resp['message'])

        return err_resp(f'Status code: {response.status_code} {response.text}')
Пример #17
0
    def get_variable_index(self, var_name: str) -> BasicResponse:
        """Retrieve the variable index from the data_profile for a specific variable name
         Example data structure:
          {"dataset":{
              "rowCount":6610,
              "variableCount":20,
              "variableOrder":[
                 [0, "ccode"],
                 [1, "country"],
                 [2, "cname" ],
                ]
            }
            etc
          }

        :param var_name - variable name, e.g. "cname" would return 2
        :return: ok_resp(index) for the first entry whose name equals var_name
            or camel_to_snake(var_name); err_resp(...) when the profile lacks
            usable "variableOrder" information or no entry matches
        """
        if not self.data_profile:
            return err_resp('Data profile not available')

        if 'dataset' not in self.data_profile:
            return err_resp('Dataset information not available in profile')

        if 'variableOrder' not in self.data_profile['dataset']:
            return err_resp(
                '"variableOrder" information not available in profile (id:2')

        variable_order = self.data_profile['dataset']['variableOrder']
        if not variable_order:
            return err_resp('Bad "variableOrder" information in profile.')

        # Temp workaround!!! See Issue #300
        # https://github.com/opendp/dpcreator/issues/300
        # The snake_case form of the name is also accepted; compute it once
        accepted_names = (var_name, camel_to_snake(var_name))

        try:
            for idx, feature in variable_order:
                if feature in accepted_names:
                    return ok_resp(idx)
        except (ValueError, TypeError):
            # ValueError: entry has the wrong number of items
            # TypeError: entry (or the list itself) cannot be unpacked
            return err_resp(
                'Bad "variableOrder" information in profile. (id:3)')

        return err_resp(f'Index not found for variable "{var_name}"')