示例#1
0
    def post_bulk_import(self, tbl, url_type, resource_type, mapping_types,
                         description, result_fields=None, **url_kwargs):
        """
        Internal method to post bulk imports. Stages the table in cloud storage and
        creates a ``bulkImportJobs`` request that points at the staged file.

        `Args:`
            tbl: parsons.Table
                The table to import. Every column in the table is declared in the request.
            url_type: str
                The cloud file storage to use to post the file.
            resource_type: str
                Sent as the ``resourceType`` of the import action.
            mapping_types: list
                Sent as the ``mappingTypes`` of the import action.
            description: str
                Description of the bulk import job.
            result_fields: list
                Optional column names to include in the results file.
            **url_kwargs: kwargs
                Arguments to configure your cloud storage url type.
        `Returns:`
            The ``jobId`` value of the bulk import job response.
        """
        # Stage the table in cloud storage under a unique name. The object is posted
        # with a ``.zip`` path while the request below refers to the ``.csv`` name.
        upload_id = str(uuid.uuid1())
        source_url = cloud_storage.post_file(tbl,
                                             url_type,
                                             file_path=f'{upload_id}.zip',
                                             quoting=csv.QUOTE_ALL,
                                             **url_kwargs)
        logger.info(f'Table uploaded to {url_type}.')

        # Describe the staged file
        file_spec = {
            "columnDelimiter": 'csv',
            "columns": [{'name': column} for column in tbl.columns],
            "fileName": f'{upload_id}.csv',
            "hasHeader": "True",
            "hasQuotes": "True",
            "sourceUrl": source_url,
        }

        # A bulk import carries exactly one load action
        action = {
            "resultFileSizeKbLimit": 5000,
            "resourceType": resource_type,
            "actionType": "loadMappedFile",
            "mappingTypes": mapping_types,
        }
        if result_fields:
            action['columnsToIncludeInResultsFile'] = [
                {'name': field} for field in result_fields
            ]

        payload = {
            "description": description,
            "file": file_spec,
            "actions": [action],
        }

        response = self.connection.post_request('bulkImportJobs', json=payload)
        job_id = response['jobId']
        logger.info(f"Bulk upload {job_id} created.")
        return job_id
示例#2
0
    def upload_saved_list(self,
                          tbl,
                          list_name,
                          folder_id,
                          url_type,
                          id_type='vanid',
                          replace=False,
                          **url_kwargs):
        """
            .. warning::
               .. deprecated:: 0.X Use :func:`parsons.VAN.upload_saved_list_rest` instead.

        Upload a saved list. Invalid or unmatched person id records will be ignored. Your api user
        must be shared on the target folder.

        The folder id and list name are validated before the table is posted to cloud
        storage, so a failed validation does not leave a stray file behind.

        `Args:`
            tbl: parsons.Table
                A parsons table object containing one column of person ids.
            list_name: str
                The saved list name.
            folder_id: int
                The folder id where the list will be stored.
            url_type: str
                The cloud file storage to use to post the file. Currently only ``S3``.
            id_type: str
                The primary key type. The options, beyond ``vanid`` are specific to your
                instance of VAN.
            replace: boolean
                Replace saved list if already exists.
            **url_kwargs: kwargs
                Arguments to configure your cloud storage url type.
                    * S3 requires ``bucket`` argument and, if not stored as env variables
                      ``aws_access_key`` and ``aws_secret_access_key``.
        `Returns:`
            dict
                The ``CreateAndStoreSavedList`` response passed through ``Client.dict``.
                When it is non-empty, its ``ListSize`` and ``_Name`` entries are logged.
        `Raises:`
            ValueError
                If ``folder_id`` is not among the folders returned by ``get_folders``, or
                if ``replace`` is false and a saved list named ``list_name`` already
                exists in the folder.
        """
        # VAN errors for this method are not particularly useful or helpful. For that reason, we
        # will check that the folder exists and if the list already exists. These checks run
        # before the upload so that nothing is posted to cloud storage for a doomed request.
        logger.info('Validating folder id and list name.')
        if folder_id not in [x['folderId'] for x in self.get_folders()]:
            raise ValueError(
                "Folder does not exist or is not shared with API user.")

        if not replace and list_name in [
                x['name'] for x in self.get_saved_lists(folder_id)
        ]:
            raise ValueError(
                "Saved list already exists. Set to replace argument to True or "
                "change list name.")

        # Runtime counterpart of the deprecation note in the docstring. It is only
        # emitted for 'vanid' uploads, the case for which the message recommends the
        # REST method.
        if id_type == 'vanid':
            logger.warning('The NVPVAN SOAP API is deprecated, consider using '
                           'parsons.VAN.upload_saved_list_rest if you are '
                           'uploading a list of vanids.')

        # Move to cloud storage
        file_name = str(uuid.uuid1())
        url = cloud_storage.post_file(tbl,
                                      url_type,
                                      file_path=file_name + '.zip',
                                      **url_kwargs)
        logger.info(f'Table uploaded to {url_type}.')

        # Create XML
        soap_factory = self.connection.soap_client.factory
        xml = soap_factory.create('CreateAndStoreSavedListMetaData')
        xml.SavedList._Name = list_name
        xml.DestinationFolder._ID = folder_id
        xml.SourceFile.FileName = file_name + '.csv'
        xml.SourceFile.FileUrl = url
        xml.SourceFile.FileCompression = 'zip'
        xml.Options.OverwriteExistingList = replace

        # Describe file
        file_desc = soap_factory.create('SeparatedFileFormatDescription')
        file_desc._name = 'csv'
        file_desc.HasHeaderRow = True

        # Only support single column for now
        col = soap_factory.create('Column')
        col.Name = id_type
        col.RefersTo._Path = f"Person[@PersonIDType=\'{id_type}\']"
        col._Index = '0'

        # Assemble request
        file_desc.Columns.Column.append(col)
        xml.SourceFile.Format = file_desc

        r = Client.dict(
            self.connection.soap_client.service.CreateAndStoreSavedList(xml))
        if r:
            logger.info(
                f"Uploaded {r['ListSize']} records to {r['_Name']} saved list."
            )
        return r
示例#3
0
    def upload_saved_list_rest(self,
                               tbl,
                               url_type,
                               folder_id,
                               list_name,
                               description,
                               callback_url,
                               columns,
                               id_column,
                               delimiter='csv',
                               header=True,
                               quotes=True,
                               overwrite=None,
                               **url_kwargs):
        """
        Upload a saved list. Invalid or unmatched person id records will be ignored. Your api user
        must be shared on the target folder.

        All arguments are validated before the table is posted to cloud storage, so a
        failed validation does not leave a stray file behind.

        `Args:`
            tbl: parsons.Table
                A parsons table object containing one column of person ids.
            url_type: str
                The cloud file storage to use to post the file. Currently only ``S3``.
            folder_id: int
                The folder id where the list will be stored.
            list_name: str
                The saved list name.
            description: str
                Description of the file upload job and the list.
            callback_url: string
                The configured HTTP listener to which successful list loads will send
                a standard webhook.
            columns: list
                A list of column names contained in the file.
            id_column : str
                The column name of the VAN ID column in the file. Must be VAN ID.
            delimiter: str
                The file delimiter used. One of ``csv``, ``tab`` or ``pipe``.
            header: boolean
                Whether or not the source file has a header row.
            quotes: boolean
                 Whether or not fields are enclosed in quotation marks within each
                 column of the file.
            overwrite: int
                The id of an existing saved list to replace; sent as
                ``overwriteExistingListId``. When omitted, the upload is refused if a
                list named ``list_name`` already exists in the folder.
            **url_kwargs: kwargs
                Arguments to configure your cloud storage url type.
                    * S3 requires ``bucket`` argument and, if not stored as env variables
                      ``aws_access_key`` and ``aws_secret_access_key``.
        `Returns:`
            dict
                The response of the ``fileLoadingJobs`` request, including its ``jobId``.
        `Raises:`
            ValueError
                If ``delimiter`` is not supported, if ``folder_id`` is not among the
                folders returned by ``get_folders``, or if ``overwrite`` is not given
                and a saved list named ``list_name`` already exists in the folder.
        """
        # Cheapest check first: no API call is needed to reject a bad delimiter.
        if delimiter not in ['csv', 'tab', 'pipe']:
            raise ValueError("Delimiter must be one of 'csv', 'tab' or 'pipe'")

        # VAN errors for this method are not particularly useful or helpful. For that reason, we
        # will check that the folder exists and if the list already exists.
        logger.info('Validating folder id and list name.')
        if folder_id not in [x['folderId'] for x in self.get_folders()]:
            raise ValueError(
                "Folder does not exist or is not shared with API user.")

        # A name clash is only an error when the caller has not asked to overwrite;
        # otherwise the overwrite list id could never be used with an existing name.
        if not overwrite and list_name in [
                x['name'] for x in self.get_saved_lists(folder_id)
        ]:
            raise ValueError("Saved list already exists. Set overwrite "
                             "argument to list ID or change list name.")

        # Move to cloud storage only once the request is known to be well-formed.
        rando = str(uuid.uuid1())
        file_name = rando + '.csv'
        url = cloud_storage.post_file(tbl,
                                      url_type,
                                      file_path=rando + '.zip',
                                      **url_kwargs)
        # Drop the query string from the url before handing it to VAN:
        # hack around github.com/move-coop/parsons/issues/513
        url_for_van = url.split('?')[0]
        logger.info(f'Table uploaded to {url_type}.')

        payload = {
            "description": description,
            "file": {
                "columnDelimiter": delimiter.capitalize(),
                "columns": [{'name': c} for c in columns],
                "fileName": file_name,
                "hasHeader": header,
                "hasQuotes": quotes,
                "sourceUrl": url_for_van
            },
            "actions": [{
                "actionType": "LoadSavedListFile",
                "listDescription": description,
                "listName": list_name,
                "personIdColumn": id_column,
                "folderId": folder_id,
                "personIdType": "VANID"
            }],
            "listeners": [{
                "type": "URL",
                "value": callback_url
            }]
        }

        if overwrite:
            payload["actions"][0]["overwriteExistingListId"] = overwrite

        logger.info(payload)
        file_load_job_response = self.connection.post_request(
            'fileLoadingJobs', json=payload)
        job_id = file_load_job_response['jobId']
        logger.info(f'Saved list loading job {job_id} created. Reference '
                    'callback url to check for job status')
        return file_load_job_response
示例#4
0
    def upload_scores(self,
                      tbl,
                      config,
                      url_type,
                      id_type='vanid',
                      email=None,
                      auto_approve=True,
                      approve_tolerance=.1,
                      **url_kwargs):
        """
        Upload scores. Use to create or overwrite scores. Multiple score loads
        should be configured in a single call. [1]_

        `Args:`
            tbl: object
                A parsons.Table object. The table must contain the scores and first column in the
                table must contain the primary key (e.g. vanid).
            config: list
                The score configuration. A list of dictionaries, one per score, each
                with the following keys

                .. list-table::
                    :widths: 20 80
                    :header-rows: 0

                    * - ``score_column``
                      - The name of the column where the score is housed.
                    * - ``score_id``
                      - The score slot id.

                Example:

                .. highlight:: python
                .. code-block:: python

                  [{'score_id': int, 'score_column': str},
                   {'score_id': int, 'score_column': str}]

            url_type: str
                The cloud file storage to use to post the file (``S3`` or ``GCS``).
                See :ref:`Cloud Storage <cloud-storage>` for more details.
            id_type: str
                The person id type of the first column; sent as ``personIdType``.
            email: str
                An email address to send job load status updates.
            auto_approve: boolean
                If the scores are within the expected tolerance of deviation from the
                average values provided, then score will be automatically approved.
            approve_tolerance: float
                The deviation from the average scores allowed in order to automatically
                approve the score. Maximum of .1.
            **url_kwargs: kwargs
                Arguments to configure your cloud storage url type. See
                :ref:`Cloud Storage <cloud-storage>` for more details.
        `Returns:`
            int
               The score load job id.

        .. [1] NGPVAN asks that you load multiple scores in a single call to reduce the load
           on their servers.
        """

        # Stage the table in cloud storage under a unique name
        upload_id = str(uuid.uuid1())
        source_url = cloud_storage.post_file(tbl,
                                             url_type,
                                             file_path=f'{upload_id}.zip',
                                             **url_kwargs)
        logger.info(f'Table uploaded to {url_type}.')

        # Request skeleton; one action per configured score is appended below
        file_spec = {
            "columnDelimiter": 'csv',
            "columns": [{'name': column} for column in tbl.columns],
            "fileName": f'{upload_id}.csv',
            "hasHeader": "True",
            "hasQuotes": "False",
            "sourceUrl": source_url,
        }
        payload = {
            "description": 'A description',
            "file": file_spec,
            "actions": [],
        }

        for score in config:
            action = {
                "actionType": "score",
                # The first column of the table is used as the person id column
                "personIdColumn": tbl.columns[0],
                "personIdType": id_type,
                "scoreColumn": score['score_column'],
                "scoreId": score['score_id'],
            }

            if auto_approve:
                # The approval criteria carry the mean of the score column
                # together with the allowed tolerance
                action['approvalCriteria'] = {
                    "average": petl.stats(tbl.table, score['score_column']).mean,
                    "tolerance": approve_tolerance,
                }

            payload['actions'].append(action)

        # Optional email listener for job status updates
        if email:
            payload['listeners'] = [{"type": "EMAIL", 'value': email}]

        # Create the score load job
        response = self.connection.post_request('fileLoadingJobs', json=payload)
        job_id = response['jobId']
        logger.info(f"Scores job {job_id} created.")
        return job_id