def post_bulk_import(self, tbl, url_type, resource_type, mapping_types,
                     description, result_fields=None, **url_kwargs):
    """Internal method to post bulk imports."""

    # Move to cloud storage
    file_name = str(uuid.uuid1())
    url = cloud_storage.post_file(tbl,
                                  url_type,
                                  file_path=file_name + '.zip',
                                  quoting=csv.QUOTE_ALL,
                                  **url_kwargs)
    logger.info(f'Table uploaded to {url_type}.')

    # Generate request json
    json = {"description": description,
            "file": {
                "columnDelimiter": 'csv',
                "columns": [{'name': c} for c in tbl.columns],
                "fileName": file_name + '.csv',
                "hasHeader": "True",
                "hasQuotes": "True",
                "sourceUrl": url},
            "actions": [{"resultFileSizeKbLimit": 5000,
                         "resourceType": resource_type,
                         "actionType": "loadMappedFile",
                         "mappingTypes": mapping_types}]
            }

    # Optionally request specific columns in the results file
    if result_fields:
        result_fields = [{'name': c} for c in result_fields]
        json['actions'][0]['columnsToIncludeInResultsFile'] = result_fields

    r = self.connection.post_request('bulkImportJobs', json=json)
    logger.info(f"Bulk upload {r['jobId']} created.")
    return r['jobId']
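# Example call (a hedged sketch, not a confirmed recipe: the resource type,
# mapping type, and bucket below are illustrative placeholders, not values
# confirmed by this module):
#
#     job_id = self.post_bulk_import(
#         tbl,
#         url_type='S3',
#         resource_type='Contacts',
#         mapping_types=[{'name': 'Email'}],
#         description='Bulk import of contact records',
#         bucket='my-bucket')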
def upload_saved_list(self, tbl, list_name, folder_id, url_type,
                      id_type='vanid', replace=False, **url_kwargs):
    """
    .. warning::
        .. deprecated:: 0.X
            Use :func:`parsons.VAN.upload_saved_list_rest` instead.

    Upload a saved list. Invalid or unmatched person id records will be
    ignored. Your API user must be shared on the target folder.

    `Args:`
        tbl: parsons.Table
            A parsons table object containing one column of person ids.
        list_name: str
            The saved list name.
        folder_id: int
            The folder id where the list will be stored.
        url_type: str
            The cloud file storage to use to post the file. Currently only
            ``S3``.
        id_type: str
            The primary key type. The options, beyond ``vanid``, are specific
            to your instance of VAN.
        replace: boolean
            Replace the saved list if it already exists.
        **url_kwargs: kwargs
            Arguments to configure your cloud storage url type.
                * S3 requires the ``bucket`` argument and, if not stored as
                  env variables, ``aws_access_key`` and
                  ``aws_secret_access_key``.
    `Returns:`
        dict
            Upload results information, including the number of matched and
            saved records in your list.
    """

    # Move to cloud storage
    file_name = str(uuid.uuid1())
    url = cloud_storage.post_file(tbl,
                                  url_type,
                                  file_path=file_name + '.zip',
                                  **url_kwargs)
    logger.info(f'Table uploaded to {url_type}.')

    # VAN errors for this method are not particularly useful or helpful. For
    # that reason, we will check that the folder exists and whether the list
    # already exists.
    logger.info('Validating folder id and list name.')
    if folder_id not in [x['folderId'] for x in self.get_folders()]:
        raise ValueError(
            "Folder does not exist or is not shared with API user.")

    if not replace:
        if list_name in [
                x['name'] for x in self.get_saved_lists(folder_id)
        ]:
            raise ValueError(
                "Saved list already exists. Set the replace argument to True "
                "or change the list name.")

    # This runtime warning may be redundant given the deprecation note in the
    # docstring; keeping both is a style/standards decision.
    if id_type == 'vanid':
        logger.warning('The NGPVAN SOAP API is deprecated, consider using '
                       'parsons.VAN.upload_saved_list_rest if you are '
                       'uploading a list of vanids.')

    # Create XML
    xml = self.connection.soap_client.factory.create(
        'CreateAndStoreSavedListMetaData')
    xml.SavedList._Name = list_name
    xml.DestinationFolder._ID = folder_id
    xml.SourceFile.FileName = file_name + '.csv'
    xml.SourceFile.FileUrl = url
    xml.SourceFile.FileCompression = 'zip'
    xml.Options.OverwriteExistingList = replace

    # Describe file
    file_desc = self.connection.soap_client.factory.create(
        'SeparatedFileFormatDescription')
    file_desc._name = 'csv'
    file_desc.HasHeaderRow = True

    # Only a single id column is supported for now
    col = self.connection.soap_client.factory.create('Column')
    col.Name = id_type
    col.RefersTo._Path = f"Person[@PersonIDType='{id_type}']"
    col._Index = '0'

    # Assemble request
    file_desc.Columns.Column.append(col)
    xml.SourceFile.Format = file_desc
    r = Client.dict(
        self.connection.soap_client.service.CreateAndStoreSavedList(xml))
    if r:
        logger.info(
            f"Uploaded {r['ListSize']} records to {r['_Name']} saved list.")
    return r
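# Example usage (a minimal sketch; the folder id, list name, and bucket are
# placeholders, and the VAN API key is assumed to be set as an env variable):
#
#     from parsons import Table, VAN
#
#     van = VAN(db='MyVoters')
#     tbl = Table([{'vanid': 123456}, {'vanid': 789012}])
#     van.upload_saved_list(tbl, 'My Saved List', folder_id=1234,
#                           url_type='S3', bucket='my-bucket')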
def upload_saved_list_rest(self, tbl, url_type, folder_id, list_name,
                           description, callback_url, columns, id_column,
                           delimiter='csv', header=True, quotes=True,
                           overwrite=None, **url_kwargs):
    """
    Upload a saved list. Invalid or unmatched person id records will be
    ignored. Your API user must be shared on the target folder.

    `Args:`
        tbl: parsons.Table
            A parsons table object containing one column of person ids.
        url_type: str
            The cloud file storage to use to post the file. Currently only
            ``S3``.
        folder_id: int
            The folder id where the list will be stored.
        list_name: str
            The saved list name.
        description: str
            Description of the file upload job and the list.
        callback_url: str
            The configured HTTP listener to which successful list loads will
            send a standard webhook.
        columns: list
            A list of column names contained in the file.
        id_column: str
            The column name of the VAN ID column in the file. Must be VAN ID.
        delimiter: str
            The file delimiter used. One of ``csv``, ``tab`` or ``pipe``.
        header: boolean
            Whether or not the source file has a header row.
        quotes: boolean
            Whether or not fields are enclosed in quotation marks within each
            column of the file.
        overwrite: int
            The ID of an existing saved list to replace.
        **url_kwargs: kwargs
            Arguments to configure your cloud storage url type.
                * S3 requires the ``bucket`` argument and, if not stored as
                  env variables, ``aws_access_key`` and
                  ``aws_secret_access_key``.
    `Returns:`
        dict
            Upload results information, including the number of matched and
            saved records in your list.
    """
    rando = str(uuid.uuid1())
    file_name = rando + '.csv'
    url = cloud_storage.post_file(tbl,
                                  url_type,
                                  file_path=rando + '.zip',
                                  **url_kwargs)
    # Strip the query string to work around
    # github.com/move-coop/parsons/issues/513
    url_for_van = url.split('?')[0]
    logger.info(f'Table uploaded to {url_type}.')

    # VAN errors for this method are not particularly useful or helpful. For
    # that reason, we will check that the folder exists and whether the list
    # already exists.
    logger.info('Validating folder id and list name.')
    if folder_id not in [x['folderId'] for x in self.get_folders()]:
        raise ValueError(
            "Folder does not exist or is not shared with API user.")

    if list_name in [x['name'] for x in self.get_saved_lists(folder_id)]:
        raise ValueError("Saved list already exists. Set the overwrite "
                         "argument to the list ID or change the list name.")

    if delimiter not in ['csv', 'tab', 'pipe']:
        raise ValueError("Delimiter must be one of 'csv', 'tab' or 'pipe'")

    columns = [{'name': c} for c in columns]
    delimiter = delimiter.capitalize()

    json = {
        "description": description,
        "file": {
            "columnDelimiter": delimiter,
            "columns": columns,
            "fileName": file_name,
            "hasHeader": header,
            "hasQuotes": quotes,
            "sourceUrl": url_for_van
        },
        "actions": [{
            "actionType": "LoadSavedListFile",
            "listDescription": description,
            "listName": list_name,
            "personIdColumn": id_column,
            "folderId": folder_id,
            "personIdType": "VANID"
        }],
        "listeners": [{
            "type": "URL",
            "value": callback_url
        }]
    }
    if overwrite:
        json["actions"][0]["overwriteExistingListId"] = overwrite
    logger.info(json)
    file_load_job_response = self.connection.post_request('fileLoadingJobs',
                                                          json=json)
    job_id = file_load_job_response['jobId']
    logger.info(f'Saved list loading job {job_id} created. Reference the '
                'callback url to check for job status.')
    return file_load_job_response
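# Example usage (a sketch; the webhook URL, folder id, and bucket are
# placeholders):
#
#     van.upload_saved_list_rest(
#         tbl,
#         url_type='S3',
#         folder_id=1234,
#         list_name='My Saved List',
#         description='Weekly refresh of the target universe',
#         callback_url='https://example.com/van-webhook',
#         columns=['vanid'],
#         id_column='vanid',
#         bucket='my-bucket')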
def upload_scores(self, tbl, config, url_type, id_type='vanid', email=None,
                  auto_approve=True, approve_tolerance=.1, **url_kwargs):
    """
    Upload scores. Use to create or overwrite scores. Multiple score loads
    should be configured in a single call. [1]_

    `Args:`
        tbl: object
            A parsons.Table object. The table must contain the scores and the
            first column in the table must contain the primary key
            (e.g. vanid).
        config: list
            The score configuration. A list of dictionaries in which you
            specify the following

            .. list-table::
                :widths: 20 80
                :header-rows: 0

                * - ``score_column``
                  - The name of the column where the score is housed.
                * - ``score_id``
                  - The score slot id.

            Example:

            .. highlight:: python
            .. code-block:: python

              [{'score_id': int, 'score_column': str},
               {'score_id': int, 'score_column': str}]

        url_type: str
            The cloud file storage to use to post the file (``S3`` or
            ``GCS``). See :ref:`Cloud Storage <cloud-storage>` for more
            details.
        id_type: str
            The primary key type. The options, beyond ``vanid``, are specific
            to your instance of VAN.
        email: str
            An email address to send job load status updates.
        auto_approve: boolean
            If the scores are within the expected tolerance of deviation from
            the average values provided, the scores will be automatically
            approved.
        approve_tolerance: float
            The deviation from the average scores allowed in order to
            automatically approve the score. Maximum of .1.
        **url_kwargs: kwargs
            Arguments to configure your cloud storage url type. See
            :ref:`Cloud Storage <cloud-storage>` for more details.
    `Returns:`
        int
            The score load job id.

    .. [1] NGPVAN asks that you load multiple scores in a single call to
           reduce the load on their servers.
    """

    # Move to cloud storage
    file_name = str(uuid.uuid1())
    url = cloud_storage.post_file(tbl,
                                  url_type,
                                  file_path=file_name + '.zip',
                                  **url_kwargs)
    logger.info(f'Table uploaded to {url_type}.')

    # Generate shell request
    json = {
        "description": 'A description',
        "file": {
            "columnDelimiter": 'csv',
            "columns": [{'name': c} for c in tbl.columns],
            "fileName": file_name + '.csv',
            "hasHeader": "True",
            "hasQuotes": "False",
            "sourceUrl": url
        },
        "actions": []
    }

    # Configure each score
    for i in config:
        action = {
            "actionType": "score",
            "personIdColumn": tbl.columns[0],
            "personIdType": id_type,
            "scoreColumn": i['score_column'],
            "scoreId": i['score_id']
        }

        # Set the approval criteria from the table's mean score so that VAN
        # can auto-approve the load.
        if auto_approve:
            average = petl.stats(tbl.table, i['score_column']).mean
            action['approvalCriteria'] = {
                "average": average,
                "tolerance": approve_tolerance
            }

        json['actions'].append(action)

    # Add email listener
    if email:
        json['listeners'] = [{"type": "EMAIL", 'value': email}]

    # Upload scores
    r = self.connection.post_request('fileLoadingJobs', json=json)
    logger.info(f"Scores job {r['jobId']} created.")
    return r['jobId']
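# Example usage (a sketch; the score slot ids are placeholders for the ids
# VAN assigns when score slots are created):
#
#     config = [{'score_id': 1111, 'score_column': 'turnout_score'},
#               {'score_id': 2222, 'score_column': 'support_score'}]
#     job_id = van.upload_scores(tbl, config, url_type='S3',
#                                email='me@example.org', bucket='my-bucket')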