def cleanUpAddress(dataName, previousStageNumber, thisStageNumber, qualityFlag, operatorToUse, operationFlag):
    dataFlow, fullPackagePath = openDataFlowPackage(dataName, previousStageNumber, qualityFlag)

    if dataFlow:
        print('{0}: loaded package from path {1}'.format(dataName, fullPackagePath))

        if operationFlag != '':
            # First replace the escape sequence (0d0a) with comma + space
            dataFlow = dataFlow.str_replace(operationFlag, r'\0d0a', ', ')
            # Now capture any empty addresses
            dataFlow = dataFlow.replace(operationFlag, r', , , ', None)
            print('{0}: cleaned up addresses in column {1}'.format(dataName, operationFlag))

            # Will now try to split out the address into component columns...
            # builder = dataFlow.builders.split_column_by_example(source_column=operationFlag)
            # builder.add_example(example=('552 Malvern Avenue, St. George, Bristol', \
            #     ['552 Malvern Avenue', 'St. George', 'Bristol']))
            # builder.add_example(example=('224A Tomswood St, Hainault, Ilford, Cornwall', \
            #     ['224A Tomswood St', 'Hainault', 'Ilford', 'Cornwall']))
            # dataFlow = builder.to_dataflow()
            # dataFlow = dataFlow.split_column_by_delimiters(operationFlag, ', ', True)

        dataProfile = dataFlow.get_profile()

        # Now generate column and data flow inventories
        columnInventory = getColumnStats(dataProfile, dataName, thisStageNumber, operatorToUse, operationFlag)
        dataFlowInventory = getDataFlowStats(dataFlow, dataProfile, dataName, thisStageNumber, operatorToUse, operationFlag)

        # Finally save the data flow so it can be passed onto the next stage of the process...
        targetPackagePath = saveDataFlowPackage(dataFlow, dataName, thisStageNumber, 'A')
        print('{0}: saved package to {1}'.format(dataName, targetPackagePath))

        return dataFlow, columnInventory, dataFlowInventory
    else:
        print('{0}: no package file found at location {1}'.format(dataName, fullPackagePath))
        return None, None, None

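# A minimal usage sketch for calling this stage. The dataflow name, stage
# numbers and column name below are illustrative assumptions, not values taken
# from the pipeline config: operationFlag carries the name of the column that
# holds the raw address text.
def _cleanUpAddressUsageSketch():
    return cleanUpAddress(dataName='MEMBER',
                          previousStageNumber='20',
                          thisStageNumber='21',
                          qualityFlag='A',
                          operatorToUse='cleanUpAddress',
                          operationFlag='ADDRESS')
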
def removeDuplicates(dataName, previousStageNumber, thisStageNumber, qualityFlag, operatorToUse, operationFlag):
    dataFlow, fullPackagePath = openDataFlowPackage(dataName, previousStageNumber, qualityFlag)

    if dataFlow:
        print('{0}: loaded package from path {1}'.format(dataName, fullPackagePath))

        if operationFlag != '':
            columnsToKeep = operationFlag
            numberOfRowsBefore = dataFlow.row_count
            dataFlow = dataFlow.distinct(dprep.ColumnSelector(columnsToKeep, True, True, invert=False))
            print('{0}: removed duplicates based on column {1}, rows before {2}, rows after {3}'.format(
                dataName, operationFlag, numberOfRowsBefore, dataFlow.row_count))
        else:
            print('{0}: no duplicate processing required'.format(dataName))

        dataProfile = dataFlow.get_profile()

        # Now generate column and data flow inventories
        columnInventory = getColumnStats(dataProfile, dataName, thisStageNumber, operatorToUse, operationFlag)
        dataFlowInventory = getDataFlowStats(dataFlow, dataProfile, dataName, thisStageNumber, operatorToUse, operationFlag)

        # Finally save the data flow so it can be passed onto the next stage of the process...
        targetPackagePath = saveDataFlowPackage(dataFlow, dataName, thisStageNumber, qualityFlag)
        print('{0}: saved package to {1}'.format(dataName, targetPackagePath))

        return dataFlow, columnInventory, dataFlowInventory
    else:
        print('{0}: no package file found at location {1}'.format(dataName, fullPackagePath))
        return None, None, None

def automaticallyDetectColumnTypes(dataName, previousStageNumber, thisStageNumber, qualityFlag, operatorToUse, operationFlag):
    dataFlow, fullPackagePath = openDataFlowPackage(dataName, previousStageNumber, qualityFlag)

    if dataFlow:
        print('{0}: loaded package from path {1}'.format(dataName, fullPackagePath))

        # Now perform the operation on the dataFlow: i.e. automatically detect and apply column types
        if operationFlag == 'Yes':
            # Detect and apply column types
            builder = dataFlow.builders.set_column_types()
            builder.learn()
            print('{0}: candidates detected {1}'.format(dataName, builder.conversion_candidates))
            builder.ambiguous_date_conversions_keep_month_day()
            dataFlow = builder.to_dataflow()

        dataProfile = dataFlow.get_profile()

        # Now generate column and data flow inventories
        columnInventory = getColumnStats(dataProfile, dataName, thisStageNumber, operatorToUse, operationFlag)
        dataFlowInventory = getDataFlowStats(dataFlow, dataProfile, dataName, thisStageNumber, operatorToUse, operationFlag)

        # Finally save the data flow so it can be passed onto the next stage of the process...
        targetPackagePath = saveDataFlowPackage(dataFlow, dataName, thisStageNumber, 'A')
        print('{0}: saved package to {1}'.format(dataName, targetPackagePath))

        return dataFlow, columnInventory, dataFlowInventory
    else:
        print('{0}: no package file found at location {1}'.format(dataName, fullPackagePath))
        return None, None, None

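# If the automatic detection above picks the wrong type for a column, a type
# can also be forced explicitly on the dataflow, as is done in mapLookups
# below. A minimal sketch - the column name and type here are illustrative
# assumptions, not part of the pipeline config:
def _forceColumnTypeSketch(dataFlow):
    # Force a single column to DECIMAL, bypassing the learned candidates
    return dataFlow.set_column_types({'SOME_NUMERIC_COLUMN': dprep.FieldType.DECIMAL})
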
def parseNulls(dataName, previousStageNumber, thisStageNumber, qualityFlag, operatorToUse, operationFlag):
    dataFlow, fullPackagePath = openDataFlowPackage(dataName, previousStageNumber, qualityFlag)

    if dataFlow:
        print('{0}: loaded package from path {1}'.format(dataName, fullPackagePath))

        if operationFlag != '':
            # Get a list of the columns and count them...
            dataFlowColumns = list(dataFlow.get_profile().columns.keys())
            # Replace any occurrences of null, including the custom string, across all columns...
            dataFlow = dataFlow.replace_na(dataFlowColumns, custom_na_list=operationFlag)
            print('{0}: parsed nulls including custom string {1} from {2} columns'.format(
                dataName, operationFlag, len(dataFlowColumns)))

        dataProfile = dataFlow.get_profile()

        # Now generate column and data flow inventories
        columnInventory = getColumnStats(dataProfile, dataName, thisStageNumber, operatorToUse, operationFlag)
        dataFlowInventory = getDataFlowStats(dataFlow, dataProfile, dataName, thisStageNumber, operatorToUse, operationFlag)

        # Finally save the data flow so it can be passed onto the next stage of the process...
        targetPackagePath = saveDataFlowPackage(dataFlow, dataName, thisStageNumber, 'A')
        print('{0}: saved package to {1}'.format(dataName, targetPackagePath))

        return dataFlow, columnInventory, dataFlowInventory
    else:
        print('{0}: no package file found at location {1}'.format(dataName, fullPackagePath))
        return None, None, None

def removeRowsFromTop(dataName, previousStageNumber, thisStageNumber, qualityFlag, operatorToUse, operationFlag):
    dataFlow, fullPackagePath = openDataFlowPackage(dataName, previousStageNumber, qualityFlag)

    if dataFlow:
        print('{0}: loaded package from path {1}'.format(dataName, fullPackagePath))

        # Now perform the operation on the dataFlow: i.e. remove the number of rows specified from the top
        numberOfRowsToRemove = int(operationFlag)
        dataFlow = dataFlow.skip(numberOfRowsToRemove)
        print('{0}: removed first {1} row(s)'.format(dataName, numberOfRowsToRemove))

        dataProfile = dataFlow.get_profile()

        # Now generate column and data flow inventories
        columnInventory = getColumnStats(dataProfile, dataName, thisStageNumber, operatorToUse, operationFlag)
        dataFlowInventory = getDataFlowStats(dataFlow, dataProfile, dataName, thisStageNumber, operatorToUse, operationFlag)

        # Finally save the data flow so it can be passed onto the next stage of the process...
        targetPackagePath = saveDataFlowPackage(dataFlow, dataName, thisStageNumber, qualityFlag)
        print('{0}: saved package to {1}'.format(dataName, targetPackagePath))

        # Now return all of the components back to the main loop...
        return dataFlow, columnInventory, dataFlowInventory
    else:
        print('{0}: no package file found at location {1}'.format(dataName, fullPackagePath))
        return None, None, None

def renameColumns(dataName, previousStageNumber, thisStageNumber, qualityFlag, operatorToUse, operationFlag):
    dataFlow, fullPackagePath = openDataFlowPackage(dataName, previousStageNumber, qualityFlag)

    if dataFlow:
        print('{0}: loaded package from path {1}'.format(dataName, fullPackagePath))

        if operationFlag != '':
            # Do the operation on columns to rename them...
            # NOTE: the rename itself is not implemented yet; see the sketch below this function
            print('{0}: renamed {1} columns'.format(dataName, operationFlag))
        else:
            print('{0}: no operation to perform'.format(dataName))

        dataProfile = dataFlow.get_profile()

        # Now generate column and data flow inventories
        columnInventory = getColumnStats(dataProfile, dataName, thisStageNumber, operatorToUse, operationFlag)
        dataFlowInventory = getDataFlowStats(dataFlow, dataProfile, dataName, thisStageNumber, operatorToUse, operationFlag)

        # Finally save the data flow so it can be passed onto the next stage of the process...
        targetPackagePath = saveDataFlowPackage(dataFlow, dataName, thisStageNumber, 'A')
        print('{0}: saved package to {1}'.format(dataName, targetPackagePath))

        return dataFlow, columnInventory, dataFlowInventory
    else:
        print('{0}: no package file found at location {1}'.format(dataName, fullPackagePath))
        return None, None, None

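# A minimal sketch of how the missing rename step could be implemented,
# assuming (this format is an assumption, not the pipeline's actual
# convention) that operationFlag names a config CSV in ./Config/ with
# 'SourceColumn' and 'TargetColumn' columns:
def _renameColumnsFromConfigSketch(dataFlow, operationFlag):
    # Read the hypothetical rename config and build an old-name -> new-name map
    renameConfig = dprep.read_csv('./Config/' + operationFlag).to_pandas_dataframe()
    renames = {row['SourceColumn']: row['TargetColumn'] for _, row in renameConfig.iterrows()}
    # rename_columns takes a dictionary of current column names to new names
    return dataFlow.rename_columns(renames)
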
def mapLookups(dataName, previousStageNumber, thisStageNumber, qualityFlag, operatorToUse, operationFlag):
    dataFlow, fullPackagePath = openDataFlowPackage(dataName, previousStageNumber, qualityFlag)

    if dataFlow:
        print('{0}: loaded package from path {1}'.format(dataName, fullPackagePath))

        if operationFlag != '':
            transforms = load_transformation_configuration('./Config/' + operationFlag)
            if len(transforms) > 1:
                lookups = get_lookups_from_transforms(transforms)
                for key in lookups:
                    lookupDictionary = lookups[key]
                    replacements = []
                    dataFlow = dataFlow.set_column_types({key: dprep.FieldType.STRING})
                    for lookup in lookupDictionary:
                        replacements.append(ReplacementsValue(lookup, lookupDictionary[lookup]))
                    destination_column = get_destination_column_name(key, transforms)
                    dataFlow = dataFlow.map_column(key, destination_column, replacements)
                    print('{0}: transformed lookups for column {1}. Added new column {2}'.format(
                        dataName, key, destination_column))
        else:
            print('{0}: no look-up processing required'.format(dataName))

        dataProfile = dataFlow.get_profile()

        # Now generate column and data flow inventories
        columnInventory = getColumnStats(dataProfile, dataName, thisStageNumber, operatorToUse, operationFlag)
        dataFlowInventory = getDataFlowStats(dataFlow, dataProfile, dataName, thisStageNumber, operatorToUse, operationFlag)

        # Finally save the data flow so it can be passed onto the next stage of the process...
        targetPackagePath = saveDataFlowPackage(dataFlow, dataName, thisStageNumber, qualityFlag)
        print('{0}: saved package to {1}'.format(dataName, targetPackagePath))

        return dataFlow, columnInventory, dataFlowInventory
    else:
        print('{0}: no package file found at location {1}'.format(dataName, fullPackagePath))
        return None, None, None

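# A minimal sketch of the map_column step above in isolation. The column names
# and lookup values are illustrative assumptions, not taken from any real
# transformation config: each ReplacementsValue pairs a source value with the
# value it should become in the new destination column.
def _mapLookupSketch(dataFlow):
    replacements = [
        ReplacementsValue('M', 'Male'),
        ReplacementsValue('F', 'Female')
    ]
    # Map values found in 'GENDER' into a new 'GENDER_DESC' column
    return dataFlow.map_column('GENDER', 'GENDER_DESC', replacements)
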
def createUPMDataflow(dataName, previousStageNumber, thisStageNumber, qualityFlag, operatorToUse, operationFlag):
    dataFlow, fullPackagePath = openDataFlowPackage(dataName, previousStageNumber, qualityFlag)

    if dataFlow:
        print('{0}: loaded package from path {1}'.format(dataName, fullPackagePath))

        if operationFlag != '':
            mappingConfig = dprep.read_csv('./Config/' + operationFlag).to_pandas_dataframe()
            targetDataFlow = dataFlow
            columnsToKeep = ''

            for sourceTable in mappingConfig[mappingConfig.SourceTable == dataName]['SourceTable'].unique():
                for sourceColumn, targetColumn in mappingConfig[mappingConfig.SourceTable == sourceTable][['SourceColumn', 'TargetColumn']].values:
                    if columnsToKeep == '':
                        columnsToKeep = targetColumn
                    else:
                        columnsToKeep = columnsToKeep + '|' + targetColumn
                    targetDataFlow = targetDataFlow.rename_columns({sourceColumn: targetColumn})

            # Drop every column that is not in the list of target columns
            targetDataFlow = targetDataFlow.drop_columns(dprep.ColumnSelector(columnsToKeep, True, True, invert=True))

            newPackageName = next(iter(mappingConfig[mappingConfig.SourceTable == dataName]['TargetTable'].unique()))
            createNewPackageDirectory(newPackageName)
            saveDataFlowPackage(targetDataFlow, newPackageName, thisStageNumber, 'A')
        else:
            print('{0}: no mapping processing required'.format(dataName))

        dataProfile = dataFlow.get_profile()

        # Now generate column and data flow inventories
        columnInventory = getColumnStats(dataProfile, dataName, thisStageNumber, operatorToUse, operationFlag)
        dataFlowInventory = getDataFlowStats(dataFlow, dataProfile, dataName, thisStageNumber, operatorToUse, operationFlag)

        # Finally save the data flow so it can be passed onto the next stage of the process...
        targetPackagePath = saveDataFlowPackage(dataFlow, dataName, thisStageNumber, qualityFlag)
        print('{0}: saved package to {1}'.format(dataName, targetPackagePath))

        return dataFlow, columnInventory, dataFlowInventory
    else:
        print('{0}: no package file found at location {1}'.format(dataName, fullPackagePath))
        return None, None, None

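# The mapping config loaded by createUPMDataflow is expected (inferred from
# the code above, not documented elsewhere) to contain at least the columns
# SourceTable, SourceColumn, TargetColumn and TargetTable. An illustrative,
# assumed example of what ./Config/<operationFlag> might look like:
#
#   SourceTable,SourceColumn,TargetColumn,TargetTable
#   MEMBER,MEM_ID,MemberId,UPM_MEMBER
#   MEMBER,MEM_SURNAME,Surname,UPM_MEMBER
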
    # NOTE: this block runs once per dataflow inside the data-loading loop;
    # columnInventoryAll and dataFlowInventoryAll accumulate across iterations.
    if operationFlag == 'Yes':
        print('{0}: loading data from file path {1}'.format(dataName, fullFilePath))
        newDataFlow = dprep.read_csv(fullFilePath)

        dataProfile = newDataFlow.get_profile()

        columnInventory = getColumnStats(dataProfile, dataName, thisStageNumber, operatorToUse, operationFlag)
        columnInventoryAll = columnInventoryAll.append(columnInventory)
        print('{0}: generated column inventory'.format(dataName))

        dataFlowInventory = getDataFlowStats(newDataFlow, dataProfile, dataName, thisStageNumber, operatorToUse, operationFlag)
        dataFlowInventoryAll = dataFlowInventoryAll.append(dataFlowInventory)
        print('{0}: generated data flow inventory'.format(dataName))

        # Finally save the data flow so it can be passed onto the next stage of the process...
        targetPackagePath = saveDataFlowPackage(newDataFlow, dataName, thisStageNumber, 'A')
        print('{0}: saved package to {1}'.format(dataName, targetPackagePath))
    else:
        print('{0}: no package file created.'.format(dataName))

# Once we have processed all dataflows, we save the inventories away
saveColumnInventory(columnInventoryAll, thisStageNumber)
saveDataFlowInventory(dataFlowInventoryAll, thisStageNumber)

def splitTableBasedOnSingleColumn(dataName, previousStageNumber, thisStageNumber, qualityFlag, operatorToUse, operationFlag):
    dataFlow, fullPackagePath = openDataFlowPackage(dataName, previousStageNumber, qualityFlag)

    if dataFlow:
        print('{0}: loaded package from path {1}'.format(dataName, fullPackagePath))

        dataProfile = dataFlow.get_profile()

        # Set up empty intermediate dataframes that we will use to build up inventories at both dataFlow and column level
        dataFlowInventoryIntermediate = pd.DataFrame()
        columnInventoryIntermediate = pd.DataFrame()

        if operationFlag != '':
            # First, grab the unique set of values in the column
            valuesInColumn = dataProfile.columns[operationFlag].value_counts

            # Now filter the original data flow based on each unique value in turn and fork a new data flow!
            for valueToSplitOn in valuesInColumn:
                newDataFlow = dataFlow.filter(dataFlow[operationFlag] == valueToSplitOn.value)

                # Create a new name for this data flow based on concatenation of source dataflow, column name and value used for the filter
                newDataName = dataName + '_' + operationFlag + '_' + valueToSplitOn.value

                newDataProfile = newDataFlow.get_profile()

                # Now generate column and data flow inventories
                columnInventory = getColumnStats(newDataProfile, newDataName, thisStageNumber, operatorToUse, operationFlag)
                dataFlowInventory = getDataFlowStats(newDataFlow, newDataProfile, newDataName, thisStageNumber, operatorToUse, operationFlag)

                # Capture the column inventory for the new dataflow
                columnInventoryIntermediate = columnInventoryIntermediate.append(columnInventory)

                # Capture the data flow inventory for the new data flow
                dataFlowInventoryIntermediate = dataFlowInventoryIntermediate.append(dataFlowInventory)

                # Finally save the data flow so it can be passed onto the next stage of the process...
                targetPackagePath = saveDataFlowPackage(newDataFlow, newDataName, thisStageNumber, qualityFlag)
                print('{0}: saved package to {1}'.format(newDataName, targetPackagePath))
        else:
            print('{0}: no operation required'.format(dataName))

        # Generate column and data flow inventories for the source dataflow
        columnInventory = getColumnStats(dataProfile, dataName, thisStageNumber, operatorToUse, operationFlag)
        columnInventoryIntermediate = columnInventoryIntermediate.append(columnInventory)

        dataFlowInventory = getDataFlowStats(dataFlow, dataProfile, dataName, thisStageNumber, operatorToUse, operationFlag)
        dataFlowInventoryIntermediate = dataFlowInventoryIntermediate.append(dataFlowInventory)

        # Finally save the data flow so it can be passed onto the next stage of the process...
        targetPackagePath = saveDataFlowPackage(dataFlow, dataName, thisStageNumber, qualityFlag)
        print('{0}: saved package to {1}'.format(dataName, targetPackagePath))

        # Now return all of the components back to the main loop...
        return dataFlow, columnInventoryIntermediate, dataFlowInventoryIntermediate
    else:
        print('{0}: no package file found at location {1}'.format(dataName, fullPackagePath))
        return None, None, None

def quarantineRows(dataName, previousStageNumber, thisStageNumber, qualityFlag, operatorToUse, operationFlag):
    dataFlow, fullPackagePath = openDataFlowPackage(dataName, previousStageNumber, qualityFlag)

    if dataFlow:
        print('{0}: loaded package from path {1}'.format(dataName, fullPackagePath))

        # First count the number of columns found
        dataFlowColumns = list(dataFlow.get_profile().columns.keys())
        numberOfColumnsFound = len(dataFlowColumns)

        # Now convert the operationFlag to an integer
        headerCount = int(operationFlag)

        # If we have more columns than expected, we quarantine rows which have values in the extra columns
        if numberOfColumnsFound > headerCount:
            # NOTE - this logic assumes that all unwanted columns are on the far right; this could be improved!
            # Fork a new data flow with rows that have data in the unexpected columns
            print('{0}: we have found {1} columns, expected {2}, so will now quarantine any rows with data in them'.format(
                dataName, numberOfColumnsFound, headerCount))
            quarantinedDataFlow = dataFlow.drop_nulls(dataFlowColumns[headerCount:])

            # There is a chance we have an extra column but no rows to quarantine, so check this first
            if quarantinedDataFlow.row_count is None:
                quarantinedRowCount = 0
                print('{0}: no rows to quarantine'.format(dataName))
            else:
                quarantinedRowCount = quarantinedDataFlow.row_count

            # Finally save the data flow so it can be used later
            fullPackagePath = saveDataFlowPackage(quarantinedDataFlow, dataName, thisStageNumber, 'B')
            print('{0}: quarantined {1} rows of data to {2}'.format(dataName, quarantinedRowCount, fullPackagePath))

            # Now filter out the quarantined rows from the main data set
            # NOTE: can't figure out a better way of doing this for now - see note below...
            for columnToCheck in dataFlowColumns[headerCount:]:
                # NOTE - don't know why the commented line of code below doesn't work!
                # dataFlow = dataFlow.filter(dataFlow[columnToCheck] != '')
                dataFlow = dataFlow.assert_value(columnToCheck, value != '', error_code='ShouldBeNone')
                dataFlow = dataFlow.filter(col(columnToCheck).is_error())
                print('{0}: filtered column {1}, row count now {2}'.format(dataName, columnToCheck, dataFlow.row_count))

            # Finally drop the extra columns
            dataFlow = dataFlow.drop_columns(dataFlowColumns[headerCount:])
            print('{0}: dropped {1} unwanted columns'.format(dataName, len(dataFlowColumns[headerCount:])))
        else:
            print('{0}: we have found {1} columns, expected {2}, so not going to do anything'.format(
                dataName, numberOfColumnsFound, headerCount))

        dataProfile = dataFlow.get_profile()

        # Now generate column and data flow inventories
        columnInventory = getColumnStats(dataProfile, dataName, thisStageNumber, operatorToUse, operationFlag)
        dataFlowInventory = getDataFlowStats(dataFlow, dataProfile, dataName, thisStageNumber, operatorToUse, operationFlag)

        # Finally save the data flow so it can be passed onto the next stage of the process...
        targetPackagePath = saveDataFlowPackage(dataFlow, dataName, thisStageNumber, qualityFlag)
        print('{0}: saved package to {1}'.format(dataName, targetPackagePath))

        # Now return all of the components back to the main loop...
        return dataFlow, columnInventory, dataFlowInventory
    else:
        print('{0}: no package file found at location {1}'.format(dataName, fullPackagePath))
        return None, None, None

def joinTables(dataName, previousStageNumber, thisStageNumber, qualityFlag, operatorToUse, operationFlag):
    dataFlow, fullPackagePath = openDataFlowPackage(dataName, previousStageNumber, qualityFlag)

    if dataFlow:
        print('{0}: loaded package from path {1}'.format(dataName, fullPackagePath))

        # Set up empty intermediate dataframes that we will use to build up inventories at both dataFlow and column level
        dataFlowInventoryIntermediate = pd.DataFrame()
        columnInventoryIntermediate = pd.DataFrame()

        if operationFlag != '':
            # Load the config file
            joinConfig = dprep.read_csv('./Config/' + operationFlag).to_pandas_dataframe()

            # For each entry in the config file...
            for index, row in joinConfig.iterrows():
                leftDataName = row['LeftDataName']
                leftDataFlowJoinColumn = row['LeftDataFlowJoinColumn']
                rightDataName = row['RightDataName']
                rightDataFlowJoinColumn = row['RightDataFlowJoinColumn']
                joinType = row['JoinType']
                print('{0}: ready to join {1} {2} -> {3} {4} using join type {5}'.format(
                    dataName, leftDataName, leftDataFlowJoinColumn, rightDataName, rightDataFlowJoinColumn, joinType))

                # Load the right-hand data flow
                rightDataFlow, fullPackagePath = openDataFlowPackage(rightDataName, previousStageNumber, qualityFlag)
                print('{0}: loaded package from path {1}'.format(rightDataName, fullPackagePath))

                # We always perform the inner "MATCH" style join
                join_builder = dataFlow.builders.join(
                    right_dataflow=rightDataFlow,
                    left_column_prefix=dataName + '_',
                    right_column_prefix=rightDataName + '_')
                join_builder.detect_column_info()
                join_builder.join_key_pairs = [(leftDataFlowJoinColumn, rightDataFlowJoinColumn)]

                # Setting up join type:
                # NONE = 0
                # MATCH = 2
                # UNMATCHLEFT = 4
                # UNMATCHRIGHT = 8
                join_builder.join_type = 2
                innerDataFlow = join_builder.to_dataflow()
                print('{0}: created inner dataflow, columns {1}, rows {2}'.format(
                    dataName, len(innerDataFlow.get_profile().columns), innerDataFlow.row_count))

                if joinType == "LEFT":
                    # Use the "UNMATCHLEFT" setting to grab the rows that haven't been joined from the left data flow
                    join_builder.join_type = 4
                    leftUnmatchedDataFlow = join_builder.to_dataflow()
                    print('{0}: created left unmatched dataflow, columns {1}, rows {2}'.format(
                        dataName, len(leftUnmatchedDataFlow.get_profile().columns), leftUnmatchedDataFlow.row_count))

                    # Now append this dataflow to the original inner join dataflow, to create a "left outer join"
                    newDataFlow = innerDataFlow.append_rows([leftUnmatchedDataFlow])
                else:
                    newDataFlow = innerDataFlow

                # Create a new name for this data flow based on concatenation of the left and right dataflow names
                newDataName = dataName + '_' + rightDataName

                # Output key stats
                print('{0}: left table, columns {1}, rows {2}'.format(
                    leftDataName, len(dataFlow.get_profile().columns), dataFlow.row_count))
                print('{0}: right table, columns {1}, rows {2}'.format(
                    rightDataName, len(rightDataFlow.get_profile().columns), rightDataFlow.row_count))

                newDataProfile = newDataFlow.get_profile()
                print('{0}: joined table, columns {1}, rows {2}'.format(
                    newDataName, len(newDataProfile.columns), newDataFlow.row_count))

                # Now generate column and data flow inventories
                columnInventory = getColumnStats(newDataProfile, newDataName, thisStageNumber, operatorToUse, operationFlag)
                dataFlowInventory = getDataFlowStats(newDataFlow, newDataProfile, newDataName, thisStageNumber, operatorToUse, operationFlag)

                # Capture the column inventory for the new dataflow
                columnInventoryIntermediate = columnInventoryIntermediate.append(columnInventory)

                # Capture the data flow inventory for the new data flow
                dataFlowInventoryIntermediate = dataFlowInventoryIntermediate.append(dataFlowInventory)

                # Finally save the data flow so it can be passed onto the next stage of the process...
                targetPackagePath = saveDataFlowPackage(newDataFlow, newDataName, thisStageNumber, 'A')
                print('{0}: saved package to {1}'.format(newDataName, targetPackagePath))
        else:
            print('{0}: no joining of tables required'.format(dataName))

        dataProfile = dataFlow.get_profile()

        # Now generate column and data flow inventories for the source dataflow
        columnInventory = getColumnStats(dataProfile, dataName, thisStageNumber, operatorToUse, operationFlag)
        columnInventoryIntermediate = columnInventoryIntermediate.append(columnInventory)

        dataFlowInventory = getDataFlowStats(dataFlow, dataProfile, dataName, thisStageNumber, operatorToUse, operationFlag)
        dataFlowInventoryIntermediate = dataFlowInventoryIntermediate.append(dataFlowInventory)

        # Finally save the data flow so it can be passed onto the next stage of the process...
        targetPackagePath = saveDataFlowPackage(dataFlow, dataName, thisStageNumber, qualityFlag)
        print('{0}: saved source package to {1}'.format(dataName, targetPackagePath))

        return dataFlow, columnInventoryIntermediate, dataFlowInventoryIntermediate
    else:
        print('{0}: no package file found at location {1}'.format(dataName, fullPackagePath))
        return None, None, None

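# The join config loaded by joinTables is expected (inferred from the
# row[...] accesses above, not documented elsewhere) to contain the columns
# LeftDataName, LeftDataFlowJoinColumn, RightDataName, RightDataFlowJoinColumn
# and JoinType. An illustrative, assumed example of ./Config/<operationFlag>:
#
#   LeftDataName,LeftDataFlowJoinColumn,RightDataName,RightDataFlowJoinColumn,JoinType
#   MEMBER,PRACTICE_ID,PRACTICE,PRACTICE_ID,LEFT
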