def preflightPostingValidation(self, parent_trace, posted_content_df):
    '''
    Early, user-oriented validation of the columns of a posting.

    `posted_content_df` is intended to be a DataFrame representation of the data posted in Excel.
    The purpose of this check is usability rather than data protection: a posting mistake would
    eventually be caught deeper in the parsing logic anyway, but the error produced there might not
    be very user friendly. Here we raise a high-level message that tells the user what to change.

    The only mandatory column is the one named after `self._entity_name`.

    @param parent_trace Trace object handed to the ApodeixiError raised when validation fails.
    @param posted_content_df Object whose `columns` attribute lists the posted column names.

    Raises an ApodeixiError if a mandatory column is missing. Returns nothing otherwise.
    '''
    # GOTCHA: a mandatory column like "Big Rocks" might become "big-rocks" after the first posting,
    #       since the generated form used for updates carries the formatted name. To avoid erroring
    #       out in that innocent situation, both sides of the comparison are normalized with the
    #       same yaml-fieldname formatter instead of comparing raw column names.
    to_yaml_name = StringUtils().format_as_yaml_fieldname

    posted_cols = []
    for raw_col in posted_content_df.columns:
        posted_cols.append(to_yaml_name(raw_col))

    mandatory_cols = [to_yaml_name(self._entity_name)]

    missing_cols = []
    for mandatory in mandatory_cols:
        if mandatory not in posted_cols:
            missing_cols.append(mandatory)

    if not missing_cols:
        return

    raise ApodeixiError(
        parent_trace,
        "Posting lacks some mandatory columns. This often happens if "
        "ranges are wrong in Posting Label.",
        data={'Missing columns': missing_cols,
              'Posted columns': posted_cols})
def preflightPostingValidation(self, parent_trace, posted_content_df):
    '''
    Early, user-oriented validation of the columns of a posting.

    `posted_content_df` is intended to be a DataFrame representation of the data posted in Excel.
    The purpose of this check is usability rather than data protection: a posting mistake would
    eventually be caught deeper in the parsing logic anyway, but the error produced there might not
    be very user friendly. Here we raise a high-level message that tells the user what to change.

    The only mandatory column is `MilestonesController._MilestonesConfig._ENTITY_NAME`. Whether it is
    present among the posted columns is decided by `StringUtils().is_in_as_yaml`, not by a raw
    string comparison.

    @param parent_trace Trace object handed to the ApodeixiError raised when validation fails.
    @param posted_content_df Object whose `columns` attribute lists the posted column names.

    Raises an ApodeixiError if a mandatory column is missing. Returns nothing otherwise.
    '''
    config = MilestonesController._MilestonesConfig

    posted_cols = list(posted_content_df.columns)
    mandatory_cols = [config._ENTITY_NAME]

    missing_cols = []
    for mandatory in mandatory_cols:
        found = StringUtils().is_in_as_yaml(mandatory, posted_cols)
        if not found:
            missing_cols.append(mandatory)

    if not missing_cols:
        return

    raise ApodeixiError(
        parent_trace,
        "Posting lacks some mandatory columns. This often happens if "
        "ranges are wrong in Posting Label.",
        data={'Missing columns': missing_cols,
              'Posted columns': posted_cols})
def manifestNameFromCoords(self, parent_trace, subnamespace, coords, kind):
    '''
    Helper method that returns what the 'name' field should be in the manifest to be created
    with the given filing coords, possibly complemented by the subnamespace. Usually used in the
    context of generating forms.

    General contract of this kind of method, by example: a manifest name like
    "modernization.dec-2020.fusionopus.default" in namespace "my-corp.production" would be built
    from "modernization" as the subnamespace and from filing coords that yield "dec-2020",
    "fusionopus" and "default".

    This particular implementation does not use any of its arguments: the name is always the
    yaml-fieldname formatting of `StaticData_Controller._STATIC_DATA`.

    @param subnamespace A string, which is allowed to be None. If not null, this is a further
                        partitioning of the namespace into finer slices. Unused here.
    @param coords A FilingCoords object corresponding to this controller. Unused here.
    @param kind The kind of manifest for which the name is sought. Unused here, since the same
                name is produced for every kind.
    '''
    static_data_label = StaticData_Controller._STATIC_DATA
    return StringUtils().format_as_yaml_fieldname(static_data_label)
def addLink(self, parent_trace, row_number, uid):
    '''
    Remembers an association between the row_number and the uid, in both directions
    (`self.row_2_uid` and `self.uid_2_row`).

    @param row_number An int representing a row number in a tabular representation of a manifest's
                    data. For example, an Excel row number or a DataFrame row number.
    @param uid A string representing a unique identifier of a node in a manifest's assertion tree.
                    For example, "P4.C5"

    Raises an ApodeixiError if `row_number` is not exactly an int, if `uid` is not exactly a str,
    or if `uid` is considered blank by `StringUtils().is_blank`.
    '''
    # Exact type checks (not isinstance), so subclasses of int or str are rejected
    row_type = type(row_number)
    if row_type is not int:
        raise ApodeixiError(parent_trace,
                            "Can't add a link with a row number that is not an int",
                            data={"type(row_number)": str(row_type)})

    uid_type = type(uid)
    if uid_type is not str:
        raise ApodeixiError(parent_trace,
                            "Can't add a link with a uid that is not an string",
                            data={"type(uid)": str(uid_type)})

    uid_is_blank = StringUtils().is_blank(uid)
    if uid_is_blank:
        raise ApodeixiError(parent_trace,
                            "Can't add a link with a blank uid",
                            data={"uid": str(uid)})

    # All checks passed, so record the link in both lookup tables
    self.row_2_uid[row_number] = uid
    self.uid_2_row[uid] = row_number
def determineNamespace(self, parent_trace):
    '''
    Returns the namespace implied by the fields of this posting label: the organization and the
    knowledge base area, joined by a "." and then formatted as a yaml fieldname.
    '''
    organization = self.organization(parent_trace)
    kb_area = self.knowledgeBaseArea(parent_trace)

    unformatted = organization + "." + kb_area
    return StringUtils().format_as_yaml_fieldname(unformatted)
def mask_timestamps(a_dict):
    '''
    Returns a new dictionary built from `a_dict` in which every key has been replaced by
    `StringUtils().mask_timestamp(key)`.

    Values that are exactly of type dict are processed recursively in the same way; any other
    value is carried over unchanged. The input dictionary is not modified.
    '''
    masked = {}
    for original_key, original_val in a_dict.items():
        # Recurse first, then mask the key (same order as values/keys are handled per entry)
        masked_val = mask_timestamps(original_val) if type(original_val) is dict else original_val
        masked[StringUtils().mask_timestamp(original_key)] = masked_val
    return masked
def is_blank(self, txt):
    '''
    Returns True if `txt`, once cleaned with `DataFrameUtils().clean`, is a string that
    `StringUtils().is_blank` considers blank. Returns False whenever the cleaned value is not
    exactly a str.

    The cleaning step is there to avoid problems with nan, numpy classes, dates, NaTs, etc.
    NOTE(review): whether a NaN input ends up reported as blank depends on what
    `DataFrameUtils().clean` turns it into, which is not visible from here.
    '''
    cleaned = DataFrameUtils().clean(txt)
    if type(cleaned) is not str:
        return False
    return StringUtils().is_blank(cleaned)
def subnamespaceFromLabel(self, parent_trace, label):
    '''
    Helper method that returns the 'subnamespace' that is a portion of a manifest's name.
    It is inferred from a `label` that provides the posting details for a manifest that should
    be created.

    In this implementation the subnamespace is the label's program, formatted as a yaml
    fieldname.

    @param label Posting label object; only its `program(parent_trace)` method is used.
    '''
    program = label.program(parent_trace)
    return StringUtils().format_as_yaml_fieldname(program)
def non_entity_cols(self):
    '''
    Returns a list of strings, corresponding to the Interval's columns (`self.columns`) that are
    not the entity type (`self.entity_name`). The original order of the columns is kept.
    '''
    # GOTCHA: column names are not compared to the entity name directly, since there might be
    #       spurious differences due to lower/upper case. Both sides are formatted as a yaml field
    #       first, so that a single policy on case, spaces, hyphens, etc. applies to the comparison.
    to_yaml_name = StringUtils().format_as_yaml_fieldname

    kept_columns = []
    for column in self.columns:
        if to_yaml_name(column) != to_yaml_name(self.entity_name):
            kept_columns.append(column)
    return kept_columns
def get_scoring_cycles(self, parent_trace, kb_session, environment_filter):
    '''
    Returns a nicely formatted string, suitable for CLI output. It displays all valid scoring cycles
    of the system.

    The search visits the store's base environment plus every sandbox reported by
    `self._sandboxes_names_list`, activating each of them in turn. The environment that was current
    when the method was called is re-activated before returning, and also when an error
    propagates out of the search.

    @param kb_session Session object; this method uses its `a6i_config` and `store` attributes.
    @param environment_filter A lambda function, that takes a string argument and returns True or False.
                    Its purpose is to filter out which KnowledgeBase store's environments to include
                    when searching for scoring cycles. If it is None, then all environments are included.

    Raises an ApodeixiError if retrieving scoring cycles fails for a reason other than the
    static data not being configured for a namespace.
    '''
    JOURNEY_COL = 'journey'
    SCORING_CYCLE_COL = 'Scoring Cycle'
    SCENARIO_COL = 'Scenario'

    # The one failure that is tolerated during the search, recognized by the start of its message
    NOT_CONFIGURED_MSG = "Static data of type 'scoring-cycle' is not configured for namespace"

    expected_organization = kb_session.a6i_config.getOrganization(parent_trace)
    allowed_kb_areas = kb_session.a6i_config.getKnowledgeBaseAreas(parent_trace)

    FMT = StringUtils().format_as_yaml_fieldname
    namespaces = [FMT(expected_organization + "." + kb_area) for kb_area in allowed_kb_areas]

    description_table = []
    description_headers = ["Journey", "Scoring cycle", "Scenario", "Namespace", "Environment"]

    environments = [kb_session.store.base_environment(parent_trace).name(parent_trace)]
    environments.extend(self._sandboxes_names_list(parent_trace, kb_session))

    if environment_filter is not None:
        # The explicit comparison to True is kept on purpose: only results that compare equal
        # to True select an environment, exactly as before.
        environments = [e for e in environments if environment_filter(e) == True]

    original_env_name = kb_session.store.current_environment(parent_trace).name(parent_trace)
    try:
        for env_name in environments:
            kb_session.store.activate(parent_trace, env_name)

            for ns in namespaces:
                validator = StaticDataValidator(parent_trace, kb_session.store, kb_session.a6i_config)
                try:
                    sc_df = validator.getScoringCycles(parent_trace, ns)
                    for _, row in sc_df.iterrows():
                        description_table.append([row[JOURNEY_COL],
                                                  row[SCORING_CYCLE_COL],
                                                  row[SCENARIO_COL],
                                                  ns,
                                                  env_name])
                except ApodeixiError as ex:
                    if ex.msg.startswith(NOT_CONFIGURED_MSG):
                        # Just ignore this error, since perhaps this namespace has no scoring
                        # cycles but maybe other namespaces do
                        continue
                    # Bare raise keeps the original traceback intact
                    raise
    finally:
        # Always put the store back in the environment the caller had active, even if the
        # search above failed half-way through the list of environments.
        kb_session.store.activate(parent_trace, original_env_name)

    description = "\n\n"
    description += tabulate(description_table, headers=description_headers)
    description += "\n"

    return description
def manifestNameFromLabel(self, parent_trace, label, kind):
    '''
    Helper method that returns what the 'name' field should be in the manifest to be created
    with the given label.

    This implementation does not use any of its arguments: the name is always the yaml-fieldname
    formatting of `StaticData_Controller._STATIC_DATA`.

    @param label Posting label for the manifest. Unused here.
    @param kind The kind of manifest for which the name is sought. This parameter exists to
                support controllers that process multiple manifest kinds and do not use the same
                name for all of them (for example, controllers that point to reference data in a
                different domain/sub-domain). Unused here.
    '''
    static_data_label = StaticData_Controller._STATIC_DATA
    return StringUtils().format_as_yaml_fieldname(static_data_label)
def get_namespaces(self, parent_trace, kb_session):
    '''
    Returns a nicely formatted string, suitable for CLI output. It displays the valid namespaces
    of the system.

    There is one namespace per knowledge base area configured in `kb_session.a6i_config`: the
    configured organization and the area, joined by "." and formatted as a yaml fieldname.
    The result is a one-column table ("Namespace") preceded by two newlines and followed by one.
    '''
    expected_organization = kb_session.a6i_config.getOrganization(parent_trace)
    allowed_kb_areas = kb_session.a6i_config.getKnowledgeBaseAreas(parent_trace)

    to_yaml_name = StringUtils().format_as_yaml_fieldname

    # One single-cell row per namespace
    rows = []
    for kb_area in allowed_kb_areas:
        namespace = to_yaml_name(expected_organization + "." + kb_area)
        rows.append([namespace])

    table_txt = tabulate(rows, headers=["Namespace"])
    return "\n\n" + table_txt + "\n"
def _strip_parenthesis(parent_trace, txt):
    '''
    Returns a copy of `txt` from which text within parenthesis (e.g., comments) has been removed.

    The text is first stripped with `StringUtils().strip` and then tokenized with the natural
    language tool `nltk.tokenize.SExprTokenizer` in non-strict mode. Every token that contains a
    '(' or a ')' is discarded, and the surviving tokens are joined with a single space.

    @param parent_trace Trace object handed to the ApodeixiError raised on invalid input.
    @param txt The text to process. Must be exactly of type str.

    Raises an ApodeixiError if `txt` is not a str.
    '''
    if type(txt) != str:
        # The quote opened around the type is now closed; it used to be left dangling
        raise ApodeixiError(
            parent_trace,
            "Encountered problem removing comments in parenthesis: expected a string, "
            + "but instead was given a '" + str(type(txt)) + "'",
            data={"invalid input": str(txt)})

    stripped_txt = StringUtils().strip(txt)

    # Remove text within parenthesis, if any, using the natural language tool
    # nltk.tokenize.SExprTokenizer
    sexpr = SExprTokenizer(strict=False)
    sexpr_tokens = sexpr.tokenize(stripped_txt)

    # Any token carrying a parenthesis is dropped as a whole
    parenthesis_free_tokens = [t for t in sexpr_tokens
                               if ')' not in t and '(' not in t]
    parenthesis_free_txt = ' '.join(parenthesis_free_tokens)
    return parenthesis_free_txt
def manifestNameFromCoords(self, parent_trace, subnamespace, coords, kind):
    '''
    Helper method that returns what the 'name' field should be in the manifest to be created
    with the given filing coords, possibly complemented by the subnamespace. Usually used in the
    context of generating forms.

    The name is the yaml-fieldname formatting of
    "<subnamespace>.<scoring cycle>.<initiative>.<workstream UID>.<scenario>",
    where everything but the subnamespace is read from `coords`.

    Example: consider a manifest name like "modernization.dec-2020.fusionopus.default" in
    namespace "my-corp.production". To build such a name, this method must receive
    "modernization" as the subnamespace, and filing coords from which to infer "dec-2020",
    "fusionopus", and "default".

    @param subnamespace A string used as the program portion of the name. It is a further
                        partitioning of the namespace into finer slices, and a manifest's name
                        is supposed to identify the slice in which the manifest resides.
                        NOTE(review): the general contract allows None, but this implementation
                        concatenates it with other strings, so None would fail here.
    @param coords An InitiativesFilingCoordinates object (exact type is enforced).
    @param kind The kind of manifest for which the name is sought. Not used by this
                implementation; it exists to support controllers that process multiple manifest
                kinds and do not use the same name for all of them.

    Raises an ApodeixiError if `coords` is not exactly an InitiativesFilingCoordinates.
    '''
    if type(coords) != InitiativesFilingCoordinates:
        raise ApodeixiError(
            parent_trace,
            "Can't build manifest name because received wrong type of filing coordinates",
            data={"Type of coords received": str(type(coords)),
                  "Expected type of coords": "InitiativesFilingCoordinates"})

    workstream_UID = coords.workstream_UID
    initiative = coords.initiative
    scenario = coords.scenario
    scoring_cycle = coords.scoring_cycle

    # The subnamespace plays the role of the program in the name
    raw_name = subnamespace + '.' + scoring_cycle + '.' + initiative
    raw_name = raw_name + '.' + workstream_UID + '.' + scenario

    return StringUtils().format_as_yaml_fieldname(raw_name)
def manifestNameFromLabel(self, parent_trace, label, kind):
    '''
    Helper method that returns what the 'name' field should be in the manifest to be created
    with the given label.

    The name is the yaml-fieldname formatting of
    "<program>.<scoring cycle>.<initiative>.<workstream UID>.<scenario>",
    with every piece read from `label`.

    @param label Posting label object providing `program`, `workstream_UID`, `initiative`,
                `scenario` and `scoring_cycle` accessors, each taking a trace.
    @param kind The kind of manifest for which the name is sought. Not used by this
                implementation; it exists to support controllers that process multiple manifest
                kinds and do not use the same name for all of them (for example, controllers
                that point to reference data in a different domain/sub-domain).
    '''
    program = label.program(parent_trace)
    workstream_UID = label.workstream_UID(parent_trace)
    initiative = label.initiative(parent_trace)
    scenario = label.scenario(parent_trace)
    scoring_cycle = label.scoring_cycle(parent_trace)

    raw_name = program + '.' + scoring_cycle + '.' + initiative
    raw_name = raw_name + '.' + workstream_UID + '.' + scenario

    return StringUtils().format_as_yaml_fieldname(raw_name)
def entity_as_yaml_fieldname(self):
    '''
    Returns `MilestonesController._MilestonesConfig._ENTITY_NAME` formatted as a yaml fieldname.
    '''
    entity_name = MilestonesController._MilestonesConfig._ENTITY_NAME
    to_yaml_name = StringUtils().format_as_yaml_fieldname
    return to_yaml_name(entity_name)
def manifests_description(self, parent_trace, kb_session, kinds_of_interest, labels_of_interest, environment_filter):
    '''
    Returns a nicely formatted string, suitable for CLI output. It displays summary information for all
    manifests that the KnowledgeBase currently supports whose kind is one of the `kinds_of_interest` and
    which have all the labels of interest.

    The search visits the store's base environment plus every sandbox reported by
    `self._sandboxes_names_list`, activating each of them in turn. The environment that was current
    when the method was called is re-activated before returning, and also when an error
    propagates out of the search.

    @param kinds_of_interest A list of strings, corresponding to manifest kinds we seek. If null, then
                            we will collect all kinds known to the system.
    @param labels_of_interest A list of strings of the form "<field>=<value>", which constrains
                            which manifests are returned by forcing that each of them has <field> as
                            a label with value <value>. If set to None, then all manifests are included.
                            NOTE: filtering on labels is not implemented yet, so this parameter is
                            currently ignored.
    @param environment_filter A lambda function, that takes a string argument and returns True or False.
                            Its purpose is to filter out which KnowledgeBase store's environments to
                            include when searching for manifests. If it is None, then all environments
                            are included.
    '''
    description_table = []
    description_headers = ["Kind", "Version", "Estimated on", "Recorded by", "Namespace", "Name", "Environment"]

    environments = [kb_session.store.base_environment(parent_trace).name(parent_trace)]
    environments.extend(self._sandboxes_names_list(parent_trace, kb_session))

    if environment_filter is not None:
        # The explicit comparison to True is kept on purpose: only results that compare equal
        # to True select an environment, exactly as before.
        environments = [e for e in environments if environment_filter(e) == True]

    if kinds_of_interest is None:
        kinds_of_interest = self._get_all_kinds(parent_trace, kb_session)

    def _manifest_filter(parent_trace, manifest_dict):
        # TODO For now we just approve everything. Later will need to filter on labels
        return True

    original_env_name = kb_session.store.current_environment(parent_trace).name(parent_trace)
    GET = DictionaryUtils().get_val
    try:
        for env_name in environments:
            kb_session.store.activate(parent_trace, env_name)

            manifest_dict_list = kb_session.store.searchManifests(parent_trace, kinds_of_interest,
                                                                   manifest_filter=_manifest_filter)

            # Columns: ["Kind", "Version", "Estimated on", "Recorded by", "Namespace", "Name", "Environment"]
            for m_dict in manifest_dict_list:
                kind = GET(parent_trace, m_dict, "Manifest", ["kind"], [str])
                version = GET(parent_trace, m_dict, "Manifest", ["metadata", "version"], [int])
                estimated_on = GET(parent_trace, m_dict, "Manifest", ["assertion", "estimatedOn"], [datetime])
                # NOTE(review): [datetime] for "recordedBy" looks copied from the line above;
                # confirm against DictionaryUtils.get_val and the manifest schema before changing.
                recorded_by = GET(parent_trace, m_dict, "Manifest", ["assertion", "recordedBy"], [datetime])
                namespace = GET(parent_trace, m_dict, "Manifest", ["metadata", "namespace"], [str])
                name = GET(parent_trace, m_dict, "Manifest", ["metadata", "name"], [str])
                description_table.append([kind, version, estimated_on, recorded_by, namespace, name, env_name])
    finally:
        # Always put the store back in the environment the caller had active, even if the
        # search above failed half-way through the list of environments.
        kb_session.store.activate(parent_trace, original_env_name)

    # To ensure output to be predictable (e.g., for regression tests) we sort the description table.
    # We found that otherwise some tests that pass in Windows will fail when run in a Linux container.
    # The key (kind followed by the version as text) is deliberately left as it was, so the
    # resulting order does not change.
    KIND_IDX = 0
    VERSION_IDX = 1
    description_table = sorted(description_table,
                               key=lambda entry: entry[KIND_IDX] + str(entry[VERSION_IDX]))

    description = "\n\n"
    description += tabulate(description_table, headers=description_headers)
    description += "\n"

    return description
def entity_as_yaml_fieldname(self):
    '''
    Returns this object's entity name (`self._entity_name`) formatted as a yaml fieldname.
    '''
    to_yaml_name = StringUtils().format_as_yaml_fieldname
    return to_yaml_name(self._entity_name)
def diff(kb_session, manifest_api, kind, namespace, name):
    '''
    Makes a diff between two versions of a manifest.

    For a list of valid MANIFEST_APIs and KINDs, try 'get apis'

    For a list of valid NAMESPACEs and NAMEs, try 'get assertions'

    MANIFEST_API must be a versionless manifest API.
    Example: 'delivery-planning.journeys.a6i.io',
    (as opposed to 'delivery-planning.journeys.a6i.io/v1a').
    '''
    # Flow: ask ManifestUtils for the diff (both versions are passed as None), turn the result
    # into text, echo it followed by "Success" and the elapsed time. Any failure is turned into
    # an error report that is printed, after which the process exits.
    timer = ApodeixiTimer()
    func_trace = FunctionalTrace(parent_trace=None, path_mask=None)
    # The trace labels name this command and the service actually invoked, so that error
    # reports for a diff are not mislabeled as coming from a posting.
    root_trace = func_trace.doing("CLI call to diff",
                                  origination={'signaled_from': __file__})
    # Tracks whether the failure, if any, happened before or after the KnowledgeBase call itself
    kb_operation_succeeded = False
    try:
        my_trace = root_trace.doing("Invoking ManifestUtils's diff_manifest service")
        diff_result = ManifestUtils().diff_manifest(parent_trace=my_trace,
                                                    store=kb_session.store,
                                                    manifest_api_name=manifest_api,
                                                    namespace=namespace,
                                                    name=name,
                                                    kind=kind,
                                                    version1=None,
                                                    version2=None)
        kb_operation_succeeded = True
        diff_description = CLI_Utils().describe_diff_response(my_trace, kb_session, diff_result)

        # GOTCHA:
        #       Make sure to remove non-ascii characters before passing the description to click.echo,
        #       since it will raise errors if there are characters like U+FFFD in the description
        #
        diff_description = StringUtils().to_ascii(diff_description)

        click.echo(diff_description)
        output = "Success"
        click.echo(output)
        click.echo(timer.elapsed_time_message())
    except ApodeixiError as ex:
        error_msg = CLI_ErrorReporting(kb_session).report_a6i_error(parent_trace=root_trace,
                                                                     a6i_error=ex)
        if kb_operation_succeeded:
            error_msg = "KnowledgeBase operation completed, but run into a problem when preparing "\
                        + "a description of the response:\n"\
                        + error_msg
        # GOTCHA
        #       Use print, not click.echo or click exception because they don't correctly display
        #       styling (colors, underlines, etc.). So use vanilla Python print and then exit
        # NOTE(review): exiting without an argument reports status 0 even though an error
        #       occurred; confirm whether callers or scripts rely on that before changing it.
        print(error_msg)
        _sys.exit()
    except Exception as ex:
        try:
            error_msg = CLI_ErrorReporting(kb_session).report_generic_error(parent_trace=root_trace,
                                                                             generic_error=ex)
            if kb_operation_succeeded:
                error_msg = "KnowledgeBase operation completed, but run into a problem when preparing "\
                            + "a description of the response:\n"\
                            + error_msg
        except Exception as ex2:
            # Even the error reporting failed, so fall back to a plain-text message with both errors
            error_msg = "CLI run into trouble: found error:\n\n\t" + str(ex) + "\n\n" \
                        + "To make things worse, when trying to produce an error log file with a "\
                        + "stack trace, run into an additional error:\n\n\t" + str(ex2)
        # GOTCHA
        #       Use print, not click.echo or click exception because they don't correctly display
        #       styling (colors, underlines, etc.). So use vanilla Python print and then exit
        print(error_msg)
        _sys.exit()
def entity_as_yaml_fieldname(self):
    '''
    Returns `Mock_Controller._AccountHierarchyConfig._ENTITY_NAME` formatted as a yaml fieldname.
    '''
    entity_name = Mock_Controller._AccountHierarchyConfig._ENTITY_NAME
    to_yaml_name = StringUtils().format_as_yaml_fieldname
    return to_yaml_name(entity_name)
def checkReferentialIntegrity(self, parent_trace):
    '''
    Used to check that the values of Posting Label fields are valid. Does not return a value, but will
    raise an exception if any field is "invalid".

    Sometimes this validation might be against data configured in the ApodeixiConfig. Example: "organization"

    In other situations the validation is against the existence of static data objects which the label
    references. Example: "product" in the case of the Journeys domain.

    On top of the checks made by the parent class, this implementation verifies that the version of
    the referenced manifest (kind `MilestonesController.REFERENCED_KIND`) recorded in the Posting
    Label is exactly the latest version found in the store. An ApodeixiError is raised if the
    submitted version is older than the latest one, and also if it is newer (i.e., non-existent).

    NOTE: This method is intended to be called *after* label.read(-) has completed, including any
    label.read(-) implemented by derived classes.
    That is why it can't be called within label.read(-) at the PostingLabel parent class level,
    and why the design choice was made to have the calling code invoke this check right after
    calling label.read()
    '''
    super().checkReferentialIntegrity(parent_trace)

    # We want to make sure that the milestones manifest references the most recent version of the
    # big-rocks manifest, before we accept the submitted Excel for the milestones manifest.
    my_trace = parent_trace.doing("Checking milestones reference most recent big-rocks")
    ME = MilestonesController

    manifest_api_name = self.controller.getManifestAPI().apiName()
    organization = self.organization(my_trace)
    kb_area = self.knowledgeBaseArea(my_trace)
    namespace = StringUtils().format_as_yaml_fieldname(organization + '.' + kb_area)
    manifest_name = self.controller.manifestNameFromLabel(my_trace, label=self, kind=ME.REFERENCED_KIND)

    # Only the manifest's content is needed; the path it was found at is discarded
    manifest_dict, _ = self.controller.store.findLatestVersionManifest(
        parent_trace=my_trace,
        manifest_api_name=manifest_api_name,
        namespace=namespace,
        name=manifest_name,
        kind=ME.REFERENCED_KIND)

    BIG_ROCKS_MANIFEST_NB = 0
    last_version_nb = ManifestUtils().get_manifest_version(my_trace, manifest_dict)
    submitted_version_nb = self.priorVersion(my_trace, BIG_ROCKS_MANIFEST_NB)

    # Both failure modes share the same message, except for the description of what is wrong
    if submitted_version_nb < last_version_nb:
        problem = "does not reference the most recent version"
    elif submitted_version_nb > last_version_nb:
        problem = "references a non-existent version"
    else:
        return

    raise ApodeixiError(
        my_trace,
        "Excel form needs to be refreshed and re-submitted because it " + problem
        + " of the '" + ME.REFERENCED_KIND + "'. Request a new form "
        + "for '" + ME.MY_KIND + "' to reflect the correct version for '"
        + ME.REFERENCED_KIND
        + "' and re-apply your changes to that form, and re-submit",
        data={"version submitted": str(submitted_version_nb),
              "latest version": str(last_version_nb)})