Пример #1
0
        def preflightPostingValidation(self, parent_trace, posted_content_df):
            '''
            Early, user-oriented sanity check of the tabular content of a posting.

            Verifies that the entity column of this configuration (`self._entity_name`) is present among the
            columns of `posted_content_df`. Returns nothing when the check passes.

            The purpose is usability rather than protection against data corruption: without this check a posting
            mistake would still be caught by deeper parsing logic, but with an error that is harder for the user to
            act on. Here the user gets a high-level message instead.

            @param parent_trace Trace object handed to the ApodeixiError if one is raised.
            @param posted_content_df DataFrame representation of the data posted in Excel. Only its `columns`
                            are inspected here.
            '''
            # GOTCHA: compare yaml-formatted names, never raw column names. A mandatory column such as
            #           "Big Rocks" may come back as "big-rocks" in the form generated for updates, and that
            #           innocent difference must not trigger an error.
            as_yaml_field                   = StringUtils().format_as_yaml_fieldname

            posted_cols                     = list(map(as_yaml_field, posted_content_df.columns))
            mandatory_cols                  = [as_yaml_field(self._entity_name)]

            missing_cols                    = []
            for required in mandatory_cols:
                if required not in posted_cols:
                    missing_cols.append(required)

            if not missing_cols:
                return

            raise ApodeixiError(parent_trace,
                                "Posting lacks some mandatory columns. This often happens if "
                                "ranges are wrong in Posting Label.",
                                data = {'Missing columns':  missing_cols,
                                        'Posted columns':   posted_cols})
Пример #2
0
        def preflightPostingValidation(self, parent_trace, posted_content_df):
            '''
            Early, user-oriented sanity check of the tabular content of a posting.

            Verifies that the milestones entity column (`MilestonesController._MilestonesConfig._ENTITY_NAME`)
            is present among the columns of `posted_content_df`, using `StringUtils().is_in_as_yaml` for the
            membership test rather than a raw string comparison. Returns nothing when the check passes.

            The purpose is usability rather than protection against data corruption: without this check a posting
            mistake would still be caught by deeper parsing logic, but with an error that is harder for the user to
            act on.

            @param parent_trace Trace object handed to the ApodeixiError if one is raised.
            @param posted_content_df DataFrame representation of the data posted in Excel. Only its `columns`
                            are inspected here.
            '''
            config = MilestonesController._MilestonesConfig
            posted_cols = list(posted_content_df.columns)
            # Only the entity column is mandatory at present.
            mandatory_cols = [config._ENTITY_NAME]

            missing_cols = []
            for required in mandatory_cols:
                if StringUtils().is_in_as_yaml(required, posted_cols):
                    continue
                missing_cols.append(required)

            if not missing_cols:
                return

            raise ApodeixiError(
                parent_trace,
                "Posting lacks some mandatory columns. This often happens if "
                "ranges are wrong in Posting Label.",
                data={'Missing columns': missing_cols,
                      'Posted columns': posted_cols})
Пример #3
0
    def manifestNameFromCoords(self, parent_trace, subnamespace, coords, kind):
        '''
        Returns the value to use as the 'name' field of a manifest, for callers that start from filing coords
        (usually in the context of generating forms).

        This implementation ignores all of its arguments and always returns the same name: the yaml-fieldname
        form of `StaticData_Controller._STATIC_DATA`.

        @param parent_trace Not used by this implementation.
        @param subnamespace A string, which is allowed to be None. Not used by this implementation.
        @param coords A FilingCoords object corresponding to this controller. Not used by this implementation.
        @param kind The kind of manifest for which the name is sought. Not used by this implementation, since
                    the same name is returned for every kind.
        '''
        return StringUtils().format_as_yaml_fieldname(StaticData_Controller._STATIC_DATA)
Пример #4
0
        def addLink(self, parent_trace, row_number, uid):
            '''
            Records a two-way association between `row_number` and `uid`, in `self.row_2_uid` and
            `self.uid_2_row`.

            An ApodeixiError is raised, and nothing is recorded, if `row_number` is not exactly an int, if `uid`
            is not exactly a str, or if `uid` is blank.

            @param row_number An int representing a row number in a tabular representation of a manifest's data.
                                For example, an Excel row number or a DataFrame row number.
            @param uid A string representing a unique identifier of a node in a manifest's assertion tree.
                        For example, "P4.C5"
            '''
            # Exact type matches are intentional: subclasses (e.g. bool for int) are rejected too.
            row_type = type(row_number)
            if row_type is not int:
                raise ApodeixiError(parent_trace,
                                    "Can't add a link with a row number that is not an int",
                                    data={"type(row_number)": str(row_type)})

            uid_type = type(uid)
            if uid_type is not str:
                raise ApodeixiError(parent_trace,
                                    "Can't add a link with a uid that is not an string",
                                    data={"type(uid)": str(uid_type)})

            if StringUtils().is_blank(uid):
                raise ApodeixiError(
                    parent_trace,
                    "Can't add a link with a blank uid",
                    data={"uid": str(uid)})

            # Every check passed, so remember the link in both directions
            self.row_2_uid[row_number] = uid
            self.uid_2_row[uid] = row_number
 def determineNamespace(self, parent_trace):
     '''
     Returns the namespace implied by the fields of this posting label: the label's organization and
     knowledge base area joined by a ".", formatted as a yaml field name.
     '''
     dotted = self.organization(parent_trace) + "."
     dotted = dotted + self.knowledgeBaseArea(parent_trace)
     to_yaml = StringUtils().format_as_yaml_fieldname
     return to_yaml(dotted)
Пример #6
0
 def mask_timestamps(a_dict):
     '''
     Returns a new dictionary with the same content as `a_dict`, except that every key has been passed
     through `StringUtils().mask_timestamp`. Values that are exactly of type dict are processed
     recursively; all other values are carried over unchanged. `a_dict` itself is not modified.
     '''
     masked = {}
     for original_key, child in a_dict.items():
         # Only plain dicts are descended into; anything else is copied by reference
         masked_child = mask_timestamps(child) if type(child) == dict else child
         masked[StringUtils().mask_timestamp(original_key)] = masked_child
     return masked
Пример #7
0
 def is_blank(self, txt):
     '''
     Tells whether `txt` counts as blank.

     `txt` is first normalized with `DataFrameUtils().clean`, to avoid problems with nan, numpy classes,
     dates, NaTs, etc. If the cleaned value is a string, the answer is whatever `StringUtils().is_blank`
     reports for it; if it is anything else, the answer is False.

     NOTE(review): presumably `clean` turns NaN-like values into a blank string, which is how NaN ends up
     being reported as blank - confirm against DataFrameUtils.
     '''
     cleaned = DataFrameUtils().clean(txt)
     if type(cleaned) != str:
         return False
     return StringUtils().is_blank(cleaned)
Пример #8
0
    def subnamespaceFromLabel(self, parent_trace, label):
        '''
        Helper method that returns the 'subnamespace' portion of a manifest's name, inferred from the `label`
        that provides the posting details for a manifest that should be created.

        In this implementation the subnamespace is the label's program, formatted as a yaml field name.

        @param parent_trace Trace object passed to `label.program`.
        @param label Posting label from which the program is read.
        '''
        to_yaml = StringUtils().format_as_yaml_fieldname
        return to_yaml(label.program(parent_trace))
Пример #9
0
 def non_entity_cols(self):
     '''
     Returns a list of strings: those among `self.columns` that are not the entity column, in their
     original order.
     '''
     # GOTCHA: column names are never compared to the entity name directly, because spurious differences
     #       (lower/upper case, spaces, hyphens, etc.) would make equal names look different. Both sides
     #       are formatted as yaml field names first, so one standard policy applies to the comparison.
     normalize = StringUtils().format_as_yaml_fieldname
     others = []
     for column in self.columns:
         if normalize(column) != normalize(self.entity_name):
             others.append(column)
     return others
Пример #10
0
    def get_scoring_cycles(self, parent_trace, kb_session, environment_filter):
        '''
        Returns a nicely formatted string, suitable for CLI output. It displays all valid scoring cycles of the system.

        The store's base environment and every sandbox are visited in turn (subject to `environment_filter`), and
        within each environment every namespace of the form "<organization>.<knowledge base area>" is queried.
        Namespaces for which scoring-cycle static data is not configured are silently skipped.

        Visiting an environment means activating it on `kb_session.store`. The environment that was active when
        this method was called is re-activated before returning, and also if an error propagates out of the search.

        @param kb_session Session whose `store` and `a6i_config` are used to find environments, namespaces and
            scoring cycles.
        @param environment_filter A lambda function, that takes a string argument and returns True or False.
            Its purpose is to filter out which KnowledgeBase store's environments to include when searching
            for scoring cycles. If it is None, then all environments are included.
        '''
        JOURNEY_COL                     = 'journey'
        SCORING_CYCLE_COL               = 'Scoring Cycle'
        SCENARIO_COL                    = 'Scenario'
        NOT_CONFIGURED_MSG              = "Static data of type 'scoring-cycle' is not configured for namespace"

        expected_organization           = kb_session.a6i_config.getOrganization(parent_trace)
        allowed_kb_areas                = kb_session.a6i_config.getKnowledgeBaseAreas(parent_trace)

        FMT                             = StringUtils().format_as_yaml_fieldname
        namespaces                      = [FMT(expected_organization + "."+ kb_area) for kb_area in allowed_kb_areas]

        description_table               = []
        description_headers             = ["Journey", "Scoring cycle", "Scenario", "Namespace", "Environment"]

        environments                    = []
        environments.append(kb_session.store.base_environment(parent_trace).name(parent_trace))
        environments.extend(self._sandboxes_names_list(parent_trace, kb_session))

        if environment_filter is not None:
            # The filter is documented to return True or False; only a True answer keeps the environment
            environments                = [e for e in environments if environment_filter(e) == True]

        original_env_name               = kb_session.store.current_environment(parent_trace).name(parent_trace)
        try:
            for env_name in environments:
                kb_session.store.activate(parent_trace, env_name)
                for ns in namespaces:
                    validator           = StaticDataValidator(parent_trace, kb_session.store, kb_session.a6i_config)
                    try:
                        sc_df           = validator.getScoringCycles(parent_trace, ns)
                        for _, row in sc_df.iterrows():
                            description_table.append([row[JOURNEY_COL], row[SCORING_CYCLE_COL],
                                                        row[SCENARIO_COL], ns, env_name])
                    except ApodeixiError as ex:
                        if ex.msg.startswith(NOT_CONFIGURED_MSG):
                            # If so just ignore this error, since perhaps that namespace has no scoring cycles
                            # but maybe other namespaces do
                            continue
                        # Bare raise keeps the original traceback intact
                        raise
        finally:
            # Always put the store back in the environment the caller had active, even when the search fails.
            kb_session.store.activate(parent_trace, original_env_name)

        description                     = "\n\n"
        description                     += tabulate(description_table, headers=description_headers)
        description                     += "\n"

        return description
Пример #11
0
 def manifestNameFromLabel(self, parent_trace, label, kind):
     '''
     Returns the value to use as the 'name' field of the manifest to be created from the given label.

     This implementation ignores all of its arguments and always returns the same name: the yaml-fieldname
     form of `StaticData_Controller._STATIC_DATA`.

     @param parent_trace Not used by this implementation.
     @param label Posting label for the manifest. Not used by this implementation.
     @param kind The kind of manifest for which the name is sought. Not used by this implementation, since
                 the same name is returned for every kind.
     '''
     return StringUtils().format_as_yaml_fieldname(StaticData_Controller._STATIC_DATA)
Пример #12
0
    def get_namespaces(self, parent_trace, kb_session):
        '''
        Returns a nicely formatted string, suitable for CLI output, listing the valid namespaces of the system.

        There is one namespace per knowledge base area configured in `kb_session.a6i_config`; each one is
        "<organization>.<knowledge base area>" formatted as a yaml field name. The result is a single-column
        table (header "Namespace") preceded by two newlines and followed by one.
        '''
        organization = kb_session.a6i_config.getOrganization(parent_trace)
        kb_areas = kb_session.a6i_config.getKnowledgeBaseAreas(parent_trace)

        to_yaml = StringUtils().format_as_yaml_fieldname
        rows = []
        for area in kb_areas:
            # Each row is a one-cell list, as expected for a single-column table
            rows.append([to_yaml(organization + "." + area)])

        return "\n\n" + tabulate(rows, headers=["Namespace"]) + "\n"
Пример #13
0
 def _strip_parenthesis(parent_trace, txt):
     '''
     Returns `txt` without any text that appears within parenthesis.

     `txt` is first stripped with `StringUtils().strip` and then tokenized with nltk's SExprTokenizer in
     non-strict mode. Every token containing a "(" or a ")" is dropped, and the surviving tokens are
     joined with single spaces.

     @param parent_trace Trace object handed to the ApodeixiError if one is raised.
     @param txt The string to process. An ApodeixiError is raised if it is not exactly of type str.
     '''
     if type(txt) != str:
         raise ApodeixiError(parent_trace,
                             "Encountered problem removing comments in parenthesis: expected a string, "
                             + "but instead was given a '" + str(type(txt)),
                             data={"invalid input": str(txt)})

     trimmed = StringUtils().strip(txt)
     tokens = SExprTokenizer(strict=False).tokenize(trimmed)

     kept = []
     for token in tokens:
         if ')' in token or '(' in token:
             continue
         kept.append(token)
     return ' '.join(kept)
Пример #14
0
    def manifestNameFromCoords(self, parent_trace, subnamespace, coords, kind):
        '''
        Returns the value to use as the 'name' field of a manifest, built from the given filing coords
        complemented by the subnamespace. Usually used in the context of generating forms.

        The name is "<subnamespace>.<scoring cycle>.<initiative>.<workstream UID>.<scenario>", formatted as a
        yaml field name, where everything but the subnamespace is read from `coords`.

        @param parent_trace Trace object handed to the ApodeixiError if one is raised.
        @param subnamespace A string used as the leading (program) portion of the name.
                        NOTE(review): it is concatenated as-is, so a None here would fail with a TypeError -
                        confirm whether callers can ever pass None.
        @param coords The filing coords from which the rest of the name is inferred. Must be exactly of type
                        InitiativesFilingCoordinates, else an ApodeixiError is raised.
        @param kind The kind of manifest for which the name is sought. Not used by this implementation, which
                    builds the same name for every kind.
        '''
        if type(coords) != InitiativesFilingCoordinates:
            raise ApodeixiError(parent_trace,
                                "Can't build manifest name because received wrong type of filing coordinates",
                                data={"Type of coords received": str(type(coords)),
                                      "Expected type of coords": "InitiativesFilingCoordinates"})

        workstream_UID, initiative = coords.workstream_UID, coords.initiative
        scenario, scoring_cycle = coords.scenario, coords.scoring_cycle

        to_yaml = StringUtils().format_as_yaml_fieldname
        dotted_name = subnamespace + '.' + scoring_cycle + '.' + initiative
        dotted_name = dotted_name + '.' + workstream_UID + '.' + scenario
        return to_yaml(dotted_name)
Пример #15
0
    def manifestNameFromLabel(self, parent_trace, label, kind):
        '''
        Returns the value to use as the 'name' field of the manifest to be created from the given label.

        The name is "<program>.<scoring cycle>.<initiative>.<workstream UID>.<scenario>", formatted as a yaml
        field name, with every portion read from `label`.

        @param parent_trace Trace object passed to each of the label's accessors.
        @param label Posting label providing the program, workstream UID, initiative, scenario and scoring cycle.
        @param kind The kind of manifest for which the name is sought. Not used by this implementation, which
                    builds the same name for every kind.
        '''
        # Read order is kept as program, workstream UID, initiative, scenario, scoring cycle
        program, workstream_UID = label.program(parent_trace), label.workstream_UID(parent_trace)
        initiative, scenario = label.initiative(parent_trace), label.scenario(parent_trace)
        scoring_cycle = label.scoring_cycle(parent_trace)

        to_yaml = StringUtils().format_as_yaml_fieldname
        dotted_name = program + '.' + scoring_cycle + '.' + initiative
        dotted_name = dotted_name + '.' + workstream_UID + '.' + scenario
        return to_yaml(dotted_name)
Пример #16
0
 def entity_as_yaml_fieldname(self):
     '''
     Returns the milestones entity name (`MilestonesController._MilestonesConfig._ENTITY_NAME`), formatted
     as a yaml field name.
     '''
     config = MilestonesController._MilestonesConfig
     to_yaml = StringUtils().format_as_yaml_fieldname
     return to_yaml(config._ENTITY_NAME)
Пример #17
0
    def manifests_description(self, parent_trace, kb_session, kinds_of_interest, labels_of_interest, environment_filter):
        '''
        Returns a nicely formatted string, suitable for CLI output.

        It displays summary information (kind, version, estimated on, recorded by, namespace, name, environment)
        for all manifests that the KnowledgeBase currently supports whose kind is one of the `kinds_of_interest`.

        The store's base environment and every sandbox are visited in turn (subject to `environment_filter`).
        Visiting an environment means activating it on `kb_session.store`. The environment that was active when
        this method was called is re-activated before returning, and also if an error propagates out of the search.

        @param kinds_of_interest A list of strings, corresponding to manifest kinds we seek. If None, then
            we will collect all kinds known to the system.
        @param labels_of_interest A list of strings of the form "<field>=<value>", meant to constrain
            which manifests are returned by forcing that each of them has <field> as a label with value <value>.
            Filtering on labels is not implemented yet (see the TODO below), so this parameter is currently
            ignored and all manifests of the requested kinds are included.
        @param environment_filter A lambda function, that takes a string argument and returns True or False.
            Its purpose is to filter out which KnowledgeBase store's environments to include when searching
            for manifests. If it is None, then all environments are included
        '''
        description_table               = []
        description_headers             = ["Kind", "Version", "Estimated on", "Recorded by", "Namespace", "Name", "Environment"]

        environments                    = []
        environments.append(kb_session.store.base_environment(parent_trace).name(parent_trace))
        environments.extend(self._sandboxes_names_list(parent_trace, kb_session))

        if environment_filter is not None:
            # The filter is documented to return True or False; only a True answer keeps the environment
            environments                = [e for e in environments if environment_filter(e) == True]

        if kinds_of_interest is None:
            kinds_of_interest           = self._get_all_kinds(parent_trace, kb_session)

        def _manifest_filter(parent_trace, manifest_dict):
            #TODO For now we just approve everything. Later will need to filter on labels
            return True

        GET                             = DictionaryUtils().get_val

        original_env_name               = kb_session.store.current_environment(parent_trace).name(parent_trace)
        try:
            for env_name in environments:
                kb_session.store.activate(parent_trace, env_name)

                manifest_dict_list      = kb_session.store.searchManifests(parent_trace, kinds_of_interest,
                                                                                manifest_filter = _manifest_filter)
                for m_dict in manifest_dict_list:
                    kind                = GET(parent_trace, m_dict, "Manifest", ["kind"], [str])
                    version             = GET(parent_trace, m_dict, "Manifest", ["metadata", "version"], [int])
                    estimated_on        = GET(parent_trace, m_dict, "Manifest", ["assertion", "estimatedOn"], [datetime])
                    # NOTE(review): `datetime` is the only type accepted for "recordedBy", same as for
                    # "estimatedOn" - confirm this is intended and not a copy-paste of the line above
                    recorded_by         = GET(parent_trace, m_dict, "Manifest", ["assertion", "recordedBy"], [datetime])
                    namespace           = GET(parent_trace, m_dict, "Manifest", ["metadata", "namespace"], [str])
                    name                = GET(parent_trace, m_dict, "Manifest", ["metadata", "name"], [str])

                    description_table.append([kind, version, estimated_on, recorded_by,  namespace, name, env_name])
        finally:
            # Always put the store back in the environment the caller had active, even when the search fails.
            kb_session.store.activate(parent_trace, original_env_name)

        # To ensure output to be predictable (e.g., for regression tests) we sort the description table. We found that
        # otherwise some tests that pass in Windows will fail when run in a Linux container.
        #
        # The sort key is the kind concatenated with the version rendered as text, so versions compare as strings.
        KIND_IDX                        = 0
        VERSION_IDX                     = 1
        description_table               = sorted(description_table, key=lambda entry: entry[KIND_IDX] + str(entry[VERSION_IDX]))

        description                     = "\n\n"
        description                     += tabulate(description_table, headers=description_headers)
        description                     += "\n"

        return description
Пример #18
0
 def entity_as_yaml_fieldname(self):
     '''
     Returns this object's entity name (`self._entity_name`), formatted as a yaml field name.
     '''
     to_yaml = StringUtils().format_as_yaml_fieldname
     return to_yaml(self._entity_name)
Пример #19
0
def diff(kb_session, manifest_api, kind, namespace, name):
    '''
    Makes a diff between two versions of a manifest.
    
    For a list of valid MANIFEST_APIs and KINDs, try 'get apis'
    
    For a list of valid NAMESPACEs and NAMEs, try 'get assertions'

    MANIFEST_API must be a versionless manifest API. 
    
    Example: 'delivery-planning.journeys.a6i.io', (as opposed to 'delivery-planning.journeys.a6i.io/v1a'). 
    '''
    # NOTE: the docstring above is deliberately left as it was, in case it doubles as the CLI help text.
    #
    # Flow: ask ManifestUtils for the diff (both versions are passed as None), turn the response into a
    # description, and echo it followed by "Success" and the elapsed time. Any error is reported through
    # CLI_ErrorReporting and printed, after which the process exits.

    # Prefix used when the KnowledgeBase call itself succeeded and the failure came afterwards, while
    # describing the response.
    DESCRIPTION_PROBLEM_PREFIX = "KnowledgeBase operation completed, but run into a problem when preparing "\
                                    + "a description of the response:\n"

    timer = ApodeixiTimer()
    func_trace = FunctionalTrace(parent_trace=None, path_mask=None)
    # Trace labels name the operation actually performed (diff), so that error reports are not misleading
    root_trace = func_trace.doing("CLI call to diff",
                                  origination={'signaled_from': __file__})

    kb_operation_succeeded = False
    try:
        my_trace = root_trace.doing(
            "Invoking ManifestUtils's diff_manifest service")

        diff_result = ManifestUtils().diff_manifest(
            parent_trace=my_trace,
            store=kb_session.store,
            manifest_api_name=manifest_api,
            namespace=namespace,
            name=name,
            kind=kind,
            version1=None,
            version2=None)
        kb_operation_succeeded = True

        diff_description = CLI_Utils().describe_diff_response(
            my_trace, kb_session, diff_result)

        # GOTCHA:
        # Make sure to remove non-ascii characters before passing the description to click.echo, since it
        # will raise errors if there are characters like \uFFFFD in the description
        #
        diff_description = StringUtils().to_ascii(diff_description)

        click.echo(diff_description)
        output = "Success"
        click.echo(output)
        click.echo(timer.elapsed_time_message())
    except ApodeixiError as ex:
        error_msg = CLI_ErrorReporting(kb_session).report_a6i_error(
            parent_trace=root_trace, a6i_error=ex)
        if kb_operation_succeeded:
            error_msg = DESCRIPTION_PROBLEM_PREFIX + error_msg
        # GOTCHA
        #       Use print, not click.echo or click exception because they don't correctly display styling
        #       (colors, underlines, etc.). So use vanilla Python print and then exit
        print(error_msg)
        _sys.exit()
    except Exception as ex:
        try:
            error_msg = CLI_ErrorReporting(kb_session).report_generic_error(
                parent_trace=root_trace, generic_error=ex)
            if kb_operation_succeeded:
                error_msg = DESCRIPTION_PROBLEM_PREFIX + error_msg
        except Exception as ex2:
            # Reporting itself failed, so fall back to a plain message that mentions both errors
            error_msg = "CLI run into trouble: found error:\n\n\t" + str(ex) + "\n\n" \
                        + "To make things worse, when trying to produce an error log file with a "\
                        + "stack trace, run into an additional error:\n\n\t" + str(ex2)
        # GOTCHA
        #       Use print, not click.echo or click exception because they don't correctly display styling
        #       (colors, underlines, etc.). So use vanilla Python print and then exit
        print(error_msg)
        _sys.exit()
 def entity_as_yaml_fieldname(self):
     '''
     Returns the account hierarchy entity name (`Mock_Controller._AccountHierarchyConfig._ENTITY_NAME`),
     formatted as a yaml field name.
     '''
     config = Mock_Controller._AccountHierarchyConfig
     to_yaml = StringUtils().format_as_yaml_fieldname
     return to_yaml(config._ENTITY_NAME)
Пример #21
0
        def checkReferentialIntegrity(self, parent_trace):
            '''
            Checks that the values of the Posting Label fields are valid. Returns nothing, but raises an
            ApodeixiError if a field is "invalid".

            The parent class's checks run first. On top of them, this method checks the reference that a milestones
            posting makes to a read-only manifest of kind `MilestonesController.REFERENCED_KIND` (the big-rocks):
            the version recorded in the Posting Label must be exactly the latest version of that manifest found
            in the store. An older version and a version that does not exist yet are both rejected, with a message
            asking the user to request a fresh form.

            NOTE: This method is intended to be called *after* label.read(-) has completed, including any
            label.read(-) implemented by derived classes. That is why it can't be called within label.read(-) at
            the PostingLabel parent class level, and why calling code is expected to invoke this check right after
            calling label.read()
            '''
            super().checkReferentialIntegrity(parent_trace)

            my_trace = parent_trace.doing("Checking milestones reference most recent big-rocks")
            ME = MilestonesController

            # Work out where the referenced manifest lives: API, namespace and name
            api_name = self.controller.getManifestAPI().apiName()
            organization = self.organization(my_trace)
            kb_area = self.knowledgeBaseArea(my_trace)
            to_yaml = StringUtils().format_as_yaml_fieldname
            namespace = to_yaml(organization + '.' + kb_area)
            referenced_name = self.controller.manifestNameFromLabel(my_trace, label=self, kind=ME.REFERENCED_KIND)

            # Only the manifest's content is needed; the path it was found at is discarded
            manifest_dict, _ = self.controller.store.findLatestVersionManifest(
                parent_trace=my_trace,
                manifest_api_name=api_name,
                namespace=namespace,
                name=referenced_name,
                kind=ME.REFERENCED_KIND)

            BIG_ROCKS_MANIFEST_NB = 0
            last_version_nb = ManifestUtils().get_manifest_version(my_trace, manifest_dict)
            submitted_version_nb = self.priorVersion(my_trace, BIG_ROCKS_MANIFEST_NB)

            # The two failure modes differ only in how the problem is worded
            if submitted_version_nb < last_version_nb:
                problem = "does not reference the most recent version of the '"
            elif submitted_version_nb > last_version_nb:
                problem = "references a non-existent version of the '"
            else:
                return

            raise ApodeixiError(
                my_trace,
                "Excel form needs to be refreshed and re-submitted because it " + problem
                + ME.REFERENCED_KIND + "'. Request a new form for '" + ME.MY_KIND
                + "' to reflect the correct version for '" + ME.REFERENCED_KIND
                + "' and re-apply your changes to that form, and re-submit",
                data={"version submitted": str(submitted_version_nb),
                      "latest version": str(last_version_nb)})