Exemplo n.º 1
0
def reportInterfaceAssembly():
    """Build and write the markdown report for the integrated RPC interface definition.

    Reads "../Definitions/rpcInterfaceDefinition.bjsn" (key order preserved
    through OrderedDict), renders the definitions with
    muRPCInterfaceDefinition, appends a per-station source table and then
    both prints the markdown and writes it to
    "../Reports/rpcInterfaceDefinition.md".

    Every definition is expected to carry an "inVistAs" list (stations that
    hold the RPC) and a "fromVistA" value (station that contributed the
    definition). Station "999" is the one the report text calls FOIA.
    """
    # 'with' guarantees the handle is closed even if the JSON is malformed
    with open("../Definitions/rpcInterfaceDefinition.bjsn") as defnFile:
        rpcInterfaceDefinition = json.load(
            defnFile, object_pairs_hook=OrderedDict)

    mu = """## VA VistA RPC Interface Definition 
    
"""

    mu += muRPCInterfaceDefinition(rpcInterfaceDefinition)

    # Add in information on Sources
    cntSSOs = Counter()  # station -> number of RPCs present in that station
    cntFromSSO = Counter()  # station -> number of definitions taken from it
    for defn in rpcInterfaceDefinition:
        cntSSOs.update(defn["inVistAs"])
        cntFromSSO[defn["fromVistA"]] += 1
    totalRPCs = len(rpcInterfaceDefinition)
    mu += """### Source Information for Integrated Definition
    
The integrated interface definition combines RPC definitions from multiple real VistA and FOIA. Overall __{}__ RPCs are not in FOIA.

""".format(reportAbsAndPercent(totalRPCs - cntSSOs["999"], totalRPCs))
    tbl = MarkdownTable(["Station", "RPCs", "Definition Contribution"])
    for sso in sorted(cntSSOs):
        tbl.addRow([sso, cntSSOs[sso], cntFromSSO[sso]])
    mu += tbl.md() + "\n\n"

    # Single-argument parenthesised form behaves the same on Python 2 and 3
    print(mu)

    # Close (and so flush) the report explicitly instead of relying on GC
    with open("../Reports/rpcInterfaceDefinition.md", "w") as reportFile:
        reportFile.write(mu)
def reportRPCOptions(stationNo):
    """Write the "RPC Options" markdown report for one station.

    Inputs are the station's "_19Reduction.json", "_rpcOptionsWithUse.json",
    "_rpcBPIs.json" and "_8994Reduction.json" files located through
    VISTA_RED_LOCN_TEMPL. The report is written to
    VISTA_REP_LOCN_TEMPL.format(stationNo) + "rpcOptions.md".

    The report narrows RPCs to those named by "active, used" options - ones
    with no "isRemoved" key and with an "sUsersCount" key - and compares
    that set with the RPCs named by builds and by the 8994 reduction.

    stationNo -- station number as a string; "999" gets an extra caveat,
                 a different sort order and no user-count column.
    """

    def loadReduction(suffix):
        # Load one of the station's reduction files, closing the handle promptly
        with open(VISTA_RED_LOCN_TEMPL.format(stationNo) + suffix) as fl:
            return json.load(fl)

    mu = """## RPC Options of {} 
    
Using _Active, Used RPC Options_ to subset 8994 and Build named RPCs. Expect a __15% reduction__ if we add a requirement that an RPC needs to belong to [1] an active option [2] belonging to a recently signed on user.
    
""".format(stationNo)

    if stationNo == "999":
        mu += "__Exception: with its inconsisent builds and 8994 and its lack of user sign ons and types, FOIA (999) does not follow a regular VistA pattern so many observations below don't apply to it.__\n\n"

    # Raw RPC Broker Options to give # that don't have "rpcs" (purely for reporting)
    _19Reductions = loadReduction("_19Reduction.json")

    # rpcOptionsWithUse form:
    #     {"label": rpc, "options": [{"label" "isRemoved", sUsersCount, usersCount}]}
    # ... note: not ALL broker options as some have no RPCs (must get from raw 19)
    # It is flipped below to
    #     byOption: {"label" (option), ... "rpcs": []}
    # ie/ so options under rpcs and rpcs under options, and then two sets:
    # activeUsedOptions and the RPCs of those options
    rpcOptionsWithUse = loadReduction("_rpcOptionsWithUse.json")
    rpcOptionInfoByLabel = {}  # includes RPCs of options
    for roi in rpcOptionsWithUse:
        for oi in roi["options"]:
            if oi["label"] not in rpcOptionInfoByLabel:
                # first sighting of the option: keep its info and start its RPC list
                rpcOptionInfoByLabel[oi["label"]] = oi
                oi["rpcs"] = []
            rpcOptionInfoByLabel[oi["label"]]["rpcs"].append(roi["label"])
    activeUsedOptions = set(
        option for option, info in rpcOptionInfoByLabel.items()
        if "isRemoved" not in info and "sUsersCount" in info)
    rpcsOfActiveUsedOptions = set(
        rpc for option in activeUsedOptions
        for rpc in rpcOptionInfoByLabel[option]["rpcs"])

    optionInfos = list(rpcOptionInfoByLabel.values())
    mu += """There are {:,} RPC Broker options, {:,} of which name __{:,}__ RPCs. {:,} of these options are marked 'deleted', leaving __{:,}__ of such option-backed RPCs. A further {:,} options are not assigned to an active, recently signed on user - of these, {:,} had older, no longer active users. When those without signed-on users are removed, we're left with __{}__ RPCs backed by __{:,}__ active options with users who recently signed on.
    
__Note__: options _{}_ require keys and {:,} options have Proxy Users - both need testing and analysis.
    
""".format(
        len(_19Reductions),
        len(rpcOptionInfoByLabel),
        len(rpcOptionsWithUse),

        sum(1 for info in optionInfos if "isRemoved" in info),
        len(set(rpc for info in optionInfos if "isRemoved" not in info for rpc in info["rpcs"])),

        sum(1 for info in optionInfos if not ("isRemoved" in info or "sUsersCount" in info)),
        sum(1 for info in optionInfos if "usersCount" in info and "sUsersCount" not in info),

        reportAbsAndPercent(
            len(rpcsOfActiveUsedOptions),
            len(rpcOptionsWithUse)
        ),
        len(activeUsedOptions),

        ", ".join(sorted(
            "\"{}\"".format(option) for option, info in rpcOptionInfoByLabel.items()
            if "keyRequired" in info and "sUsersCount" in info)),
        sum(1 for info in optionInfos if "proxyUsersCount" in info)
    )

    # Compare the RPCs of active, used options with Build and 8994 RPCs.
    # Labels in both reductions have "_" swapped for "/" before comparison.
    bpis = loadReduction("_rpcBPIs.json")
    _buildActiveRPCs = set(bpi["label"].replace("_", "/") for bpi in bpis if "isDeleted" not in bpi)
    _inOptionsButNotInBuilds = rpcsOfActiveUsedOptions - _buildActiveRPCs  # few
    _inBuildsButNotOptions = _buildActiveRPCs - rpcsOfActiveUsedOptions
    _8994Reduction = loadReduction("_8994Reduction.json")
    _8994Labels = set(red["label"].replace("_", "/") for red in _8994Reduction)
    _allBuildActiveAnd8994RPCs = _buildActiveRPCs.union(_8994Labels)
    _inOptionsButNot8994 = rpcsOfActiveUsedOptions - _8994Labels
    _in8994ButNotOptions = _8994Labels - rpcsOfActiveUsedOptions

    mu += """When compared to _Build RPCs_ and _8994 RPCs_:
    
  * Installed Builds name __{}__ RPCs not in used options while those options name __{:,}__ RPCs not in these Builds ({}).
  * 8994 defines __{}__ RPCs not in user options while those options name __{:,}__ RPCs not in 8994.
  
__Conclusion:__ _Used Options_ reduce the __{:,}__ RPCs named by both Builds and 8994s to __{}__.

""".format(
        reportAbsAndPercent(len(_inBuildsButNotOptions), len(_buildActiveRPCs)),
        len(_inOptionsButNotInBuilds),
        # sorted so the report text is stable from run to run
        ", ".join("\"{}\"".format(rpc) for rpc in sorted(_inOptionsButNotInBuilds)),

        reportAbsAndPercent(len(_in8994ButNotOptions), len(_8994Labels)),
        len(_inOptionsButNot8994),

        len(_allBuildActiveAnd8994RPCs),
        reportAbsAndPercent(len(rpcsOfActiveUsedOptions), len(_allBuildActiveAnd8994RPCs))
    )

    # Show Active RPC Option details
    cols = ["Option", "RPC \\#", "Exclusive RPC \\#"]
    if stationNo != "999":
        cols.append("\\# User / SO / SO0 / Proxy")
    tbl = MarkdownTable(cols)
    # Number of ACTIVE/SO options naming each RPC, computed once. An RPC is
    # exclusive to an option exactly when that option is the only one naming it.
    activeOptionCountByRPC = {}
    for option in activeUsedOptions:
        for rpc in set(rpcOptionInfoByLabel[option]["rpcs"]):
            activeOptionCountByRPC[rpc] = activeOptionCountByRPC.get(rpc, 0) + 1
    if stationNo == "999":
        def sortKey(option):
            return len(rpcOptionInfoByLabel[option]["rpcs"])
    else:
        def sortKey(option):
            return rpcOptionInfoByLabel[option]["sUsersCount"]
    for option in sorted(activeUsedOptions, key=sortKey, reverse=True):
        optionInfo = rpcOptionInfoByLabel[option]
        exclusiveRPCCount = sum(1 for rpc in optionInfo["rpcs"] if activeOptionCountByRPC[rpc] == 1)
        row = ["__{}__".format(option), len(optionInfo["rpcs"]), exclusiveRPCCount]
        if stationNo != "999":
            _0SUsersCountMU = "{:,}".format(optionInfo["_0SUsersCount"]) if "_0SUsersCount" in optionInfo else "-"
            proxyUserCountMU = "{:,}".format(optionInfo["proxyUsersCount"]) if "proxyUsersCount" in optionInfo else "-"
            row.append("{:,} / {:,} / {} / {}".format(
                optionInfo["usersCount"], optionInfo["sUsersCount"], _0SUsersCountMU, proxyUserCountMU))
        tbl.addRow(row)
    mu += "{:,} Active, SO User Options ...\n\n".format(len(activeUsedOptions))
    mu += tbl.md() + "\n\n"

    # Excluded Options, their RPCs, exclusive or otherwise
    excludedOptions = set(
        option for option, info in rpcOptionInfoByLabel.items()
        if "isRemoved" in info or "sUsersCount" not in info)
    rpcsOfExcludedOptions = set(rpc for option in excludedOptions for rpc in rpcOptionInfoByLabel[option]["rpcs"])
    rpcsExclusiveToExcludedOptions = rpcsOfExcludedOptions - rpcsOfActiveUsedOptions
    tbl = MarkdownTable(["Option", "RPC \\#", "E+E RPC \\#", "(No SO) User \\#", "Is Deleted"])
    for option in sorted(excludedOptions, key=lambda x: len(rpcOptionInfoByLabel[x]["rpcs"]), reverse=True):
        oInfo = rpcOptionInfoByLabel[option]
        userCountMU = oInfo["usersCount"] if "usersCount" in oInfo else ""
        isRemovedMU = "__YES__" if "isRemoved" in oInfo else ""
        exclusiveExcludedRPCCount = sum(1 for rpc in oInfo["rpcs"] if rpc in rpcsExclusiveToExcludedOptions)
        exclusiveExcludedRPCCountMU = exclusiveExcludedRPCCount if exclusiveExcludedRPCCount > 0 else ""
        tbl.addRow([option, len(oInfo["rpcs"]), exclusiveExcludedRPCCountMU, userCountMU, isRemovedMU])
    mu += "{:,} Excluded (removed or no SO User) Options with {:,} RPCs, {:,} of which don't appear in active options. Note that only a small minority of these options are formally deleted ...\n\n".format(len(excludedOptions), len(rpcsOfExcludedOptions), len(rpcsExclusiveToExcludedOptions))
    mu += tbl.md() + "\n\n"

    mu += """__TODO__:

  * Enhance: Add Build data for options using option info in builds => see first introduction etc
  * Besides the CPRS option, pay attention to Active/SO options with a high proproportion of 0 users: MAG WINDOWS, CAPRI, MAGJ VISTARAD WINDOWS, KPA VRAM GUI, VPR APPLICATION PROXY
  * Focus on options with many 'Exclusive RPCs' like CAPRI, MAG DICOM VISA, YS BROKER1, R1SDCI and others which also have a highish number of users - unlike the OVERLAPPING options, these introduce whole new sets of RPCs
  * SO0 is responsible for most of the logins for many of the most significant SO's (MAG, KPA etc)
  * PROXY users (see user class in user reduction): see the proxy users count. If close to all then very special option
  * Implication of DELETING Excluded Options and their exclusive RPCs - reducing VistA size
  
"""

    # Close (and so flush) the report explicitly instead of relying on GC
    with open(VISTA_REP_LOCN_TEMPL.format(stationNo) + "rpcOptions.md", "w") as reportFile:
        reportFile.write(mu)
def reportRPCOptionDetails(stationNo):
    """Write the "RPC Options Classified" markdown report for one station.

    Inputs are the station's "_200Reduction.json" (users), "_19Reduction.json"
    (options) and "_8994_5Reduction.json" (remote applications) files located
    through VISTA_RED_LOCN_TEMPL. The report is written to
    VISTA_REP_LOCN_TEMPL.format(stationNo) + "rpcOptionDetails.md".

    Only users with a "signOnCount" key are considered. Their RPC menu
    options (options with "rpcs" in the 19 reduction) are classified as
    "alone" options (held by more than 15% of their users with no other RPC
    option) or "qualifier" options (seen only, or nearly only, alongside
    other RPC options). The option "CG FMQL QP USER" is ignored throughout.

    stationNo -- station number as a string.

    Raises Exception for station "999", when a signed-on user has no
    "menuOptions", or when a user's sign-on details name a remote app that
    is not in the 8994_5 reduction.
    """

    if stationNo == "999":
        raise Exception("Can't support FOIA 999 as not enough representative sign ons")

    def loadReduction(suffix):
        # Load one of the station's reduction files, closing the handle promptly
        with open(VISTA_RED_LOCN_TEMPL.format(stationNo) + suffix) as fl:
            return json.load(fl)

    # Four inputs: users (with sign ons inside), RPC options, Apps and Options
    userInfos = loadReduction("_200Reduction.json")
    _19Reductions = loadReduction("_19Reduction.json")
    rpcsOfOptions = dict((_19Reduction["label"], _19Reduction["rpcs"]) for _19Reduction in _19Reductions if "rpcs" in _19Reduction)
    remoteAppsAndOptions = dict((_8994_5Reduction["label"], _8994_5Reduction["option"]) for _8994_5Reduction in loadReduction("_8994_5Reduction.json"))

    moUserCounts = Counter()  # option -> SO users holding it
    mo0UserCount = Counter()  # option -> SO users with "isCreatedBy0"
    moNon0UserCount = Counter()  # option -> SO users without "isCreatedBy0"
    qualifierMOUserCountByMO = defaultdict(Counter)  # option -> co-held option -> users
    moAloneUserCounts = Counter()  # option -> users holding no other RPC option
    jmoUserCounts = Counter()  # "/"-joined sorted RPC options -> users
    cntSOUsers = 0
    cntSOUsersRPCOptions = 0
    cntSOUsersNoRPCOptions = 0
    cntNon0SOUsersRPCOptions = 0
    for userInfo in userInfos:
        if "signOnCount" not in userInfo:
            continue
        cntSOUsers += 1
        if "menuOptions" not in userInfo:
            raise Exception("Expect ALL signed on users to have at least one menu option")
        # Leaving in "userClasses"/ proxies
        userHasRPCMenuOption = False
        jMenuOptions = "/".join([mo for mo in sorted(userInfo["menuOptions"]) if mo in rpcsOfOptions])
        if jMenuOptions != "":
            jmoUserCounts[jMenuOptions] += 1
        for menuOption in userInfo["menuOptions"]:
            if menuOption not in rpcsOfOptions:
                continue
            if menuOption == "CG FMQL QP USER":
                continue
            moUserCounts[menuOption] += 1
            if "isCreatedBy0" in userInfo:
                mo0UserCount[menuOption] += 1
            else:
                moNon0UserCount[menuOption] += 1
            userHasRPCMenuOption = True
            combined = False
            for cmenuOption in userInfo["menuOptions"]:
                if cmenuOption not in rpcsOfOptions:
                    continue
                if cmenuOption == menuOption:
                    continue
                if cmenuOption == "CG FMQL QP USER":
                    continue
                qualifierMOUserCountByMO[menuOption][cmenuOption] += 1
                combined = True
            if not combined:
                moAloneUserCounts[menuOption] += 1
        # There are SO users with no RPC menu options (important subset!)
        if userHasRPCMenuOption:
            cntSOUsersRPCOptions += 1
            if "isCreatedBy0" not in userInfo:
                cntNon0SOUsersRPCOptions += 1
        else:
            cntSOUsersNoRPCOptions += 1
    pureAlones = set(mo for mo in moAloneUserCounts if float(moAloneUserCounts[mo])/float(moUserCounts[mo]) > 0.15)
    pureQualifierOptions = set(mo for mo in moUserCounts if mo not in moAloneUserCounts)
    otherQualifierOptions = set(mo for mo in moUserCounts if mo in moAloneUserCounts and float(moAloneUserCounts[mo])/float(moUserCounts[mo]) <= 0.15)
    allQualifierOptions = pureQualifierOptions.union(otherQualifierOptions)

    mu = """## RPC Options of {} Classified
    
Based on active (SO) users use.
    
""".format(stationNo)

    tbl = MarkdownTable(["Type", "\\#"])
    tbl.addRow(["Users", len(userInfos)])
    tbl.addRow(["SO Users", cntSOUsers])
    tbl.addRow(["SO Users with RPC Options", cntSOUsersRPCOptions])
    tbl.addRow(["SO Users with other than RPC Options", cntSOUsersNoRPCOptions])
    tbl.addRow(["Zero SO Users w/RPC Options", cntSOUsersRPCOptions - cntNon0SOUsersRPCOptions])
    tbl.addRow(["Non Zero SO Users w/RPC Options", cntNon0SOUsersRPCOptions])
    tbl.addRow(["RPC Options", len(rpcsOfOptions)])
    tbl.addRow(["Used RPC Options", len(moUserCounts)])
    tbl.addRow(["Zero SO Users RPC Options", len(mo0UserCount)])
    tbl.addRow(["Non Zero SO Users RPC Options", len(moNon0UserCount)])
    tbl.addRow(["Pure Alone Options", len(pureAlones)])
    tbl.addRow(["Singleton Alone Options (NEVER COMBINED)", sum(1 for mo in moUserCounts if mo not in qualifierMOUserCountByMO)])
    tbl.addRow(["Pure Qualifier Options - NEVER on their own", len(pureQualifierOptions)])
    tbl.addRow(["Other Qualifier Options - < 15% on their own (so not _Pure Alones_)", len(otherQualifierOptions)])
    mu += tbl.md() + "\n\n"

    optionsWithGT1PRCTUsers = set(option for option in moUserCounts if float(moUserCounts[option])/float(cntSOUsersRPCOptions) > 0.01)
    mu += """Despite there being __{:,}__ employed RPC Options, only __{:,}__ are used by more than 1% of sign on users, the vast majority of which are remote/0 users.
    
""".format(len(moUserCounts), len(optionsWithGT1PRCTUsers))
    tbl = MarkdownTable(["Option", "Total Users", "0 Users", "Non 0 Users", "RPCs"])
    for mo in sorted(optionsWithGT1PRCTUsers, key=lambda x: moUserCounts[x], reverse=True):
        tbl.addRow([
            mo,
            moUserCounts[mo],
            reportAbsAndPercent(mo0UserCount[mo], moUserCounts[mo]) if mo in mo0UserCount else "",
            reportAbsAndPercent(moNon0UserCount[mo], moUserCounts[mo]) if mo in moNon0UserCount else "",
            len(rpcsOfOptions[mo])
        ])
    mu += tbl.md() + "\n\n"

    mu += "The {:,} active Non zero users need separate consideration. The top non Zero user options - > 10% non Zeros have it - are listed below. Note that other than the mainstream options, most are highlighted as they are mainly in Non Zero users\n\n".format(cntNon0SOUsersRPCOptions)
    optionsWithGT10PRCTUsers = set(option for option in moNon0UserCount if float(moNon0UserCount[option])/float(cntNon0SOUsersRPCOptions) > 0.1)
    tbl = MarkdownTable(["Option", "Non 0 Users", "Of Total", "RPCs"])
    for mo in sorted(optionsWithGT10PRCTUsers, key=lambda x: moNon0UserCount[x], reverse=True):
        level = round(float(moNon0UserCount[mo])/float(moUserCounts[mo]), 2)
        tbl.addRow([
            "__{}__".format(mo) if level > 0.75 else mo,
            reportAbsAndPercent(moNon0UserCount[mo], cntNon0SOUsersRPCOptions),
            level,
            len(rpcsOfOptions[mo])
        ])
    mu += tbl.md() + "\n\n"

    mu += "\nThere are {:,} _Pure Alones_, options that can exist on their own (> 15% of users with them have only them)\n\n".format(len(pureAlones))
    tbl = MarkdownTable(["Option", "Total Users", "0 Users", "Alone Users", "CPRS Combos", "Other Combos", "Quals", "Alone Quals", "Top Quals", "RPCs"])
    for mo in sorted(pureAlones, key=lambda x: moUserCounts[x], reverse=True):
        quals = qualifierMOUserCountByMO[mo]
        total = moUserCounts[mo]
        # The label is matched literally: escape it so regex metacharacters in
        # an option name cannot mis-match or raise
        moPattern = re.escape(mo)
        cntNonCPRSQuals = sum(
            jmoUserCounts[jmo] for jmo in jmoUserCounts
            if not re.search(r'OR CPRS GUI CHART', jmo) and jmo != mo and re.search(moPattern, jmo))
        # small options need a near-universal pairing before it is called a top qualifier
        thres = .1 if total > 100 else .9
        topQuals = sorted(
            [cmo for cmo in quals if float(quals[cmo])/float(total) > thres],
            key=lambda x: quals[x], reverse=True)
        topQualsMU = ", ".join(["{} ({:,})".format(cmo, quals[cmo]) for cmo in topQuals])
        _0Level = float(mo0UserCount[mo])/float(total)
        tbl.addRow([
            "__{}__".format(mo) if _0Level > 0.5 else mo,  # only highlight if a lot of 0's
            total,
            reportAbsAndPercent(mo0UserCount[mo], total) if mo in mo0UserCount else "",
            reportAbsAndPercent(moAloneUserCounts[mo], total),
            reportAbsAndPercent(quals["OR CPRS GUI CHART"], total) if "OR CPRS GUI CHART" in quals else "",
            reportAbsAndPercent(cntNonCPRSQuals, total) if cntNonCPRSQuals > 0 else "",
            len(quals),
            reportAbsAndPercent(
                sum(1 for cmo in quals if cmo in pureAlones),
                len(quals)
            ),
            topQualsMU,
            len(rpcsOfOptions[mo])
        ])
    mu += tbl.md() + "\n\n"

    mu += """__Note__ (mostly put into the Source Artifact manualRPCApplications):
    
  * _KPA VRAM GUI_ belongs to __VistA Remote Access Management (VRAM) Graphical User Interface (GUI)__ according to this [patch](https://github.com/OSEHRA/VistA/blob/master/Packages/Kernel/Patches/XU_8.0_629/XU-8_SEQ-502_PAT-629.TXT). It has a 8995 application entry and seems to sync credentials from the VBA 'VistA' to a local VistA - check out the RPCs it allows. Note that half its users are stand alone while the rest use CAPRI and very few use CPRS. Note too that this option DOES NOT HAVE MANY QUALIFIERS (unlike other 'alones')
  * _MAGJ VISTARAD WINDOWS_ is a __VistARad__ option according [to](https://www.va.gov/vdl/documents/clinical/vista_imaging_sys/imginstallgd_f.pdf). Additionally, note that the _Rad/Nuc Med Personnel menu_ defines further user permissions (where stored?) and there are a series of security keys guarding actions. Note that this option DOES NOT HAVE MANY QUALIFIERS (unlike other 'alones')
  * _MAG WINDOWS_ for __VistA Imaging and Capture Software__ according to [this](https://www.va.gov/vdl/documents/clinical/vista_imaging_sys/imginstallgd_f.pdf). Note that ala Rad, there are keys to further restrict options.
  * _DSIY ABOVE PAR_ belongs to __Above PAR (APAR)__ by the [TRM](https://www.oit.va.gov/Services/TRM/ToolPage.aspx?tid=7725)
  * _RMPR PURCHASE ORDER GUI_ is part of __PROSTHETICS PURCHASE ORDER GUI__
  * _OOP GUI EMPLOYEE_ is from __ASISTS__ which is being decommissioned in Jan 2019.
  
"""
    mu += "The balance of the \"Qualifier\" Options are defined at the end of this report.\n\n"

    # 8994_5 and use
    mu += "### File 8994_5 Applications and their options\n\n"
    appOptions = set(remoteAppsAndOptions.values())
    mu += """File 8994_5 defines 'Remote Applications'. Each is given a (default) option. There are {:,} applications using/sharing {:,} options. Note that {:,} of these options are NOT RPC options and {:,} are not assigned to any active user. Note that _JLV_ (for now) lacks an entry here or its own option (it uses CPRS, CAPRI and VPR options). The following shows the applications by option ...
    
""".format(
        len(remoteAppsAndOptions),
        len(appOptions),

        sum(1 for option in appOptions if option not in rpcsOfOptions),
        sum(1 for option in appOptions if option not in moUserCounts)
    )
    byOption = defaultdict(list)
    # .items() works on both Python 2 and 3 (iteritems is Python 2 only)
    for label, option in remoteAppsAndOptions.items():
        byOption[option].append(label)
    tbl = MarkdownTable(["Option", "RPCs", "Users", "Applications"])
    for option in sorted(byOption):
        optionMU = "__{}__".format(option) if option in moUserCounts else option
        tbl.addRow([optionMU, "NO" if option not in rpcsOfOptions else "", moUserCounts[option] if option in moUserCounts else "", ", ".join(sorted(byOption[option]))])
    mu += tbl.md() + "\n\n"

    # Per application: which users hold its option and which actually used the app
    stats = {}
    for userInfo in userInfos:
        if "signOnCount" not in userInfo:
            continue
        if "menuOptions" not in userInfo:
            raise Exception("Expect ALL signed on users to have at least one menu option")
        # Leaving in "userClasses" / proxies
        rpcMOs = [mo for mo in userInfo["menuOptions"] if mo in rpcsOfOptions]
        if not rpcMOs:
            continue
        signOnDetails = userInfo["signOnDetails"]
        hasRemoteApps = "remoteApps" in signOnDetails
        if hasRemoteApps:
            for rapp in signOnDetails["remoteApps"]:
                if rapp not in remoteAppsAndOptions:
                    raise Exception("New Unexpected Remote App {}".format(rapp))
        for app, mo in remoteAppsAndOptions.items():
            lbl = app
            if lbl not in stats:
                stats[lbl] = {"moLabel": mo, "appLabel": app, "mo": set(), "app": set(), "app0User": 0}
            if mo in rpcMOs:
                stats[lbl]["mo"].add(userInfo["userId"])
            if hasRemoteApps and app in signOnDetails["remoteApps"]:
                stats[lbl]["app"].add(userInfo["userId"])
                if "isCreatedBy0" in userInfo:
                    stats[lbl]["app0User"] += 1  # note if app use by 0 user
    tblRowCount = 0
    tbl = MarkdownTable(["App", "Option", "App Users", "App 0 Users", "App+MO", "!App MO", "App !MO"])
    for lbl in sorted(stats, key=lambda x: len(stats[x]["app"]), reverse=True):
        stat = stats[lbl]
        appUserCount = len(stat["app"])
        # skip applications nobody signed on with (also avoids dividing by zero)
        if appUserCount == 0:
            continue
        moAndApp = len(stat["mo"].intersection(stat["app"]))
        if float(moAndApp)/float(appUserCount) < 0.9:
            moAndAppMU = "{:,} [UNDER MATCH]".format(moAndApp)
        else:
            moAndAppMU = reportAbsAndPercent(moAndApp, appUserCount)
        moNoApp = len(stat["mo"] - stat["app"])
        moNoAppMU = "{:,} [APP-OPTION MATCH]".format(moNoApp) if float(moNoApp)/float(appUserCount) < 0.1 else moNoApp
        row = [
            stat["appLabel"],
            stat["moLabel"],
            appUserCount,
            reportAbsAndPercent(stat["app0User"], appUserCount),
            moAndAppMU,
            moNoAppMU,
            len(stat["app"] - stat["mo"])
        ]
        tbl.addRow(row)
        tblRowCount += 1
    mu += """What 8994.5 applications are used? It's {:,} out of the {:,}. What option best matches an app - does the __presence of an option predict the use of a (8994.5) application?__ Note that even 8994.5 shows option sharing and such sharing is borne out in the table of signon and user information below.
    
The low counts in the _App !MO_ column shows that the apps are good indicators that an option is present but in general _!App MO_ shows many cases where an option is too broadly given (CPRS, MAG WINDOWS ...) to predict app use. _VRAM_ is the only clear exception though VISTARAD and its singular option is probably an exception too.

Note that _DVBA CAPRI GUI_ is the only _qualifier_ option here. It is actually a _stand alone_ but is always paired with _OR CPRS GUI CHART_ by the CAPRI-style setup code. It's high _!App MO_ count is because of this pairing which is used by JLV and other apps.
    
""".format(tblRowCount, len(remoteAppsAndOptions))
    mu += tbl.md() + "\n\n"

    # Back to Qualifier Details: one table for more used, one for less used
    tblMU = MarkdownTable(["Option", "Total Users", "0 Users", "Others Quals", "Alone Quals", "Top Quals", "RPCs"])
    tblLU = MarkdownTable(["Option", "Total Users", "0 Users", "Others Quals", "Alone Quals", "Top Quals", "RPCs"])
    lessUsedThreshold = 30
    tblMUCount = 0
    tblLUCount = 0
    for mo in sorted(allQualifierOptions, key=lambda x: moUserCounts[x], reverse=True):
        quals = qualifierMOUserCountByMO[mo]
        total = moUserCounts[mo]
        row = [
            "__{}__".format(mo) if len(quals) < total else mo,
            total,
            reportAbsAndPercent(mo0UserCount[mo], total) if mo in mo0UserCount else "",
            len(quals),
            sum(1 for cmo in quals if cmo in pureAlones)
        ]
        tcmus = []
        for cmo in sorted(quals, key=lambda x: quals[x], reverse=True):
            level = round(float(quals[cmo]) / float(total), 2)
            # want Alones or High Match
            if not (level > 0.25 or cmo in pureAlones):
                continue
            if level == 1:
                tcmu = "__{}__ (ALL)".format(cmo)
            elif cmo in moAloneUserCounts:
                tcmu = "__{}__ ({})".format(cmo, level)
            else:
                tcmu = "{} ({})".format(cmo, level)
            tcmus.append(tcmu)
        # mark that some co-held options were left out of the listing
        if len(tcmus) < len(quals):
            tcmus.append("...")
        row.append(", ".join(tcmus))
        row.append(len(rpcsOfOptions[mo]))
        if total < lessUsedThreshold:
            tblLU.addRow(row)
            tblLUCount += 1
            continue
        tblMU.addRow(row)
        tblMUCount += 1
    # Back to Qualifiers
    mu += "### {:,} Qualifier Option Details\n\n".format(tblMUCount + tblLUCount)
    mu += "There are {:,} more used (> {:,} users) qualifiers. Those with more users than other qualifiers are highlighted as are combinations with primary/alone options ...\n\n".format(tblMUCount, lessUsedThreshold)
    mu += tblMU.md() + "\n\n"
    mu += "There are {:,} less used (< {:,} users) qualifiers ...\n\n".format(tblLUCount, lessUsedThreshold)
    mu += tblLU.md() + "\n\n"

    # Users whose RPC options include no option that is ever held alone
    byCombo = Counter()
    for userInfo in userInfos:
        if "signOnCount" not in userInfo:
            continue
        if "menuOptions" not in userInfo:
            continue
        # note: leaving in userClasses
        if sum(1 for mo in userInfo["menuOptions"] if mo in rpcsOfOptions) == 0:
            continue
        if "CG FMQL QP USER" in userInfo["menuOptions"]:
            continue
        if sum(1 for mo in userInfo["menuOptions"] if mo in moAloneUserCounts) == 0:
            combo = "/".join(sorted([mo for mo in userInfo["menuOptions"] if mo in moUserCounts]))
            byCombo[combo] += 1

    if len(byCombo):
        mu += """There are {:,} users w/o Alones - ie/ their 'apps' are option combos.
    
""".format(sum(byCombo[combo] for combo in byCombo))
        tbl = MarkdownTable(["Combination", "Users"])
        for combo in sorted(byCombo, key=lambda x: byCombo[x], reverse=True):
            tbl.addRow([combo, byCombo[combo]])
        mu += tbl.md() + "\n\n"

    # sorted so the listing is stable from run to run
    qualifiersWithoutAlones = sorted(
        mo for mo in allQualifierOptions
        if sum(1 for cmo in qualifierMOUserCountByMO[mo] if cmo in moAloneUserCounts) == 0)
    if len(qualifiersWithoutAlones):
        mu += "__Note__: the following Qualifiers (ie/ not alones) are NOT combined with Alones: {}\n\n\n".format(", ".join(qualifiersWithoutAlones))

    # Close (and so flush) the report explicitly instead of relying on GC
    with open(VISTA_REP_LOCN_TEMPL.format(stationNo) + "rpcOptionDetails.md", "w") as reportFile:
        reportFile.write(mu)
    
    """
Exemplo n.º 4
0
def assembleIntegrated():
    """Merge the per-station RPC interface definitions into one integrated list.

    For every station number in SNOS the station's
    "_rpcInterfaceDefinition.json" (located through VISTA_RPCD_LOCN_TEMPL)
    is loaded. Stations are then visited from the most recent last-install
    date to the oldest, with "999" forced last. The first station seen for
    an RPC label supplies the definition ("fromVistA"); every station that
    has the label is recorded in "inVistAs".

    A summary is printed and the merged list, sorted by label, is written
    to "../Definitions/rpcInterfaceDefinition.bjsn".

    Raises IndexError if a station has no definition with an "installed"
    value (other than "CG FMQL QP").
    """
    rpcInterfaceDefinitionBySNO = {}
    lastInstallBySNO = {}
    for sno in SNOS:
        defnPath = VISTA_RPCD_LOCN_TEMPL.format(sno) + "_rpcInterfaceDefinition.json"
        # 'with' closes the handle even if the JSON is malformed
        with open(defnPath) as defnFile:
            rpcInterfaceDefinitionBySNO[sno] = json.load(defnFile)
        # Only the date part (before "T") of "installed" is kept
        installs = set(
            defn["installed"].split("T")[0]
            for defn in rpcInterfaceDefinitionBySNO[sno]
            if "installed" in defn and defn["label"] != "CG FMQL QP")
        lastInstallBySNO[sno] = sorted(installs)[-1]

    def recency(sno):
        # "999" gets an artificially old date so it is always visited last
        return lastInstallBySNO[sno] if sno != "999" else "1900-01-01"

    rpcDefinitionsById = {}
    for sno in sorted(SNOS, key=recency, reverse=True):
        for rpcDefinition in rpcInterfaceDefinitionBySNO[sno]:
            rpc = rpcDefinition["label"]
            if rpc in rpcDefinitionsById:
                # already defined by an earlier-visited station: note presence only
                rpcDefinitionsById[rpc]["inVistAs"].append(sno)
                continue
            rpcDefinition["fromVistA"] = sno
            rpcDefinition["inVistAs"] = [sno]
            rpcDefinitionsById[rpc] = rpcDefinition

    integratedRPCInterfaceDefinition = sorted(
        rpcDefinitionsById.values(),
        key=lambda x: x["label"])
    totalIntegrated = len(integratedRPCInterfaceDefinition)

    # Single-argument parenthesised prints behave the same on Python 2 and 3
    print("Integrated Definitions: __{:,}__\n".format(totalIntegrated))
    for sno in sorted(SNOS):
        print("  * {}: {} / Last: {} / {}".format(
            sno if sno != "999" else "FOIA (999)",
            reportAbsAndPercent(
                sum(1 for defn in integratedRPCInterfaceDefinition
                    if defn["fromVistA"] == sno),
                totalIntegrated),
            lastInstallBySNO[sno],
            reportAbsAndPercent(len(rpcInterfaceDefinitionBySNO[sno]),
                                totalIntegrated),
        ))
    # Could add appearances ie/ in 1, 2 or 3
    iActives = set(defn["label"] for defn in integratedRPCInterfaceDefinition
                   if "isActive" in defn)
    print("\nActive: __{}__\n".format(
        reportAbsAndPercent(len(iActives), totalIntegrated)))
    for sno in sorted(SNOS):
        sActives = set(defn["label"]
                       for defn in rpcInterfaceDefinitionBySNO[sno]
                       if "isActive" in defn)
        print("  * {}: {:,} - not I {:,}".format(
            sno if sno != "999" else "FOIA (999)",
            len(sActives),
            len(sActives - iActives)  # 0 if base!
        ))
    # print("") emits the blank line on both Python 2 and 3; a bare `print`
    # does nothing on 3 and print() would show "()" on 2
    print("")

    # Close (and so flush) the output explicitly so the file is complete on disk
    with open("../Definitions/rpcInterfaceDefinition.bjsn", "w") as outFile:
        json.dump(integratedRPCInterfaceDefinition, outFile, indent=4)
Exemplo n.º 5
0
def reportBuildsNInstallsOld(stationNo):
    """Write the "RPCs According to Builds and Installs" report of a station.

    Loads the station's ``_9_6Reduction.json`` (builds) and
    ``_8994Reduction.json`` reductions from the location given by
    ``VISTA_RED_LOCN_TEMPL``, derives per-RPC build histories, packages,
    deletions and distribution/install gaps from the builds that carry
    "rpcs", compares the result with the labels of the 8994 reduction and
    writes the markdown to ``rpcsByBuildsNInstalls.md`` under
    ``VISTA_REP_LOCN_TEMPL``.

    Args:
        stationNo: station number used to fill in the location templates;
            "999" additionally gets a FOIA note appended to the report.

    Raises:
        IOError: if one of the reduction files (or ``_rpcBPIs.json``) cannot
            be opened.
        KeyError: if an RPC build lacks "label", "isInstalled" or (for an
            installed build) "dateInstalledFirst".
        ValueError/IndexError: if there is no RPC build, no dated build, no
            packaged RPC or no computable install gap to summarize.
    """
    redLocn = VISTA_RED_LOCN_TEMPL.format(stationNo)

    with open(redLocn + "_9_6Reduction.json") as fl:
        buildsReduction = json.load(fl)
    buildsRPCReduction = [bi for bi in buildsReduction if "rpcs" in bi]

    # For report - will OVERRIDE based on ACTIVE from Builds or Not (soon options too)
    with open(redLocn + "_8994Reduction.json") as fl:
        _8994Reduction = json.load(fl)
    _8994Labels = set(red["label"] for red in _8994Reduction)

    buildsByRPC = defaultdict(list)
    buildsWNewRPCReduction = []
    buildsByPackage = defaultdict(list)
    packagesByRPC = defaultdict(Counter)  # want latest to come out
    dateDistributeds = []
    countBuildsByYr = Counter()
    countNewRPCBuildsByYr = Counter()

    # Builds per RPC form an "audit" trail of RPC introduction, change and deletion
    rpcsSeen = set()
    for buildInfo in buildsRPCReduction:
        newRPCSeen = False
        if "package" in buildInfo:
            buildsByPackage[buildInfo["package"]].append(buildInfo)
        for actionType in buildInfo["rpcs"]:
            for rpc in buildInfo["rpcs"][actionType]:
                info = {"build": buildInfo["label"], "action": actionType}
                if "package" in buildInfo:
                    info["package"] = buildInfo["package"]
                    packagesByRPC[rpc][buildInfo["package"]] += 1
                if "dateDistributed" in buildInfo:
                    info["distributed"] = buildInfo["dateDistributed"]
                    # only a dated build can count as introducing an RPC
                    if rpc not in rpcsSeen:
                        newRPCSeen = True
                        rpcsSeen.add(rpc)
                if "dateInstalledFirst" in buildInfo:
                    info["installed"] = buildInfo["dateInstalledFirst"]
                buildsByRPC[rpc].append(info)
        if newRPCSeen:
            buildsWNewRPCReduction.append(buildInfo)
        # FMQL builds are kept out of the date range and the per year counts.
        # (A leftover debug print of a name that is not defined in this
        # function used to sit in the "no new RPC" branch; it is gone.)
        if "dateDistributed" in buildInfo and not re.search(
                r'FMQL', buildInfo["label"]):
            distributed = buildInfo["dateDistributed"]
            dateDistributeds.append(distributed)
            distributedYr = distributed.split("-")[0]
            countBuildsByYr[distributedYr] += 1
            if newRPCSeen:
                countNewRPCBuildsByYr[distributedYr] += 1

    # Flips from Builds to view from RPC side: only considers Builds that are
    # installed and if the first build seen for an RPC, it must be a
    # SEND TO SITE (create) build.
    rpcBPIByRPC = {}
    for buildInfo in buildsRPCReduction:
        # explicit comparison kept on purpose: only a false value skips
        if buildInfo["isInstalled"] == False:  # let's not count it!
            continue
        for actionType in buildInfo["rpcs"]:
            for rpc in buildInfo["rpcs"][actionType]:
                if rpc not in rpcBPIByRPC:
                    if actionType != "SEND TO SITE":  # let's not count until get a SEND
                        continue
                    rpcBPIByRPC[rpc] = {
                        "label": rpc,
                        "installed": buildInfo["dateInstalledFirst"],
                        "builds": []
                    }
                    if "dateDistributed" in buildInfo:
                        rpcBPIByRPC[rpc]["distributed"] = buildInfo[
                            "dateDistributed"]
                bir = {
                    "label": buildInfo["label"],
                    "action": actionType,
                    "installed": buildInfo["dateInstalledFirst"]
                }
                if "package" in buildInfo:
                    bir["package"] = buildInfo["package"]
                if "dateDistributed" in buildInfo:
                    bir["distributed"] = buildInfo["dateDistributed"]
                rpcBPIByRPC[rpc]["builds"].append(bir)
    for rpc in rpcBPIByRPC:
        bpi = rpcBPIByRPC[rpc]
        lastBuild = bpi["builds"][-1]
        if lastBuild["action"] == "DELETE AT SITE":
            bpi["isDeleted"] = True
            bpi["deleteInstalled"] = lastBuild["installed"]
            if "distributed" in lastBuild:
                bpi["deleteDistributed"] = lastBuild["distributed"]
        packages = [
            info["package"] for info in buildsByRPC[rpc] if "package" in info
        ]
        if len(set(packages)) == 1:
            bpi["package"] = packages[0]
        elif len(packages):  # can be none!
            packages.reverse()
            # ex override: [u'ORDER ENTRY/RESULTS REPORTING', u'GEN. MED. REC. - VITALS']
            bpi["package"] = [
                pkg for pkg in packages
                if pkg != "ORDER ENTRY/RESULTS REPORTING"
            ][0]  # last which isn't the overused OE

    # json.load() was called with indent=4 here, which json.load() rejects
    # with a TypeError, so the report was never reached. The keyword is
    # dropped; the file is still read as before.
    # NOTE(review): the loaded value is not used and rpcBPIByRPC above is not
    # persisted anywhere - this looks like a half converted json.dump().
    # Confirm whether _rpcBPIs.json should be (re)written from here.
    with open(redLocn + "_rpcBPIs.json") as fl:
        json.load(fl)

    # PICK ANYTHING BUT 'ORDER ENTRY/RESULTS REPORTING' if another there
    rpcsByPackage = defaultdict(list)
    for rpc in packagesByRPC:
        pkgCounts = packagesByRPC[rpc]
        if len(pkgCounts) > 1 and "ORDER ENTRY/RESULTS REPORTING" in pkgCounts:
            del pkgCounts["ORDER ENTRY/RESULTS REPORTING"]
        for pkg in pkgCounts:
            rpcsByPackage[pkg].append(rpc)

    dateDistributeds = sorted(dateDistributeds)
    # max() keeps the first of equally ranked entries, as the stable
    # descending sort it replaces did
    rpcWithMostBuilds = max(buildsByRPC, key=lambda x: len(buildsByRPC[x]))
    packageWithTheMostRPCs = max(rpcsByPackage,
                                 key=lambda x: len(rpcsByPackage[x]))
    noPackageRPCCount = sum(1 for rpc in buildsByRPC
                            if rpc not in packagesByRPC)

    deletedRPCs = set(
        rpc for rpc in buildsByRPC if buildsByRPC[rpc][-1]["action"] ==
        "DELETE AT SITE")  # this will include those never even installed!
    activeRPCs = set(buildsByRPC) - deletedRPCs

    _8994MissingActiveRPCs = activeRPCs - _8994Labels  # should be there but not
    _8994DeletedRPCs = _8994Labels.intersection(deletedRPCs)
    _8994NoBuildRPCs = _8994Labels - activeRPCs  # beyond active/still there

    mu = (
        "## RPCs According to Builds and Installs of {}\n"
        "    \n"
        "There are __{}__ builds defining __{:,}__ RPCs distributed from _{}_ to _{}_, __{}__ of which introduce new RPCs. RPC _{}_ appears in the most builds, __{:,}__. The median number of RPCs per Build is __{:,}__. \n"
        "\n"
        "RPCs are spread across __{:,}__ packages. Package _{}_ has the most RPCs, __{:,}__. __{:,}__ RPCs have more than one Package usually because of re-organization and splitting of Packages over the years. __{:,}__ RPCs have no package because their builds weren't assigned a package (yet).\n"
        "\n"
        "Builds can delete as well as add RPCs - __{:,}__ of the RPCs were deleted by the final Build they appeared in, leaving __{:,}__ RPCs active and installed.\n"
        "\n"
        "File _8994_ is suppossed to define the active RPCs in a VistA. However the 8994 of this system has __{:,}__ deleted RPCs, is missing __{:,}__ active RPCs and has __{:,}__ extra RPCs that never appear in a Build.\n"
        "    \n"
    ).format(
        stationNo,
        reportAbsAndPercent(len(buildsRPCReduction), len(buildsReduction)),
        len(buildsByRPC), dateDistributeds[0], dateDistributeds[-1],
        reportAbsAndPercent(len(buildsWNewRPCReduction),
                            len(buildsRPCReduction)), rpcWithMostBuilds,
        len(buildsByRPC[rpcWithMostBuilds]),
        numpy.percentile([len(buildsByRPC[rpc]) for rpc in buildsByRPC], 50),
        len(buildsByPackage), packageWithTheMostRPCs,
        len(rpcsByPackage[packageWithTheMostRPCs]),
        sum(1 for rpc in packagesByRPC if len(packagesByRPC[rpc]) > 1),
        noPackageRPCCount,
        len(deletedRPCs), len(activeRPCs), len(_8994DeletedRPCs),
        len(_8994MissingActiveRPCs), len(_8994NoBuildRPCs))

    # Packages by RPC
    tbl = MarkdownTable(["Package", "\\# RPCs", "Example RPC"])
    for pkg in sorted(rpcsByPackage,
                      key=lambda x: len(rpcsByPackage[x]),
                      reverse=True):
        tbl.addRow([
            "__{}__".format(pkg),
            len(rpcsByPackage[pkg]),
            min(rpcsByPackage[pkg])  # alphabetically first RPC as example
        ])
    mu += "{:,} Packages have RPCs, while {:,} RPCs have no Package. Clearly the top Packages (ORDERS, IMAGES and some COTS packages) need to be examined first ...\n\n".format(
        len(rpcsByPackage), noPackageRPCCount)
    mu += tbl.md() + "\n\n"

    # Builds by Year - may be restated subsequently so distinguish builds
    # introducing fresh RPCs from those just restating. Note that the total of
    # "new RPCs" is roughly the total of RPCs (some builds lack a date which
    # accounts for the discrepancy)
    tbl = MarkdownTable(["Year", "All RPC Builds", "New RPC Builds"])
    for yr in sorted(countBuildsByYr, key=int, reverse=True):
        tbl.addRow([
            yr,
            reportAbsAndPercent(countBuildsByYr[yr], len(buildsRPCReduction)),
            reportAbsAndPercent(countNewRPCBuildsByYr[yr],
                                len(buildsWNewRPCReduction))
        ])
    mu += (
        "RPC Builds by distribution year. Note that as builds often restate pre-existing RPCs, the following distinguishes all builds with RPCs from those that introduce new RPCs ...\n"
        "     \n"
    )
    mu += tbl.md() + "\n\n"

    # distributed | installed
    def muDate(buildInfos, dtProp):
        """Return the (first, last) day of dtProp over buildInfos, "" if none."""
        # drop any time portion: only the day is reported
        days = [bi[dtProp].split("T")[0] for bi in buildInfos if dtProp in bi]
        if not days:
            return "", ""
        return days[0], days[-1]

    tbl = MarkdownTable([
        "RPC", "Builds", "(Latest) Package", "Distributed",
        "[First] Install Gap"
    ])
    gaps = []
    noGapRPCs = []
    badGapRPCs = []
    for rpc in sorted(activeRPCs):

        firstD, lastD = muDate(buildsByRPC[rpc], "distributed")
        distribMU = lastD
        if firstD != lastD:
            distribMU = "{} - {}".format(firstD, distribMU)

        firstI, lastI = muDate(buildsByRPC[rpc], "installed")
        if firstI == "" or firstD == "":
            installGapMU = "__N/A__"
            noGapRPCs.append(rpc)
        elif firstI > firstD:
            gap = datetime.strptime(firstI, "%Y-%m-%d") - datetime.strptime(
                firstD, "%Y-%m-%d")
            installGapMU = str(gap).split(",")[0]  # "N days" part only
            gaps.append(gap.days)
        elif firstI == firstD:
            installGapMU = ""
            gaps.append(0)
        else:
            installGapMU = "__RERELEASE: D > I__: {} > {}".format(
                firstD, firstI)
            badGapRPCs.append(rpc)

        # Alt: move out of 'ORDER ENTRY/RESULTS REPORTING' if another as more precise too
        packageMU = ""
        if rpc in packagesByRPC:
            pkgCounts = packagesByRPC[rpc]
            packageMU = max(pkgCounts, key=lambda x: pkgCounts[x])

        tbl.addRow([
            "__{}__".format(rpc),
            len(buildsByRPC[rpc]), packageMU, distribMU, installGapMU
        ])

    mu += "__{:,}__ Active/Installed RPCs. The maximum gap in days between distribution and install is {:,}, the median is {:,}, {:,} have no gap at all. The gap isn't available if necessary dates are missing ({:,}) or the first install date comes BEFORE the build distribution date (__{:,}__) ...\n\n".format(
        len(activeRPCs), max(gaps), numpy.percentile(gaps, 50),
        gaps.count(0), len(noGapRPCs), len(badGapRPCs))
    mu += tbl.md() + "\n\n"

    # Deleted RPCs - note that there are probably more? or should be more
    # (retired packages). Note that only 'SHOULD BE DELETED' RPCs (see below)
    # need concern the integrated RPC Interface definition.
    #
    # TODO: work out retirement better to enforce more retireds
    tbl = MarkdownTable(
        ["RPC", "(Last) Deleting Build", "When (Dist/Install)"])
    for rpc in sorted(deletedRPCs):
        lastDelBuildInfo = buildsByRPC[rpc][-1]
        # a/cs for when no install info - and for an undated deleting build,
        # which previously raised a KeyError on "distributed"
        whenMU = "{} / {}".format(
            lastDelBuildInfo.get("distributed", "-"),
            lastDelBuildInfo["installed"].split("T")[0]
            if "installed" in lastDelBuildInfo else "-")
        tbl.addRow(
            [rpc, re.sub(r'\*', '\\*', lastDelBuildInfo["build"]), whenMU])
    mu += "{:,} Deleted/Uninstalled RPCs ...\n\n".format(len(deletedRPCs))
    mu += tbl.md() + "\n\n"

    # 8994 tie: Rogue RPCs
    #
    # those [1] builds says SHOULD be there but aren't ("MISSING") and
    # [2] builds don't account for them or should be deleted ("EXTRA") and
    # [3] builds delete but are still in 8994 ("SHOULD BE DELETED")
    #
    # Note: possible build logic wrong OR builds badly built (remove of RPC
    # not done but code removed?) etc
    rogueRPCs = _8994MissingActiveRPCs.union(_8994NoBuildRPCs,
                                             _8994DeletedRPCs)
    tbl = MarkdownTable(["RPC", "Problem"])
    for rpc in sorted(rogueRPCs):
        # a later unconditional reassignment used to reduce this to
        # MISSING/EXTRA, so "SHOULD BE DELETED" never reached the table
        if rpc in _8994MissingActiveRPCs:
            problem = "MISSING"
        elif rpc in _8994DeletedRPCs:
            problem = "SHOULD BE DELETED"
        else:
            problem = "EXTRA"
        tbl.addRow([rpc, problem])
    mu += "__8994 Rogue RPCs__ are [1] in 8994 but are not active according to Builds (\"EXTRA\" {:,}) or active by builds but not in 8994 (\"MISSING\" {:,}) or deleted by builds but in 8994 (\"SHOULD BE DELETED\" {:,}). __IMPORTANT__: must __test__ if the extra are still active (have code etc) and if so, why ...\n\n".format(
        len(_8994NoBuildRPCs), len(_8994MissingActiveRPCs),
        len(_8994DeletedRPCs))
    mu += tbl.md() + "\n\n"

    if stationNo == "999":
        mu += "__Note__: FOIA (999) has MANY _Rogues_. It seems that redaction is partial for non Open Source RPCs. It seems that the code is removed but the RPC remains.\n\n"

    with open(
            VISTA_REP_LOCN_TEMPL.format(stationNo) +
            "rpcsByBuildsNInstalls.md", "w") as fl:
        fl.write(mu)
Exemplo n.º 6
0
def muRPCInterfaceDefinition(rpcInterfaceDefinition,
                             isFOIA=False):  # allows global use
    """Return the markdown summary of an RPC interface definition.

    The markup covers, in order: overall counts and distribution/install
    dates, the staged marking of RPCs as inactive, distribution and deletion
    by year, the MUMPS routines implementing the RPCs and the packages the
    RPCs belong to.

    Args:
        rpcInterfaceDefinition: list of RPC definition dicts. Every entry is
            read for "label"; "distributed", "installed", "deleteDistributed",
            "isActive", "routine", "package", "options", "inVistAs" and the
            "has..." stage markers are used when present.
        isFOIA: if true, the last inactivation stage counts RPCs with at
            least one option that is not marked "isRemoved" instead of
            relying on "hasActiveSOUsedOptions".

    Returns:
        The markdown as one string.

    Raises:
        IndexError: if the definition is empty or no entry qualifies for the
            distributed or installed date ranges.
    """
    total = len(rpcInterfaceDefinition)

    def yearOf(dt):
        """Year (int) of a date string that starts with YYYY-."""
        return int(dt.split("-")[0])

    def fullAndInstalled(defn):
        """True if the first two inactivation stages are passed."""
        return "has8994FullEntry" in defn and "hasInstalledBuild" in defn

    def hasCurrentOption(defn):
        """True if at least one option isn't marked as removed."""
        return "options" in defn and any(
            "isRemoved" not in optionInfo for optionInfo in defn["options"])

    # First Install/Last Install; first distrib/ last distrib (not FMQL)
    distributeds = sorted(
        set(rpcDefn["distributed"] for rpcDefn in rpcInterfaceDefinition
            if "distributed" in rpcDefn
            and not re.search(r'FMQL', rpcDefn["label"])))
    firstDistributed = distributeds[0]
    lastDistributed = distributeds[-1]
    # NOTE(review): installs exclude 'CG FMQL' labels while distributions
    # exclude any 'FMQL' label - confirm the difference is intended.
    installeds = sorted(
        set(rpcDefn["installed"] for rpcDefn in rpcInterfaceDefinition
            if "installed" in rpcDefn
            and not re.search(r'CG FMQL', rpcDefn["label"])))
    lastInstalled = installeds[-1].split("T")[0]

    noActive = sum(1 for rpcDefn in rpcInterfaceDefinition
                   if "isActive" in rpcDefn)

    mu = ""

    # TODO: may add 'lastSignon' for user that may use RPC => can see last possible use
    mu += "There are __{:,}__ RPCs, __{}__ of which are active. The first RPCs were distributed on _{}_, the last on _{}_. The last installation happened on _{}_.".format(
        total, reportAbsAndPercent(noActive, total), firstDistributed,
        lastDistributed, lastInstalled)

    # Walk the deletion/marking of inactive
    mu += ("\n"
           "    \n"
           "RPCs are marked inactive in stages ...\n"
           "\n")
    tbl = MarkdownTable(["Stage", "\\# At/After"])
    tbl.addRow(["Total", total])
    # has8994FullEntry
    tbl.addRow([
        "8994 Full Entry",
        sum(1 for defn in rpcInterfaceDefinition if "has8994FullEntry" in defn)
    ])
    # Is in installed build
    tbl.addRow([
        "Installed Build",
        sum(1 for defn in rpcInterfaceDefinition if fullAndInstalled(defn))
    ])
    # Is in ActiveSO Option (999 just in active option)
    if isFOIA:
        tbl.addRow([
            "Has Current Option",
            sum(1 for defn in rpcInterfaceDefinition
                if fullAndInstalled(defn) and hasCurrentOption(defn))
        ])
    elif "inVistAs" in rpcInterfaceDefinition[0]:  # merged defns
        # issue is that FOIA only RPCs are marked active JUST for having options
        # and these needed to be added in
        baseSet = set(
            defn["label"] for defn in rpcInterfaceDefinition
            if fullAndInstalled(defn) and "hasActiveSOUsedOptions" in defn)
        foiaOnlySet = set(
            defn["label"] for defn in rpcInterfaceDefinition
            if defn["inVistAs"] == ["999"] and fullAndInstalled(defn)
            and hasCurrentOption(defn))
        tbl.addRow([
            "Has currently used Active Option",
            len(baseSet.union(foiaOnlySet))
        ])
    else:
        tbl.addRow([
            "Has currently used Active Option",
            sum(1 for defn in rpcInterfaceDefinition
                if fullAndInstalled(defn) and "hasActiveSOUsedOptions" in defn)
        ])
    mu += tbl.md() + "\n\n"

    # Summarize RPC introduction over the years ...
    # Can add in 'for HMP for 2016?' etc + add in # removed each year too as extra col
    byYrDistrib = Counter()
    byYrDeleteDistrib = Counter()
    byYrDistribInactive = Counter()
    noDistrib = 0
    withDistribYr = 0
    totalDeleted = 0
    for defn in rpcInterfaceDefinition:
        # allow for edge case that delete distributed date but no distrib date:
        # the deletion is counted under the year of the delete's own
        # distribution date. (It used to be filed under "distributed", which
        # gave the wrong year and raised a KeyError in just this edge case.)
        if "deleteDistributed" in defn:
            byYrDeleteDistrib[yearOf(defn["deleteDistributed"])] += 1
            totalDeleted += 1
        if "distributed" not in defn:
            noDistrib += 1
            continue
        distribYr = yearOf(defn["distributed"])
        withDistribYr += 1
        byYrDistrib[distribYr] += 1
        if "isActive" not in defn:
            byYrDistribInactive[distribYr] += 1

    mu += (
        "### RPC Distribution by Year\n"
        "    \n"
        "{:,} RPCs have no 'first distributed' date as their first builds lacked a date - the other {:,} all have dates. Here is RPC distribution year by year, along with the small amount of deletion too. Note that only __{}__ RPCs are formally deleted though __{}__ should be.\n"
        "\n"
    ).format(noDistrib, sum(byYrDistrib.values()),
             reportAbsAndPercent(totalDeleted, total),
             reportAbsAndPercent(total - noActive, total))
    # Note: deleted = deleted in a year while Inactive == of the active
    tbl = MarkdownTable(["Year", "Added \\#", "Deleted \\#", "Inactive \\#"])
    for yr in sorted(byYrDistrib, reverse=True):
        ddMU = byYrDeleteDistrib[yr] if yr in byYrDeleteDistrib else ""
        inactiveMU = ""
        if yr in byYrDistribInactive:
            inactiveMU = reportAbsAndPercent(byYrDistribInactive[yr],
                                             byYrDistrib[yr])
        tbl.addRow([
            str(yr),
            reportAbsAndPercent(byYrDistrib[yr], withDistribYr), ddMU,
            inactiveMU
        ])
    mu += tbl.md() + "\n\n"

    # Want MUMPS entry pts for ALL
    cntMUMPSEntry = Counter()
    noMUMPSEntry = 0
    activeRoutines = set()
    inactiveRoutines = set()  # overlap == MIX
    for defn in rpcInterfaceDefinition:
        if "routine" not in defn:
            noMUMPSEntry += 1
            continue
        cntMUMPSEntry[defn["routine"]] += 1
        if "isActive" in defn:
            activeRoutines.add(defn["routine"])
        else:
            inactiveRoutines.add(defn["routine"])

    # a real list: numpy can't work on a dict view (Python 3)
    rpcsPerRoutine = list(cntMUMPSEntry.values())
    mostRPCsPerRoutine = numpy.max(rpcsPerRoutine)

    mu += (
        "### MUMPS Routine Implementation\n"
        "    \n"
        "__{}__ RPCs are implemented in __{}__ separate MUMPS routines, while __{}__ identified RPCs lack an implementation. The highest number of RPCs per routine is __{}__ (_{}_), the median is __{}__, the lowest is __{}__. __{}__ routines implement only active RPCs, __{:,}__ only inactive RPCs (candidates for deletion?), while __{:,}__ implement a mix of active and inactive RPCs.\n\n"
    ).format(
        sum(rpcsPerRoutine),
        len(cntMUMPSEntry), noMUMPSEntry, mostRPCsPerRoutine, [
            routine for routine in cntMUMPSEntry
            if cntMUMPSEntry[routine] == mostRPCsPerRoutine
        ][0], numpy.percentile(rpcsPerRoutine, 50),
        numpy.min(rpcsPerRoutine),
        reportAbsAndPercent(len(activeRoutines - inactiveRoutines),
                            len(cntMUMPSEntry)),
        len(inactiveRoutines - activeRoutines),
        len(activeRoutines.intersection(inactiveRoutines)))

    mu += "The (outliers) that implement the most RPCs are ...\n\n"
    routinesByRPCCnt = defaultdict(list)
    for routine in cntMUMPSEntry:
        routinesByRPCCnt[cntMUMPSEntry[routine]].append(routine)
    tbl = MarkdownTable(["\\# RPCs", "Routine(s)"])
    # outlier threshold: third quartile plus three times the interquartile range
    q1 = numpy.percentile(rpcsPerRoutine, 25)
    q3 = numpy.percentile(rpcsPerRoutine, 75)
    ohto = q3 + (3 * (q3 - q1))
    for cnt in sorted(routinesByRPCCnt, reverse=True):
        if cnt < ohto:
            break
        tbl.addRow([
            cnt, ", ".join([
                "__{}__ [INACTIVE]".format(routine)
                if routine in inactiveRoutines else routine
                for routine in sorted(routinesByRPCCnt[cnt])
            ])
        ])
    mu += tbl.md() + "\n\n"

    # Packages
    noPackageRPCs = []
    rpcByPackage = defaultdict(list)
    firstRPCDistribByPackage = {}
    activeRPCs = set()
    for defn in rpcInterfaceDefinition:
        if "isActive" in defn:
            activeRPCs.add(defn["label"])
        if "package" not in defn:
            noPackageRPCs.append(defn["label"])
            continue
        package = defn["package"]
        rpcByPackage[package].append(defn["label"])
        if "distributed" in defn and (
                package not in firstRPCDistribByPackage
                or defn["distributed"] < firstRPCDistribByPackage[package]):
            firstRPCDistribByPackage[package] = defn["distributed"]
    activeCountByPackage = dict(
        (package,
         sum(1 for rpc in rpcByPackage[package] if rpc in activeRPCs))
        for package in rpcByPackage)
    inactiveOnlyPackages = set(package for package in rpcByPackage
                               if activeCountByPackage[package] == 0)
    someInactivesPackages = set(
        package for package in rpcByPackage
        if package not in inactiveOnlyPackages
        and activeCountByPackage[package] < len(rpcByPackage[package]))
    packagesByRPCCount = sorted(rpcByPackage,
                                key=lambda x: len(rpcByPackage[x]),
                                reverse=True)

    # Based on reduction alg - needs to evolve as see effectiveness
    mu += (
        "### Packages\n"
        "    \n"
        "_Package_ is a sometimes inconsistently used breakdown of VistA into a set of cooperating applications. All but __{}__ RPCs are assigned to __{}__ different packages, __{:,}__ of which only have _inactive_ RPCs and __{:,}__ more have a mix of active and inactive RPCs. \n"
        "\n"
        "Those with at least one active RPC are - note ORDER ENTRY has a huge proportion which MAY be due to redundant/overlapping purposes of individual RPCs ...\n"
        "\n"
    ).format(reportAbsAndPercent(len(noPackageRPCs), total),
             len(rpcByPackage), len(inactiveOnlyPackages),
             len(someInactivesPackages))

    tbl = MarkdownTable(
        ["Package", "First Distributed RPC", "Active RPCs", "Inactive RPCs"])
    for package in packagesByRPCCount:
        if package in inactiveOnlyPackages:
            continue
        noActives = activeCountByPackage[package]
        noInactives = len(rpcByPackage[package]) - noActives
        tbl.addRow([
            package,
            firstRPCDistribByPackage.get(package, ""),
            noActives if noActives > 0 else "",
            noInactives if noInactives > 0 else ""
        ])
    mu += tbl.md() + "\n\n"

    mu += "The 'inactive-only' Packages are ...\n\n"
    tbl = MarkdownTable(
        ["Package", "First Distributed RPC", "RPCs (Inactive)"])
    for package in packagesByRPCCount:
        if package not in inactiveOnlyPackages:
            continue
        # every RPC of such a package is inactive so the total is the count
        tbl.addRow([
            package,
            firstRPCDistribByPackage.get(package, ""),
            len(rpcByPackage[package])
        ])
    mu += tbl.md() + "\n\n"

    return mu
Exemplo n.º 7
0
def reportPackagesNBuilds(stationNo):
    """Write the "Packages and Builds" report of a station.

    Loads the station's ``_9_6Reduction.json`` from the location given by
    ``VISTA_RED_LOCN_TEMPL``, groups its builds by package and writes a
    markdown summary, a per package table and a table of package-less builds
    with RPCs to ``packagesAndBuilds.md`` under ``VISTA_REP_LOCN_TEMPL``.

    Args:
        stationNo: station number used to fill in the location templates.

    Raises:
        IOError: if the reduction can't be read or the report can't be written.
        IndexError: if no (non FMQL) build carries a distribution date.
        ValueError: if no build is assigned to a package.
    """
    with open(VISTA_RED_LOCN_TEMPL.format(stationNo) +
              "_9_6Reduction.json") as fl:
        _9_6Reduction = json.load(fl)

    def hasRPCBuild(buildInfos):
        """True if at least one of the builds defines RPCs."""
        return any("rpcs" in bi for bi in buildInfos)

    buildsByPackage = defaultdict(list)
    noPackageBuilds = []
    allDatesDistributeds = set()
    for buildInfo in _9_6Reduction:
        # FMQL builds don't count for the overall distribution range
        if "dateDistributed" in buildInfo and not re.search(
                r'FMQL', buildInfo["label"]):
            allDatesDistributeds.add(buildInfo["dateDistributed"])
        if "package" in buildInfo:
            buildsByPackage[buildInfo["package"]].append(buildInfo)
        else:
            noPackageBuilds.append(buildInfo)
    packagesWith2013OnBuilds = [
        pkg for pkg in buildsByPackage
        if any("dateDistributed" in bi
               and int(bi["dateDistributed"].split("-")[0]) >= 2013
               for bi in buildsByPackage[pkg])
    ]
    allDatesDistributeds = sorted(allDatesDistributeds)
    firstDateDistributed = allDatesDistributeds[0]
    lastDateDistributed = allDatesDistributeds[-1]
    countBuildsPerPackage = dict(
        (pkg, len(buildsByPackage[pkg])) for pkg in buildsByPackage)
    # a real list: numpy can't work on a dict view (Python 3)
    buildCounts = list(countBuildsPerPackage.values())
    medianBuildsPerPackage = numpy.percentile(buildCounts, 50)
    maxBuildsPerPackage = max(buildCounts)
    # max() keeps the first of equally ranked packages, as the stable
    # descending sort it replaces did
    pkgWithMostBuilds = max(countBuildsPerPackage,
                            key=lambda x: countBuildsPerPackage[x])

    mu = (
        "## Packages and Builds\n"
        "    \n"
        "There are {:,} builds, distributed between {} and {}. {:,} packages cover {} of the builds, median number of builds per package is {:,}, maximum is {:,} in __{}__. Only {} packages have builds distributed from 2013 on (_should the balance be retired?_). {} builds have no package and only {} builds have RPCs.\n"
        "    \n"
    ).format(
        len(_9_6Reduction), firstDateDistributed, lastDateDistributed,
        len(buildsByPackage),
        reportAbsAndPercent(sum(buildCounts), len(_9_6Reduction)),
        medianBuildsPerPackage, maxBuildsPerPackage, pkgWithMostBuilds,
        reportAbsAndPercent(len(packagesWith2013OnBuilds),
                            len(buildsByPackage)),
        reportAbsAndPercent(len(noPackageBuilds), len(_9_6Reduction)),
        reportAbsAndPercent(sum(1 for bi in _9_6Reduction if "rpcs" in bi),
                            len(_9_6Reduction)))

    mu += "{:,} Packages and their builds, highlight for the {:,} packages with at least one RPC build ...\n\n".format(
        len(buildsByPackage),
        sum(1 for pkg in buildsByPackage
            if hasRPCBuild(buildsByPackage[pkg])))
    tbl = MarkdownTable([
        "Package", "Build \\#", "Build Dates", "Build w/RPC \\#",
        "Build w/RPC Delete \\#"
    ])
    for pkg in sorted(buildsByPackage,
                      key=lambda x: len(buildsByPackage[x]),
                      reverse=True):
        pkgBuilds = buildsByPackage[pkg]
        rpcBuildInfos = [bi for bi in pkgBuilds if "rpcs" in bi]
        rpcBuildInfosDelete = [
            bi for bi in rpcBuildInfos if "DELETE AT SITE" in bi["rpcs"]
        ]
        # packages with RPC builds are highlighted
        pkgMU = "__{}__".format(pkg) if rpcBuildInfos else pkg
        # only the years of distribution are shown
        yrsDistributed = sorted(
            set(bi["dateDistributed"].split("-")[0] for bi in pkgBuilds
                if "dateDistributed" in bi))
        if len(yrsDistributed) == 0:
            ddMU = ""
        elif len(yrsDistributed) > 1:
            ddMU = "{} - {}".format(yrsDistributed[0], yrsDistributed[-1])
        else:
            ddMU = yrsDistributed[0]
        tbl.addRow([
            pkgMU,
            len(pkgBuilds), ddMU,
            len(rpcBuildInfos) if rpcBuildInfos else "",
            len(rpcBuildInfosDelete) if rpcBuildInfosDelete else ""
        ])

    mu += tbl.md() + "\n\n"

    noPackageBuildsWRPCs = [bi for bi in noPackageBuilds if "rpcs" in bi]
    mu += "{:,} Builds without a Package but with RPCs ...\n\n".format(
        len(noPackageBuildsWRPCs))
    tbl = MarkdownTable(["Build", "RPC \\#s"])
    for bi in sorted(noPackageBuildsWRPCs, key=lambda x: x["label"]):
        # RPCs are listed per action type: total them
        tbl.addRow([bi["label"], sum(len(bi["rpcs"][x]) for x in bi["rpcs"])])
    mu += tbl.md() + "\n\n"

    with open(VISTA_REP_LOCN_TEMPL.format(stationNo) + "packagesAndBuilds.md",
              "w") as fl:
        fl.write(mu)