示例#1
0
class Connectivity(FeatureInterface):
    """Feature that reports a column's connectivity within its table."""

    def __init__(self):
        # Helper that produces the per-column connectivity values for a table.
        self.connectivityIdentifier = ConnectivityIdentifier()

    def calculate(self, column, columnIndex, table):
        """Return the connectivity value of the column at ``columnIndex``.

        ``column`` is accepted but not used here; the value is looked up by
        position in the result of ``getConnectivity(table)``.
        """
        return self.connectivityIdentifier.getConnectivity(table)[columnIndex]
class SupportConnectivityIdentifier(object):
    """Choose a table's subject column from combined support and connectivity scores."""

    def __init__(self):
        self.logger = Logger().getLogger(__name__)
        self.connectivityIdentifier = ConnectivityIdentifier()
        self.supportIdentifier = SupportIdentifier()

    def identifySubjectColumn(self, table, supportFloor=10, supportCeil=70, connectivityThreshold=0.01, alpha=0.5):
        """Return the index of the column with the highest combined score.

        Each column's score is
        ``alpha * support + (1 - alpha) * connectivity * 100`` after filtering:

        * a support that is not strictly between ``supportFloor`` and
          ``supportCeil`` is replaced by 0;
        * a connectivity that is not strictly greater than
          ``connectivityThreshold`` is replaced by 0.

        Args:
            table: Table passed through to the connectivity and support
                identifiers.
            supportFloor: Exclusive lower bound for a support to be kept.
            supportCeil: Exclusive upper bound for a support to be kept.
            connectivityThreshold: Exclusive lower bound for a connectivity
                to be kept.
            alpha: Weight of the support term; the connectivity term is
                weighted by ``1 - alpha``.

        Returns:
            The index of the first column that attains the maximum score.

        Raises:
            ValueError: If no connectivity values are returned for the table.
            IndexError: If fewer supports than connectivities are returned.
        """
        connectivities = self.connectivityIdentifier.getConnectivity(table, applyWeights=False)
        supports = self.supportIdentifier.getSupport(table)

        supports = [support if supportFloor < support < supportCeil else 0 for support in supports]

        # Scale kept connectivities by 100 to put them on the same scale as
        # the supports (presumably fractions vs. percentages -- TODO confirm).
        connectivities = [
            connectivity * 100 if connectivity > connectivityThreshold else 0
            for connectivity in connectivities
        ]

        # Index into supports (rather than zip) so a length mismatch still
        # surfaces as an IndexError instead of being silently truncated.
        consups = [
            alpha * supports[columnIndex] + (1 - alpha) * connectivity
            for columnIndex, connectivity in enumerate(connectivities)
        ]

        if not consups:
            raise ValueError("cannot identify a subject column: no connectivity scores for table")

        return consups.index(max(consups))