Пример #1
0
    def forward(self, graph, alreadySeenIn, activeNodesIn):
        """
        One frontier-expansion step over the adjacency graph.

        Inputs:
            graph: A tuple (nbrs, nbrCounts) representing the adjacency graph of nodes.
            alreadySeenIn: Tensor of node indices already visited.
            activeNodesIn: Tensor of current frontier nodes (neighbors of the
                seen set which don't fall back into it).
        Outputs:
            alreadySeenOut: alreadySeenIn union the newly discovered nodes.
            activeNodesOut: nbrs(activeNodesIn) - alreadySeenIn.
        """
        nbrs, nbrCounts = graph
        seen = set(alreadySeenIn.tolist())

        # Gather every neighbor of the frontier that has not been seen yet.
        frontier = set()
        for node in activeNodesIn.tolist():
            count = int(nbrCounts[node])
            frontier.update(
                int(nbr) for nbr in nbrs[node, 0:count].tolist()
                if nbr not in seen)

        seen |= frontier

        activeNodesOut = longTensor(sorted(frontier),
                                    device=activeNodesIn.device)
        alreadySeenOut = longTensor(sorted(seen),
                                    device=alreadySeenIn.device)

        return alreadySeenOut, activeNodesOut
Пример #2
0
def SpotNeighborsExplorerTest(testNo, appConfig, modelArgs, device):
    """
    Randomized equivalence test for SpotNeighborsExplorer.

    Builds a random adjacency graph and a random starting frontier, then
    repeatedly expands the frontier with both the default implementation
    and the pure-python one, asserting identical results at every step
    until the frontier is exhausted.
    """
    from hier2hier.models.spotNeighborsExplorer import SpotNeighborsExplorer

    # Random graph: nodeCount nodes, each with up to maxNbrs neighbor slots.
    nodeCount = random.randint(1, 100)
    # BUGFIX: nodeCount / 2 is a float and random.randint requires ints
    # (TypeError on Python >= 3.10). Also guard the range so it isn't
    # empty when nodeCount == 1.
    maxNbrs = random.randint(1, max(1, nodeCount // 2))
    nbrs = torch.randint(nodeCount, (nodeCount, maxNbrs), device=device)
    nbrCounts = torch.randint(maxNbrs, (nodeCount, ), device=device)
    # Invalidate neighbor slots beyond each node's neighbor count.
    for node in range(nodeCount):
        for nbr in range(int(nbrCounts[node]), maxNbrs):
            nbrs[node, nbr] = -1
    graph = (nbrs, nbrCounts)

    explorerUnderTest = SpotNeighborsExplorer(device=device)
    explorerToMatch = SpotNeighborsExplorer(impl_selection="python",
                                            device=device)

    # Random starting frontier (possibly empty).
    startActiveSetCount = random.randint(0, int(nodeCount / 3))
    activeSetIn = longTensor(sorted(
        random.sample(range(nodeCount), startActiveSetCount)),
                             device=device)
    alreadySeenSetIn = activeSetIn.clone()
    while activeSetIn.shape[0]:
        alreadySeenOut1, activeSetOut1 = explorerToMatch(
            graph, alreadySeenSetIn, activeSetIn)
        alreadySeenOut2, activeSetOut2 = explorerUnderTest(
            graph, alreadySeenSetIn, activeSetIn)

        # Both implementations must agree on contents; the length checks
        # additionally rule out duplicate entries.
        assert (set(alreadySeenOut1.tolist()) == set(alreadySeenOut2.tolist()))
        assert (len(alreadySeenOut1.tolist()) == len(alreadySeenOut2.tolist()))
        assert (set(activeSetOut1.tolist()) == set(activeSetOut2.tolist()))
        assert (len(activeSetOut1.tolist()) == len(activeSetOut2.tolist()))

        alreadySeenSetIn = alreadySeenOut1
        activeSetIn = activeSetOut1
Пример #3
0
 def avdl2Ndfo(self):
     """Map each AVDL index to the NDFO index of the node owning that attribute."""
     retval = [None] * len(self.__adfo2Toi)
     for node, avdlList in self.node2AvdlList.items():
         for avdl in avdlList:
             retval[avdl] = self.node2Ndfo[node]
     return longTensor(retval, device=self.device)
Пример #4
0
    def encodedAttrSymbolsByAvdlp(self):
        """Pack the vocab-encoded attribute value strings, in AVDL order.

        Attributes with empty values are skipped; returns an empty tensor
        when no non-empty values exist.
        """
        vocab = self.torchBatch.dataset.fields["src"].vocabs.attrValues
        encoded = []
        for adfo in self.avdl2Adfo:
            value = self.attrsByAdfo[adfo][1]
            if value:
                encoded.append(
                    longTensor([vocab.stoi[ch] for ch in value],
                               device=self.device))

        if not encoded:
            return longTensor(
                [],
                device=self.device,
            )
        return rnn.pack_sequence(encoded)
Пример #5
0
    def posNbrhoodGraphByGndtol(self):
        """
        Positional neighborhood adjacency, re-indexed by GNDTOL.

        Returns (adjListTensor, adjLengthsTensor): padded adjacency lists
        (batch_first) and the true length of each list.
        """
        adjLists = []
        for gndtol in range(self.graphNodeCount):
            gni = self.gndtol2Gni[gndtol]
            nbrGndtols = sorted(self._gni2Gndtol[nbrGni]
                                for nbrGni in self.posNbrhoodGraphByGni[gni])
            adjLists.append(longTensor(nbrGndtols, device=self.device))

        lengths = longTensor([len(adj) for adj in adjLists],
                             device=self.device)
        padded = rnn.pad_sequence(adjLists, batch_first=True)

        return (padded, lengths)
Пример #6
0
    def encodedAttrLabelsByAvdl(self):
        """Vocab-encode every attribute label, ordered by AVDL index."""
        attrsVocab = self.torchBatch.dataset.fields["src"].vocabs.attrs

        encoded = []
        for adfo in self.avdl2Adfo:
            label = self.attrsByAdfo[adfo][0]
            encoded.append(attrsVocab.stoi[label])
        return longTensor(encoded, device=self.device)
Пример #7
0
 def parentSelectorByNdfo(self):
     """For each NDFO position, the NDFO index of that node's parent."""
     selector = []
     for node in self.ndfo2Node:  # Node at each NDFO position.
         parent = self.node2Parent[node]
         selector.append(self.node2Ndfo[parent])
     return longTensor(selector, device=self.device)
Пример #8
0
    def avdlAttrSelectorsListByNdac(self):
        """
        Per-attribute-position AVDL selector tensors, reversed so the
        lists come in increasing order of length.
        """
        # The node at NDAC position 0 carries the maximum attribute count.
        maxAttrCount = len(self.ndfo2Node[self.ndac2Ndfo[0]].attrib)
        selectors = [[] for _ in range(maxAttrCount)]
        for avdlIndices in self.ndac2AvdlList:
            for attrNumber, avdlIndex in enumerate(avdlIndices):
                selectors[attrNumber].append(avdlIndex)

        # Reverse, because we want the items to come in increasing order of length.
        selectors.reverse()
        return [longTensor(item, device=self.device) for item in selectors]
Пример #9
0
 def childSelectorByNdfoList(self):
     """
     NDFO child-node selector lists, one per child position, reversed so
     that lists come in increasing order of length.
     """
     # The loop below only works because nodes appear in decreasing fanout
     # order: node 0 has the maximum fanout.
     maxFanout = len(self.ndfo2Node[0])
     selectors = [[] for _ in range(maxFanout)]
     for node in self.ndfo2Node:
         for childNumber, childNode in enumerate(node):
             selectors[childNumber].append(self.node2Ndfo[childNode])
     selectors.reverse()
     return [longTensor(item, device=self.device) for item in selectors]
Пример #10
0
    def __iter__(self, mode=AppMode.Generate):
        """
        Iterate over preprocessed batches, caching them on first use.

        The first pass pulls batches from the parent iterator, preprocesses
        each one, and copies the fields needed downstream into a slim
        AttrTuple; later passes replay the cached list.

        NOTE(review): Python never passes arguments to __iter__ implicitly,
        so a non-default `mode` only takes effect when __iter__ is called
        explicitly — confirm callers actually do this.
        """
        if self.savedBatches is None:
            # First pass: preprocess every batch and cache a slim copy.
            self.savedBatches = []
            for batch in super().__iter__():
                savedBatchData = AttrTuple()
                processedBatch = self.preprocess_batch(batch)
                savedBatchData.sampleCount = len(processedBatch.torchBatch.src)
                # Node encodings and tree-structure selectors.
                savedBatchData.encodedNodesByNdfo = processedBatch.encodedNodesByNdfo
                savedBatchData.parentSelectorByNdfo = processedBatch.parentSelectorByNdfo
                savedBatchData.childSelectorByNdfoList = processedBatch.childSelectorByNdfoList
                savedBatchData.decreasingFanoutsFactorByNdfo = processedBatch.decreasingFanoutsFactorByNdfo
                # Attribute encodings and index mappings (some converted to
                # tensors here via longTensor).
                savedBatchData.encodedAttrLabelsByAvdl = processedBatch.encodedAttrLabelsByAvdl
                savedBatchData.encodedAttrSymbolsByAvdlp = processedBatch.encodedAttrSymbolsByAvdlp
                savedBatchData.avdl2Ndac = longTensor(processedBatch.avdl2Ndac,
                                                      device=self.device)
                savedBatchData.ndac2Ndfo = longTensor(processedBatch.ndac2Ndfo,
                                                      device=self.device)
                savedBatchData.avdl2Ndfo = processedBatch.avdl2Ndfo
                savedBatchData.avdlAttrSelectorsListByNdac = processedBatch.avdlAttrSelectorsListByNdac
                savedBatchData.decreasingAttrCountsFactorByNdac = processedBatch.decreasingAttrCountsFactorByNdac
                # Text/tail encodings and their index mappings.
                savedBatchData.encodedTextByTtDLP = processedBatch.encodedTextByTtDLP
                savedBatchData.encodedTailByTlDLP = processedBatch.encodedTailByTlDLP
                savedBatchData.ndttl2Ndac = longTensor(
                    processedBatch.ndttl2Ndac, device=self.device)
                savedBatchData.ndtll2Ndttl = processedBatch.ndtll2Ndttl
                savedBatchData.ndfo2Ndtll = longTensor(
                    processedBatch.ndfo2Ndtll, device=self.device)
                savedBatchData.ndfo2Ndac = longTensor(processedBatch.ndfo2Ndac,
                                                      device=self.device)
                # Target-output data and graph/spotlight mappings.
                savedBatchData.targetOutputsByTdol = processedBatch.targetOutputsByTdol
                savedBatchData.targetOutputLengthsByTdol = processedBatch.targetOutputLengthsByTdol
                savedBatchData.targetOutputsByTdolList = processedBatch.targetOutputsByTdolList
                savedBatchData.tdol2Toi = processedBatch.tdol2Toi
                savedBatchData.toi2Tdol = processedBatch.toi2Tdol
                savedBatchData.gndtol2Tdol = processedBatch.gndtol2Tdol
                savedBatchData.goi2Gndtol = processedBatch.goi2Gndtol
                savedBatchData.gndtol2Gni = longTensor(
                    processedBatch.gndtol2Gni, device=self.device)
                savedBatchData.posNbrhoodGraphByGndtol = processedBatch.posNbrhoodGraphByGndtol
                savedBatchData.fullSpotlight = processedBatch.fullSpotlight
                savedBatchData.targetOutputsByToi = processedBatch.targetOutputsByToi
                savedBatchData.targetOutputLengthsByToi = processedBatch.targetOutputLengthsByToi
                savedBatchData.srcSymbolsByGndtol = processedBatch.srcSymbolsByGndtol

                # Test attrs
                if mode == AppMode.Test:
                    # Tests get the full processed batch instead of the slim copy.
                    savedBatchData = processedBatch
                elif mode == AppMode.Evaluate:
                    savedBatchData.inputs = processedBatch.inputs
                    savedBatchData.outputs = processedBatch.outputs

                self.savedBatches.append(savedBatchData)

        for processedBatch in self.savedBatches:
            yield processedBatch
Пример #11
0
    def encodedTextByNdtlp2(self):
        """
        Packed vocab-encoded text (index 0) and tail (index 1) sequences.

        An entry stays None when the corresponding node list is empty or
        yields no non-empty strings.
        """
        textVocab = self.torchBatch.dataset.fields["src"].vocabs.text
        retval = [None, None]
        for i, isTail in enumerate([False, True]):
            nodes = self.ndtl2Node2[isTail]
            if not nodes:
                continue
            encoded = []
            for node in nodes:
                text = node.tail if isTail else node.text
                if text in [None, ""]:
                    continue
                encoded.append(
                    longTensor([textVocab.stoi[ch] for ch in text],
                               device=self.device))
            retval[i] = rnn.pack_sequence(encoded) if encoded else None
        return retval
Пример #12
0
    def goi2Gni(self):
        """
        Build the GOI -> GNI permutation over all graph nodes.

        Graph nodes are enumerated in GOI order — XML nodes first, then
        attributes, then text/tail symbol positions — and each is mapped
        to its GNI index. The result covers every graph node exactly once
        (checked by the assertions at the end).
        """
        retval = []
        # ndfo2Toi, avdl2Toi, avdlp2Toi, ndtlp2Toi2[0], ndtlp2Toi2[1].

        # Process nodes.
        for xmlTree in self.inputs:
            for node in xmlTree.iter():
                ndfo = self.node2Ndfo[node]
                retval.append(self.ndfo2Gni[ndfo])

        # Process attributes.
        for xmlTree in self.inputs:
            for node in xmlTree.iter():
                ndfo = self.node2Ndfo[node]
                ndac = self.ndfo2Ndac[ndfo]
                for attrIndex in range(len(node.attrib)):
                    avdl = self.attrTuple2Avdl[(ndac, attrIndex)]
                    retval.append(self.avdl2Gni[avdl])

        # Process text and tail.
        for isTail in [False, True]:
            # NOTE(review): naming suggests ndtlp2Gni covers tail positions
            # and ndttp2Gni covers text positions — confirm against where
            # these maps are built.
            ndtxp2Gni = self.ndtlp2Gni if isTail else self.ndttp2Gni
            for xmlTree in self.inputs:
                for node in xmlTree.iter():
                    tailOrText = node.tail if isTail else node.text
                    if not tailOrText:
                        # Nodes without text/tail contribute no graph nodes.
                        continue
                    ndfo = self.node2Ndfo[node]
                    ndtx2 = self.ndfo2Ndtl2[isTail][ndfo]
                    # One graph node per symbol position in the string.
                    for symIndex in range(len(tailOrText)):
                        ndtxp2 = self.ndtxTuple2Ndtlp2[isTail][(ndtx2,
                                                                symIndex)]
                        retval.append(ndtxp2Gni[ndtxp2])

        # Safety assertions: every graph node appears, with no duplicates.
        assert (len(retval) == self.graphNodeCount)
        assert (len(set(retval)) == self.graphNodeCount)

        return longTensor(retval, device=self.device)
Пример #13
0
 def tdol2Toi(self):
     """TOI indices sorted by decreasing target output length, ties by TOI."""
     lengths = self.targetOutputLengthsByToi
     order = sorted(range(len(lengths)), key=lambda toi: (-lengths[toi], toi))
     return longTensor(order, device=self.device)
Пример #14
0
 def ndtll2Ndttl(self):
     """Map each NDTLL position to its NDTTL index via the shared NDFO index."""
     mapped = []
     for ndfo in self.ndtll2Ndfo:
         mapped.append(self.ndfo2Ndttl[ndfo])
     return longTensor(mapped, device=self.device)
Пример #15
0
    def cullSmallFactors(
        gndtol2Tdol,
        beamMode,
        discoveredGndtol,
        attentionFactors,
        maxAttentionFactorByTDOL,
        spotlightThreshold,
    ):
        """
            Cull spotlight entries whose attention factor is small relative
            to the per-tree maximum (below spotlightThreshold of it).

            Inputs
                gndtol2Tdol: Maps each GNDTOL index to its tree (TDOL) index.
                beamMode: Whether attention factors carry a beam dimension.
                discoveredGndtol:
                    Shape: sliCount
                attentionFactors:
                    if beamMode:
                        Shape: beamCount X sliCount
                    else:
                        Shape: sliCount
                maxAttentionFactorByTDOL
                    if beamMode:
                        Shape: treeCount X beamCount
                    else:
                        Shape: treeCount
                spotlightThreshold: Fraction of the per-tree maximum below
                    which a factor is culled.
            Outputs:
                False if no entry survives; True if every entry survives;
                otherwise a tensor of the surviving positions within
                discoveredGndtol.
        """
        # Get TDOL of each SLI.
        #   Shape: sliCount
        #   Value: tree index of the SLI.
        discoveredGndtol2Tdol = gndtol2Tdol[discoveredGndtol]

        # Indexing below
        # if beamMode:
        #     Shape: sliCount X beamCount
        # else:
        #     Shape: sliCount
        maxAttentionFactorToUse = maxAttentionFactorByTDOL[
            discoveredGndtol2Tdol]

        if beamMode:
            # Shape: beamCount X sliCount
            # Permute last two dimensions to make it ready for comparison.
            maxAttentionFactorToUse = maxAttentionFactorToUse.permute(1, 0)

        # Purpose of comparison is to cull small(1/1000) factors.
        # In-place scale is safe: advanced indexing above produced a copy.
        maxAttentionFactorToUse *= spotlightThreshold

        # Compare.
        # Shape: beamCount X SliCount.
        retainedIndicesBool = (attentionFactors > maxAttentionFactorToUse)

        if beamMode:
            # Collapse along beamCount dimension.
            # Retain if any beam is suggesting retention.
            retainedIndicesBool = (torch.sum(retainedIndicesBool, dim=0) != 0)

        # Fast paths: nothing retained / everything retained.
        retainedCount = torch.sum(retainedIndicesBool)
        if retainedCount == 0:
            return False
        elif retainedCount == len(retainedIndicesBool):
            return True

        # Positions (within discoveredGndtol) of the surviving entries.
        retainedIndices = longTensor([
            i for i, _ in enumerate(discoveredGndtol) if retainedIndicesBool[i]
        ],
                                     device=retainedIndicesBool.device)
        return retainedIndices
Пример #16
0
 def fullSpotlight(self):
     """Spotlight covering every graph node: indices [0, graphNodeCount)."""
     allNodes = [index for index in range(self.graphNodeCount)]
     return longTensor(allNodes, device=self.device)
Пример #17
0
 def gni2Gndtol(self):
     """
     Mapping of GNI indices to GNTDOL indices.
     """
     mapping = self._gni2Gndtol
     return longTensor(mapping, device=self.device)
Пример #18
0
 def outputs(self):
     """Target output symbols and their lengths, moved to self.device."""
     tgt, tgtLengths = self.torchBatch.tgt
     return (torch.tensor(tgt, device=self.device),
             longTensor(tgtLengths, device=self.device))
Пример #19
0
 def gni2Tdol(self):
     """Map each GNI index to the TDOL index of its tree (via TOI)."""
     tdols = [int(self.toi2Tdol[toi]) for toi in self.gni2Toi]
     return longTensor(tdols, device=self.device)
Пример #20
0
 def toi2Tdol(self):
     """Inverse of tdol2Toi: map each TOI back to its TDOL position."""
     inverse = invertPermutation(self.tdol2Toi.tolist())
     return longTensor(inverse, device=self.device)
Пример #21
0
 def encodedNodesByNdfo(self):
     """Vocab-encode each node's tag, in NDFO order."""
     vocab = self.torchBatch.dataset.fields["src"].vocabs.tags
     encoded = [vocab.stoi[node.tag] for node in self.ndfo2Node]
     return longTensor(encoded, device=self.device)