示例#1
0
 def __init__(self, regexp):
     """Build the epsilon-transition digraph for ``regexp``.

     The digraph has ``len(regexp) + 1`` vertices: one per pattern
     character plus a final vertex at index ``len(regexp)``.

     Supported metacharacters are ``(``, ``)``, ``|`` and ``*``.

     Raises:
         ValueError: if the parentheses / alternation operators in
             ``regexp`` are unbalanced.
     """
     self.regexp = regexp
     self.m = len(regexp)
     self.graph = Digraph(self.m + 1)
     ops = []  # indices of '(' and '|' that have not been closed yet
     for i in range(self.m):
         lp = i
         if regexp[i] == '(' or regexp[i] == '|':
             ops.append(i)
         elif regexp[i] == ')':
             # An unmatched ')' used to surface as a bare IndexError from
             # list.pop(); report it like every other malformed pattern.
             if not ops:
                 raise ValueError("Invalid regular expression")
             or_ = ops.pop()
             if regexp[or_] == '|':
                 if not ops:
                     raise ValueError("Invalid regular expression")
                 lp = ops.pop()
                 self.graph.addEdge(lp, or_ + 1)
                 self.graph.addEdge(or_, i)
             else:
                 # ops only ever stores positions of '(' and '|', so this
                 # is the '(' case.
                 lp = or_
         # Closure: look one character ahead for '*'.
         if i < self.m - 1 and regexp[i + 1] == '*':
             self.graph.addEdge(lp, i + 1)
             self.graph.addEdge(i + 1, lp)
         # Metacharacters fall through to the next state.
         if regexp[i] in ('(', '*', ')'):
             self.graph.addEdge(i, i + 1)
     if ops:
         raise ValueError("Invalid regular expression")
示例#2
0
def read_graph(filename):
    """Load a weighted digraph from a text file.

    The first line is parsed as an integer and passed to ``Digraph``.
    Every following line must split into exactly three fields
    ``source dest weight``; each becomes one ``insert_arc`` call with the
    endpoints converted to ``int`` and the weight to ``float``.

    Returns the populated ``Digraph``.
    """
    with open(filename, 'r') as reader:
        header = reader.readline()
        g = Digraph(int(header))
        for record in reader:
            source, dest, weight = record.split()
            g.insert_arc(int(source), int(dest), float(weight))
    return g
示例#3
0
class NFA:
    """Regular-expression matcher backed by an epsilon-transition digraph.

    Supported metacharacters: ``(``, ``)``, ``|``, ``*`` and the wildcard
    ``.``.  The digraph has one vertex per pattern character plus a final
    vertex at index ``m``.
    """
    regexp = None
    m = 0

    def __init__(self, regexp):
        """Build the epsilon-transition digraph for ``regexp``.

        Raises:
            ValueError: if the parentheses / alternation operators in
                ``regexp`` are unbalanced.
        """
        self.regexp = regexp
        self.m = len(regexp)
        self.graph = Digraph(self.m + 1)
        ops = []  # indices of '(' and '|' that have not been closed yet
        for i in range(self.m):
            lp = i
            if regexp[i] == '(' or regexp[i] == '|':
                ops.append(i)
            elif regexp[i] == ')':
                # An unmatched ')' used to surface as a bare IndexError
                # from list.pop(); report it like other malformed input.
                if not ops:
                    raise ValueError("Invalid regular expression")
                or_ = ops.pop()
                if regexp[or_] == '|':
                    if not ops:
                        raise ValueError("Invalid regular expression")
                    lp = ops.pop()
                    self.graph.addEdge(lp, or_ + 1)
                    self.graph.addEdge(or_, i)
                else:
                    # ops only ever stores positions of '(' and '|', so
                    # this is the '(' case.
                    lp = or_
            # Closure: look one character ahead for '*'.
            if i < self.m - 1 and regexp[i + 1] == '*':
                self.graph.addEdge(lp, i + 1)
                self.graph.addEdge(i + 1, lp)
            # Metacharacters fall through to the next state.
            if regexp[i] in ('(', '*', ')'):
                self.graph.addEdge(i, i + 1)
        if ops:
            raise ValueError("Invalid regular expression")

    def recognizes(self, txt):
        """Return True if the whole of ``txt`` matches the pattern.

        Raises:
            ValueError: if a character of ``txt`` that gets examined is one
                of the metacharacters ``*``, ``|``, ``(`` or ``)``.
        """
        # States reachable from the start state through epsilon edges.
        dfs = DirectedDFS(self.graph, [0])
        pc = [v for v in range(self.graph.Vertex()) if dfs.marked[v]]
        for ch in txt:
            if ch in ('*', '|', '(', ')'):
                raise ValueError("text contains the metacharacter %s" %
                                 (ch))

            # States reached by consuming ch from a current state.
            match = []
            for v in pc:
                if v == self.m:
                    continue
                if self.regexp[v] == ch or self.regexp[v] == '.':
                    match.append(v + 1)

            # Follow epsilon edges from the newly reached states.
            dfs = DirectedDFS(self.graph, match)
            pc = [v for v in range(self.graph.Vertex()) if dfs.marked[v]]
            if not pc:
                # No live state left: the text cannot match.
                return False
        # Accept only if the final state is among the live states.
        return self.m in pc
示例#4
0
    def addEdge(self, edge):
        """
        Insert ``edge`` together with its mirror image.

        The edge is first handed to ``Digraph.addEdge`` unchanged; a second
        ``Edge`` with source and destination swapped is then built and
        added the same way.

        Requires: edge Edge
        Ensures: edge[src] = dest and edge[dest] = src
        """
        Digraph.addEdge(self, edge)
        Digraph.addEdge(
            self, Edge(edge.getDestination(), edge.getSource()))
class SymbolDigraph:
    """Digraph whose vertices are addressed by string names.

    ``st`` maps a name to its integer vertex index, ``keys`` is the
    inverse mapping and ``graph`` is the underlying ``Digraph``.
    """

    def __init__(self, filename, delimiter):
        """Build the symbol table and the digraph from ``filename``.

        The file is read twice: the first pass assigns an index to every
        distinct token, the second pass adds an edge from the first token
        of each line to every other token on that line.
        """
        self.st = dict()
        self.keys = dict()
        # NOTE(review): the caller-supplied delimiter is discarded here, so
        # the first pass splits on arbitrary whitespace while the second
        # pass splits on single spaces.  Left unchanged because the
        # semantics of ``In`` are not visible from this class - confirm
        # before honouring the parameter.
        delimiter = None
        myin = In(filename, delimiter)
        while myin.hasNextLine():
            for token in myin.readLine().split(delimiter):
                if token not in self.st:
                    self.st[token] = len(self.st)

        # Inverse index: vertex number -> name.
        for name, idx in self.st.items():
            self.keys[idx] = name

        self.graph = Digraph(len(self.st))
        myin = In(filename, '\n')
        while myin.hasNextLine():
            a = myin.readLine().split(' ')
            v = self.st.get(a[0])
            for token in a[1:]:
                self.graph.addEdge(v, self.st.get(token))

    def contains(self, s):
        """Return True if ``s`` is a known vertex name."""
        return s in self.st

    def index(self, s):
        """Return the vertex index of name ``s``, or None if unknown."""
        return self.st.get(s)

    def indexOf(self, s):
        """Return the vertex index of name ``s``, or None if unknown."""
        # Previously called the nonexistent self.get() and always raised
        # AttributeError; the lookup lives in the symbol table.
        return self.st.get(s)

    def name(self, v):
        """Return the name stored for vertex index ``v``."""
        self.validateVertex(v)
        return self.keys[v]

    def nameOf(self, v):
        """Return the name stored for vertex index ``v``."""
        self.validateVertex(v)
        return self.keys[v]

    def G(self):
        """Return the underlying digraph."""
        return self.graph

    def digraph(self):
        """Return the underlying digraph."""
        return self.graph

    def validateVertex(self, v):
        """Raise ValueError unless ``0 <= v < self.graph.Vertex()``."""
        Ver = self.graph.Vertex()
        if v < 0 or v >= Ver:
            # Raising a bare string is a TypeError in Python 3; raise a
            # real exception that carries the intended message.
            raise ValueError(
                "vertex %d is not between 0 and %d " % (v, Ver - 1))
示例#6
0
    def __init__(self, s):
        """Build the epsilon-transition digraph for the pattern ``s``.

        ``s`` is first rewritten by ``__convert``; the digraph gets one
        vertex per character of the rewritten pattern plus a final vertex.
        """
        self._ops = list()
        self._re = list(self.__convert(s))
        self._M = len(self._re)
        self._G = Digraph(self._M + 1)

        for i in range(self._M):
            lp = i  # left parenthesis(bracket, brace), used for closure

            # (), | -- supports several '|' inside one pair of parentheses
            if self._re[i] == '(' or self._re[i] == '|':
                self._ops.append(i)
            elif self._re[i] == ')':
                or_pos = list()
                _or = self._ops.pop()
                while self._re[_or] == '|':
                    or_pos.append(_or)
                    _or = self._ops.pop()
                lp = _or  # left parenthesis
                for pos in or_pos:
                    self._G.add_edge(lp, pos + 1)
                    self._G.add_edge(pos, i)

            # meta characters, support only convert meta character
            # \, ., |, *, (, ), +, [, ], {, }
            # NOTE(review): the escaped character itself is still processed
            # as an operator on the next iteration - confirm intended scope.
            if i < self._M - 1 and self._re[i] == '\\':
                escape = '\\.*+?|()[]{}'  # '\\.|*()+[]{}'
                # str.find() returns -1 (truthy) when absent and 0 (falsy)
                # for the first character, so it cannot be used as a
                # boolean; test membership instead.
                if self._re[i + 1] in escape:
                    self._G.add_edge(i, i + 1)
                else:
                    print("please don't use only one \\ "
                          "or \\(special character) like \\s,"
                          " which is not finish")

            # closure, and look forward to check
            # * closure, zero or more recognizes
            if i < self._M - 1 and self._re[i + 1] == '*':
                self._G.add_edge(lp, i + 1)
                self._G.add_edge(i + 1, lp)
            # + closure, one or more recognizes
            if i < self._M - 1 and self._re[i + 1] == '+':
                self._G.add_edge(i + 1, lp)
            # ? closure, zero or one recognizes
            if i < self._M - 1 and self._re[i + 1] == '?':
                self._G.add_edge(lp, i + 1)

            # keep moving
            if self._re[i] in ('(', '*', ')', '+', '?'):
                self._G.add_edge(i, i + 1)
示例#7
0
    def test_Digraph(self):
        """Check that ``Digraph`` and ``Digraph_AM`` agree on the fixture.

        Both are created with 5 vertices and fed the same edges from
        ``self.G``; their ``V``, ``E`` and ``adjlist(1)`` must be equal.
        """
        lhs = Digraph(5)
        rhs = Digraph_AM(5)
        for edge in self.G:
            for graph in (lhs, rhs):
                graph.addEdge(edge[0], edge[1])

        self.assertTrue(lhs.V == rhs.V)
        self.assertTrue(lhs.E == rhs.E)

        lhs_adjacent = lhs.adjlist(1)
        rhs_adjacent = rhs.adjlist(1)
        self.assertTrue(lhs_adjacent == rhs_adjacent)
示例#8
0
def main():
    filename = raw_input('input the graph path: ')
    G = Digraph(filename)
    print 'The adjacent table of the input Graph is: \n'
    print G

    print ''
    #Start_point = raw_input('input the start point: ')
    DFS = Depth_First_Order(G)
    print '\nThe order used for the SCC search of orininal G is:: '
    print DFS.topo_list()
    print '\nThe toplocical order the the graph is: '
    DFS.show_topo_order()
    
    print '----------------the reversed G------------------'
    print G.reverse()
    DFS = Depth_First_Order(G.reverse())
    print '\nThe order used for the SCC of reversed G is: '
    print DFS.topo_list()
    print '\nThe toplocical order the the reversed graph is: '
    DFS.show_topo_order()
    def __init__(self, filename, delimiter):
        """Build the name/index tables and the digraph from ``filename``.

        Pass one assigns consecutive indices to every distinct token;
        pass two adds an edge from the first token of each line to each of
        the remaining tokens.  The ``delimiter`` argument is overwritten
        with ``None`` before use.
        """
        self.st = {}
        self.keys = {}
        delimiter = None
        reader = In(filename, delimiter)
        while reader.hasNextLine():
            for token in reader.readLine().split(delimiter):
                if token not in self.st:
                    self.st[token] = len(self.st)

        # Inverse table: index -> name.
        for name, idx in self.st.items():
            self.keys[idx] = name

        self.graph = Digraph(len(self.st))
        reader = In(filename, '\n')
        while reader.hasNextLine():
            tokens = reader.readLine().split(' ')
            v = self.st.get(tokens[0])
            for token in tokens[1:]:
                self.graph.addEdge(v, self.st.get(token))
    return self.count
  
  def processVertexEarly(self, s):
    """No-op hook taking a vertex ``s``; always returns None."""
    return None
    
  def processVertexLate(self, s):
    """No-op hook taking a vertex ``s``; always returns None."""
    return None
  
  def processEdge(self,s, n):
    """No-op hook taking the edge endpoints ``s`` and ``n``; returns None."""
    return None

# Command-line driver (Python 2 print syntax):
#   argv[1] - graph input file handed to Digraph
#   argv[2] - integer id of the source vertex
if __name__ == '__main__':
  inputFile = sys.argv[1]
  sourceId = int(sys.argv[2])
  g = Digraph(inputFile)
  d = DirectedDFS(g, sourceId)

  # One output line per vertex id: "<source> to <vid> : <pathTo result>".
  for vid in g.getVertexIds():
    # The trailing comma keeps the path on the same output line.
    print sourceId, "to", vid, ":",
    print d.pathTo(vid)
  print ""
  
 # if d.getCount() == g.getNoOfVertices():
 #   print "connected"
 # else:
 #   print "NOT connected"
  

def openTwoPathOpenTimes(numNodes, transaction_list, verbose):
    """Given a list of transactions which are in the form of 
    a list of (sender, receiver, timestamp) tuples, return the list
    of times at open two-paths are created.
    
    Parameters:
        numNodes         - number of actors (nodes)
        transaction_list - list of (sender, receiver, timestamp) tuples.
                           Sender and receiver are intergers in 0..numNodes-1
                           and timestamps are numeric values.
                           The list must be ordered by timestamp ascending.
        verbose          - if True write debug output to stdout

    Return value: 
        dict { (i, j, k) : t } where (i, j, k) is an open
        directed two-path (note this means it is not part of a
        transitive triad i.e. i -> k is not present, but it may be
        part of a cyclic triad i.e. k -> i may be present), and t is
        the time the two-path was created (open). BUT only if the
        second arc in two-path has higher timestamp than the first
        (i.e. we don't count a two-path that goes backward in time
        along the path)

    """
    G = Digraph(numNodes)
    lastTime = None
    pathdict = {}  # dict mapping (i,j,k) two-path tuple to open time
    for trans in transaction_list:
        assert (trans[TSENDER] >= 0 and trans[TSENDER] < numNodes)
        assert (trans[TRECEIVER] >= 0 and trans[TRECEIVER] < numNodes)
        assert (lastTime is None or trans[TTIME] >= lastTime)
        if verbose:
            print trans[TSENDER], '->', trans[TRECEIVER], ' at time ', trans[
                TTIME],
        (ulist, vlist) = openTwoPaths(G, trans[TSENDER], trans[TRECEIVER])
        i = trans[TSENDER]
        j = trans[TRECEIVER]
        if len(ulist) > 0 or len(vlist) > 0:
            if verbose:
                print 'opened', len(ulist) + len(vlist), 'two-paths'
            for u in ulist:  # u -> i -> j
                path_1st_time = G.G[u][i]
                path_2nd_time = trans[TTIME]  # will be G.G[i][j] when inserted
                if path_2nd_time > path_1st_time:
                    if verbose:
                        print '  path from', u, 'is forward in time (', path_1st_time, ',', path_2nd_time, '), including'
                    if not pathdict.has_key((u, i, j)):
                        pathdict[(u, i, j)] = path_2nd_time
                    else:
                        if verbose:
                            print '  two-path ', u, i, j, 'already present from time', pathdict[
                                (u, i, j)], ' not updating'
                else:
                    if verbose:
                        print '  path from', u, 'is backwards in time (', path_1st_time, ',', path_2nd_time, '), skipping'
            if len(vlist) > 0:
                if verbose:
                    print '  ', len(vlist), ' are backward in time, skipping'
            for v in vlist:  #  i -> j -> v
                path_1st_time = trans[TTIME]  # will be G.G[i][j] when inserted
                path_2nd_time = G.G[j][v]
                # as the transactions are ordered in time we cannot
                # have this a two-path ordered in time, as the 2nd is older
                assert (path_1st_time > path_2nd_time)
        else:
            if verbose:
                print

        # now check if the new arc i -> j would close any currently open
        # two-paths. If so, remove those from the dictionary of open
        # two paths.
        for v in closedTwoPaths(G, i, j):
            # For each v, i -> v -> j is now a two-path closed by i -> j
            if verbose:
                print '  removing ', i, v, j, ' as it is now a transitive triad'
            # note (i, v, j) might exist in pathdict as it was an open two-path
            # but NOT NECESSARILY as it might have been ignored as backward in time
            if pathdict.has_key((i, v, j)):
                pathdict.pop((i, v, j))
            else:
                if verbose:
                    print '  (did not exist in dict)'

        # add this new arc i -> j to the graph
        # note there is a potential inconsistency here in that this
        # arc might already exist in which case we update the time with
        # the new (later) time, however in the pathdict dictionary
        # we do not update times of open two-paths but keep the first
        # opening time. Need to decide which one really is correct.
        G.insertArc(trans[TSENDER], trans[TRECEIVER], trans[TTIME])

        lastTime = trans[TTIME]
    return pathdict
示例#12
0
'''WinPython 3.4.4 64-bit'''

import time
from Digraph import Digraph

# read file: 14.1 seconds
# Load the edge list: each line of SCC.txt holds "tail head" as integers.
start = time.time()
digraph = Digraph()
with open("SCC.txt", "rt") as f:
    for line in f:
        tail, head = map(int, line.split())
        digraph.add_edge(tail, head)
finish = time.time()
elapsed = 1.0 * (finish - start)
print("Time to read file: %.3f" % elapsed)
print("Number of graph vertices: %d" % digraph.num_nodes())
print()

# Time the strongly-connected-component size computation.
# calculate scc sizes: 9.100 seconds
# answer:
start = time.time()
scc_sizes = digraph.scc_sizes()
finish = time.time()
elapsed = 1.0 * (finish - start)
print("Calculation time: %.3f" % elapsed)

# Report the first five entries of scc_sizes on a single line.
print("Component sizes: ", end="")
for i in range(5):
    print("%d " % scc_sizes[i], end="")
print()
示例#13
0
 def addEdge(self, edge):
   """Add ``edge`` and then a second Edge with its endpoints swapped."""
   Digraph.addEdge(self, edge)
   Digraph.addEdge(self, Edge(edge.getDestination(), edge.getSource()))
示例#14
0
                    self.edgeTo[w] = v

    def hasPathTo(self, v):
        """Return the ``marked`` entry recorded for vertex ``v``."""
        visited = self.marked
        return visited[v]

    def pathTo(self, v):
        """Return the vertices from ``self.s`` to ``v`` as a list.

        The path is rebuilt by following ``edgeTo`` backwards from ``v``
        until ``self.s`` is reached.  Returns None when ``hasPathTo(v)``
        is false.
        """
        if not self.hasPathTo(v):
            return None
        trail = [v]
        current = v
        while current != self.s:
            current = self.edgeTo[current]
            trail.append(current)
        # Collected target-first; flip so the source comes first.
        trail.reverse()
        return trail


# Demo: build a digraph from the file named in argv[1], search from
# vertex 0 and dump the search state.
if __name__ == '__main__':
    from Digraph import Digraph
    from In import In
    import sys

    source_stream = In(sys.argv[1])
    G = Digraph(fileobject=source_stream)
    search = BreadthFirstPaths(G, 0)
    print(search.edgeTo)
    print(search.marked)
    print(search.pathTo(3))
    print(search.distTo)
示例#15
0
class NFA:
    """This class provides a data type for creating a
    non-deterministic finite state automaton(NFA) from a regular expression
    and testing whether a given string is matched by that regular expression.
    """
    def __init__(self, s):
        """Build the epsilon-transition digraph for the pattern ``s``.

        ``s`` is first rewritten by ``__convert``; the digraph gets one
        vertex per character of the rewritten pattern plus a final vertex.
        """
        self._ops = list()
        self._re = list(self.__convert(s))
        self._M = len(self._re)
        self._G = Digraph(self._M + 1)

        for i in range(self._M):
            lp = i  # left parenthesis(bracket, brace), used for closure

            # (), | -- supports several '|' inside one pair of parentheses
            if self._re[i] == '(' or self._re[i] == '|':
                self._ops.append(i)
            elif self._re[i] == ')':
                or_pos = list()
                _or = self._ops.pop()
                while self._re[_or] == '|':
                    or_pos.append(_or)
                    _or = self._ops.pop()
                lp = _or  # left parenthesis
                for pos in or_pos:
                    self._G.add_edge(lp, pos + 1)
                    self._G.add_edge(pos, i)

            # meta characters, support only convert meta character
            # \, ., |, *, (, ), +, [, ], {, }
            # NOTE(review): the escaped character itself is still processed
            # as an operator on the next iteration - confirm intended scope.
            if i < self._M - 1 and self._re[i] == '\\':
                escape = '\\.*+?|()[]{}'  # '\\.|*()+[]{}'
                # str.find() returns -1 (truthy) when absent and 0 (falsy)
                # for the first character, so it cannot be used as a
                # boolean; test membership instead.
                if self._re[i + 1] in escape:
                    self._G.add_edge(i, i + 1)
                else:
                    print("please don't use only one \\ "
                          "or \\(special character) like \\s,"
                          " which is not finish")

            # closure, and look forward to check
            # * closure, zero or more recognizes
            if i < self._M - 1 and self._re[i + 1] == '*':
                self._G.add_edge(lp, i + 1)
                self._G.add_edge(i + 1, lp)
            # + closure, one or more recognizes
            if i < self._M - 1 and self._re[i + 1] == '+':
                self._G.add_edge(i + 1, lp)
            # ? closure, zero or one recognizes
            if i < self._M - 1 and self._re[i + 1] == '?':
                self._G.add_edge(lp, i + 1)

            # keep moving
            if self._re[i] in ('(', '*', ')', '+', '?'):
                self._G.add_edge(i, i + 1)

    def recognizes(self, txt):
        """Return True if the whole of ``txt`` is matched by the pattern."""
        # 0 is the start state; collect everything reachable from it
        # through epsilon edges.
        dfs = DirectedDFS(self._G, [0])
        pc = [v for v in range(self._G.V) if dfs.marked(v)]

        for ch in txt:
            # States reached by consuming ch from a current state.
            reached = []
            for v in pc:
                if v < self._M:
                    if self._re[v] == ch or self._re[v] == '.':
                        reached.append(v + 1)

            # Follow epsilon edges from the newly reached states.
            dfs = DirectedDFS(self._G, reached)
            pc = [v for v in range(self._G.V) if dfs.marked(v)]

        # Accept only if the final state is among the live states.
        return self._M in pc

    def __convert(self, s):
        """Rewrite extended syntax into the core operators.

        ``[ABC]`` becomes ``(A|B|C)``; ``X{3}`` becomes ``XXX``;
        ``X{3,5}`` becomes ``XXXX?X?``; ``X{3,}`` becomes ``XXXX*``.
        A backslash and the character after it are copied through as is.

        Returns the rewritten pattern as a string.
        """
        seq = deque()

        i = 0
        length = len(s)
        while i < length:
            if s[i] == '\\':
                seq.append(s[i])  # add '\'
                seq.append(s[i + 1])  # add the character to convert
                i += 1
            elif s[i] == '[':  # [ABC] -> (A|B|C)
                seq.append('(')
                i += 1
                while s[i] != ']':
                    seq.append(s[i])
                    seq.append('|')
                    i += 1
                seq.pop()  # drop the trailing '|'
                seq.append(')')
            elif s[i] == '{':  # A{3}->AAA, A{3,5}->AAAA?A?, A{3,}->AAAA*
                in_brace = ''
                num1, num2 = 0, 0
                _range, more = False, False  # {3,5}, {3,}
                # get content in brace
                i += 1
                while s[i] != '}':
                    in_brace += s[i]
                    i += 1
                # get the type of range
                if ',' in in_brace:
                    nums = in_brace.split(',')
                    num1 = int(nums[0])
                    if nums[1] == '':
                        more = True
                    else:
                        _range = True
                        num2 = int(nums[1])
                else:
                    num1 = int(in_brace)
                # get the basic unit used for multiple; it is popped off
                # seq back-to-front, so unit holds it reversed
                unit = list()
                if seq[-1] == ')':
                    unit.append(seq.pop())  # add ')'
                    if seq[-1] == '\\':
                        unit.append(seq.pop())  # add '\'
                    else:
                        lp_count = 0
                        rp_count = 1
                        while lp_count != rp_count:
                            if seq[-1] == ')':
                                rp_count += 1
                            elif seq[-1] == '(':
                                lp_count += 1
                            unit.append(seq.pop())
                else:
                    unit.append(seq.pop())

                # add multiple unit to seq
                def seq_add_unit(_seq, _unit):
                    _seq.extend(reversed(_unit))

                # mandatory copies
                for _ in range(num1):
                    seq_add_unit(seq, unit)
                if _range:
                    # The optional count must use the untouched lower
                    # bound; the old countdown loop had already reduced
                    # num1 to 0, yielding num2 optional copies.
                    for _ in range(num2 - num1):
                        seq_add_unit(seq, unit)
                        seq.append('?')
                elif more:
                    seq_add_unit(seq, unit)
                    seq.append('*')
            else:
                seq.append(s[i])

            i += 1
        # generator result
        return ''.join(seq)
示例#16
0
def run_on_network_attr(edgelist_filename, param_func_list, labels,
                        outcome_bin_filename,
                        binattr_filename=None,
                        contattr_filename=None,
                        catattr_filename=None,
                        EEiterations    = 50000,
                        run = None,
                        learningRate = 0.01,
                        sampler_func = basicALAAMsampler,
                        zone_filename= None,
                        directed = False):
    """Run on specified network with binary and/or continuous
    and categorical attributes.
    
    Parameters:
         edgelist_filename - filename of Pajek format edgelist 
         param_func_list   - list of change statistic functions corresponding
                             to parameters to estimate
         labels            - list of strings corresponding to param_func_list
                             to label output (header line)
         outcome_bin_filename - filename of binary attribute (node per line)
                                of outcome variable for ALAAM
         binattr_filename - filename of binary attributes (node per line)
                            Default None, in which case no binary attr.
         contattr_filename - filename of continuous attributes (node per line)
                            Default None, in which case no continuous attr.
         catattr_filename - filename of categorical attributes (node per line)
                            Default None, in which case no categorical attr.
         EEiterations     - Number of iterations of the EE algorithm.
                            Default 50000.
         run              - run number for parallel runs, used as suffix on 
                            output filenames. Default None
                            in which case no suffix added to output files.
         learningRate        - learning rate (step size multiplier, a)
                               default 0.01
         sampler_func        - ALAAM sampler function with signature
                               (G, A, changestats_func_list, theta, performMove,
                                sampler_m); see basicALAAMsampler.py
                               default basicALAAMsampler
         zone_filename   - filename of snowball sampling zone file 
                           (header line 'zone' then zone number for nodes,
                           one per line)
                           Default None, in which case no snowball zones.
                           If not None then the sampler_func should take
                           account of snowball sample zones i.e.
                           conditionalALAAMsampler()
         directed        - Default False.
                           True for directed network else undirected.



    Write output to ifd_theta_values_<basename>_<run>.txt and
                    ifd_dzA_values_<basename>_<run>.txt
    where <basename> is the basename of edgelist filename e.g.
    if edgelist_filename is edges.txt then ifd_theta_values_edges_0.txt
    and ifd_dzA_values_edges_0.txt etc.
    WARNING: these files are overwritten.

    """
    assert(len(param_func_list) == len(labels))
    basename = os.path.splitext(os.path.basename(edgelist_filename))[0]
    THETA_OUTFILENAME = THETA_PREFIX + basename
    DZA_OUTFILENAME = DZA_PREFIX + basename
    if run is not None:
        THETA_OUTFILENAME += '_' + str(run)
        DZA_OUTFILENAME += '_' + str(run)
    THETA_OUTFILENAME += os.extsep + 'txt'
    DZA_OUTFILENAME   += os.extsep + 'txt'

    if directed:
        G = Digraph(edgelist_filename, binattr_filename, contattr_filename,
                    catattr_filename, zone_filename)
    else:
        G = Graph(edgelist_filename, binattr_filename, contattr_filename,
                  catattr_filename, zone_filename)

    G.printSummary()
    
    # Read the outcome attribute; the first token is the header and is
    # skipped.  The context manager closes the handle that the bare
    # open(...).read() used to leak.
    with open(outcome_bin_filename) as outcome_file:
        outcome_binvar = list(map(int_or_na, outcome_file.read().split()[1:]))
    assert(len(outcome_binvar) == G.numNodes())
    A = outcome_binvar
    print('positive outcome attribute = ', (float(A.count(1))/len(A))*100.0, '%')
    assert( all([x in [0,1,NA_VALUE] for x in A]) )

    if NA_VALUE in A:
        print('Warning: outcome variable has', A.count(NA_VALUE), 'NA values')

    A = np.array(A) # convert list to numpy vector
    
    # steps of Alg 1    
    M1 = 100

    print('M1 = ', M1, ' EEiterations = ', EEiterations, end=' ') 
    print('learningRate = ', learningRate, end=' ')
    
    # Both output files are managed by `with` so they are closed even when
    # one of the algorithms raises.  The dzA file is still only created
    # after Algorithm S has finished, as before.
    with open(THETA_OUTFILENAME, 'w',1) as theta_outfile: # 1 means line buffering
        theta_outfile.write('t ' + ' '.join(labels) + ' ' + 'AcceptanceRate' + '\n')
        print('Running Algorithm S...', end=' ')
        start = time.time()
        (theta, Dmean) = algorithm_S(G, A, param_func_list, M1, theta_outfile,
                                     sampler_func)
        print(time.time() - start, 's')
        print('after Algorithm S:')
        print('theta = ', theta)
        print('Dmean = ', Dmean)
        with open(DZA_OUTFILENAME, 'w',1) as dzA_outfile:
            dzA_outfile.write('t ' + ' '.join(labels) + '\n')
            print('Running Algorithm EE...', end=' ')
            start = time.time()
            theta = algorithm_EE(G, A, param_func_list, theta, 
                                 EEiterations, theta_outfile, dzA_outfile,
                                 learningRate, sampler_func)

            print(time.time() - start, 's')
    print('at end theta = ', theta)
示例#17
0
from DepthFirstOrder import DepthFirstOrder
from DigraphCycle import DirectedCycle

class Topological:
	"""Compute an ordering for a digraph that has no directed cycle.

	``_order`` holds ``DepthFirstOrder(digraph).reversePost`` when
	``DirectedCycle`` reports no cycle, and None otherwise.
	"""

	def __init__(self,digraph):
		if DirectedCycle(digraph).hasCycle():
			# A cyclic graph has no ordering to offer.
			self._order = None
		else:
			self._order = DepthFirstOrder(digraph).reversePost

	#Directed Acyclical Graph: A graph with no cycles 
	def isDAG(self):
		"""Return True when an order was computed (no cycle was found)."""
		return not (self._order is None)

	def order(self):
		"""Return the stored order, or None if the graph had a cycle."""
		return self._order


# Demo: build a 6-vertex digraph and report whether it is a DAG.
j = Digraph(6)
for tail, head in ((1, 2), (1, 3), (0, 2), (5, 1), (3, 4), (0, 4)):
	j.addEdge(tail, head)

z = Topological(j)
print(z.isDAG())

示例#18
0
    def dfs(self, G, v):
        """Mark ``v`` and recurse into every unmarked vertex in ``G.adj(v)``."""
        self._marked[v] = True
        for neighbour in G.adj(v):
            if self._marked[neighbour]:
                continue  # already visited
            self.dfs(G, neighbour)

    def marked(self, v):
        """Return the visited flag stored for vertex ``v``."""
        flags = self._marked
        return flags[v]


# unittest
# Self-test against tinyDG.txt: first line V, second line E, then E lines
# of "v w" edges.
if __name__ == '__main__':
    with open('tinyDG.txt', 'r') as g:
        V = int(g.readline().split()[0])
        E = int(g.readline().split()[0])
        DG = Digraph(V)
        for e in range(E):
            # Vertex ids must be ints: the sources and the marked() queries
            # below use ints, and str.split() yields strings.
            v, w = (int(token) for token in g.readline().split())
            DG.add_edge(v, w)

    # sources: 1, 2, 6
    # reachable: 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12   # don't have 7
    sources = [1, 2, 6]
    reachable = DirectedDFS(DG, sources)
    for i in range(V):
        if i == 7:
            assert reachable.marked(i) is False
        else:
            assert reachable.marked(i) is True
			print(s)
			if self.marked[s] is False:
				self._dfs(digraph,s)
				self.count += 1

	def _dfs(self,digraph,v):
		"""Mark ``v``, tag it with the current ``count`` and recurse.

		Only neighbours whose ``marked`` entry is exactly ``False`` are
		visited.
		"""
		self.marked[v] = True
		self.id[v] = self.count
		for neighbour in digraph.adj(v):
			if self.marked[neighbour] is not False:
				continue
			self._dfs(digraph,neighbour)

	def stronglyConnected(self,v,w):
		"""Print the ids of ``v`` and ``w`` and return whether they are equal."""
		id_v = self.id[v]
		print("id of V:",id_v)
		id_w = self.id[w]
		print("id of W:",id_w)
		return id_v == id_w


# Demo: build a 6-vertex digraph, query one vertex pair and dump all ids.
j = Digraph(6)
for tail, head in ((1, 2), (1, 3), (0, 2), (5, 1), (3, 4), (0, 4)):
	j.addEdge(tail, head)

z = KosarajuSCC(j)
print(z.stronglyConnected(3,4))

for x in z.id:
	print(x)
def run_on_network_attr(edgelist_filename,
                        param_func_list,
                        labels,
                        binattr_filename=None,
                        catattr_filename=None):
    """
    Run on specified network with binary and/or categorical attributes.
    
    Parameters:
         edgelist_filename - filename of Pajek format edgelist 
         param_func_list   - list of change statistic functions corresponding
                             to parameters to estimate
         labels            - list of strings corresponding to param_func_list
                             to label output (header line)
         binattr_filename - filename of binary attributes (node per line)
                            Default None, in which case no binary attr.
         catattr_filename - filename of categorical attributes (node per line)
                            Default None, in which case no categorical attr.
    Write output to ifd_theta_values_<basename>.txt and
                    ifd_dzA_values_<basename>.txt
    where <basename> is the basename of edgelist filename e.g.
    if edgelist_filename is edges.txt then ifd_theta_values_edges.txt
    and ifd_dzA_values_edges.txt
    WARNING: these files are overwritten.
    """
    assert (len(param_func_list) == len(labels))
    basename = os.path.splitext(os.path.basename(edgelist_filename))[0]
    THETA_OUTFILENAME = THETA_PREFIX + basename + os.extsep + 'txt'
    DZA_OUTFILENAME = DZA_PREFIX + basename + os.extsep + 'txt'

    G = Digraph(edgelist_filename, binattr_filename, catattr_filename)

    M1_steps = 500
    # steps of Alg 1
    M1 = int(M1_steps * G.density() * (1 - G.density()) * G.numNodes()**2 /
             sampler_m)

    Mouter = 500  # outer iterations of Algorithm EE
    Msteps = 100  # multiplier for number of inner steps of Algorithm EE
    # inner steps of EE
    M = int(Msteps * G.density() * (1 - G.density()) * G.numNodes()**2 /
            sampler_m)

    print('M1 = ', M1, ' Mouter = ', Mouter, ' M = ', M)

    # Both output files are managed by `with` so they are closed even when
    # one of the algorithms raises.  The dzA file is still only created
    # after Algorithm S has finished, as before.
    with open(THETA_OUTFILENAME, 'w', 1) as theta_outfile:  # 1 means line buffering
        theta_outfile.write('t ' + ' '.join(labels) + ' ' + 'AcceptanceRate' +
                            '\n')
        print('Running Algorithm S...', end=' ')
        start = time.time()
        (theta, Dmean) = algorithm_S(G, param_func_list, M1, theta_outfile)
        print(time.time() - start, 's')
        print('after Algorithm S:')
        print('theta = ', theta)
        print('Dmean = ', Dmean)
        with open(DZA_OUTFILENAME, 'w', 1) as dzA_outfile:
            dzA_outfile.write('t ' + ' '.join(labels) + '\n')
            print('Running Algorithm EE...', end=' ')
            start = time.time()
            theta = algorithm_EE(G, param_func_list, theta, Dmean, Mouter, M,
                                 theta_outfile, dzA_outfile)
            print(time.time() - start, 's')
    print('at end theta = ', theta)
示例#21
0
def run_on_network_attr(edgelist_filename, param_func_list, labels,
                        outcome_bin_filename,
                        binattr_filename=None,
                        contattr_filename=None,
                        catattr_filename=None,
                        sampler_func = basicALAAMsampler,
                        zone_filename = None,
                        directed = False):
    """Estimate ALAAM parameters on the specified network, with binary
    and/or continuous and categorical attributes, then (if estimation
    converged) run a goodness-of-fit test.

    Parameters:
         edgelist_filename - filename of Pajek format edgelist
         param_func_list   - list of change statistic functions corresponding
                             to parameters to estimate
         labels            - list of strings corresponding to param_func_list
                             to label output (header line)
         outcome_bin_filename - filename of binary attribute (node per line)
                                of outcome variable for ALAAM. The first
                                whitespace-separated token in the file is
                                skipped (treated as a header).
         binattr_filename - filename of binary attributes (node per line)
                            Default None, in which case no binary attr.
         contattr_filename - filename of continuous attributes (node per line)
                            Default None, in which case no continuous attr.
         catattr_filename - filename of categorical attributes (node per line)
                            Default None, in which case no categorical attr.
         sampler_func        - ALAAM sampler function with signature
                               (G, A, changestats_func_list, theta, performMove,
                                sampler_m); see basicALAAMsampler.py
                               default basicALAAMsampler
         zone_filename   - filename of snowball sampling zone file
                           (header line 'zone' then zone number for nodes,
                           one per line)
                           Default None, in which case no snowball zones.
                           If not None then the sampler_func should take
                           account of snowball sample zones i.e.
                           conditionalALAAMsampler()
         directed        - Default False.
                           True for directed network else undirected.

    Stochastic approximation is repeated (at most 20 runs, each run starting
    from the theta of the previous one) until every t-ratio has absolute
    value less than 0.1, or a run fails by returning theta None.

    Write output to stdout.

    """
    assert(len(param_func_list) == len(labels))

    if directed:
        G = Digraph(edgelist_filename, binattr_filename, contattr_filename,
                    catattr_filename, zone_filename)
    else:
        G = Graph(edgelist_filename, binattr_filename, contattr_filename,
                  catattr_filename, zone_filename)

    G.printSummary()

    # Read the outcome vector, skipping the header token; the context
    # manager makes sure the file handle is closed again.
    with open(outcome_bin_filename) as outcome_file:
        outcome_binvar = list(map(int_or_na, outcome_file.read().split()[1:]))
    assert(len(outcome_binvar) == G.numNodes())
    A = outcome_binvar

    assert( all(x in [0,1,NA_VALUE] for x in A) )
    print('positive outcome attribute = ', (float(A.count(1))/len(A))*100.0, '%')
    if NA_VALUE in A:
        print('Warning: outcome variable has', A.count(NA_VALUE), 'NA values')

    # Calculate observed statistics by summing change stats for each 1 variable
    Zobs = computeObservedStatistics(G, A, param_func_list)
    print('Zobs = ', Zobs)

    theta = np.zeros(len(param_func_list))

    estimation_start = time.time()
    max_runs = 20
    i = 0
    converged = False
    while i < max_runs and not converged:
        i += 1
        print('Running stochastic approximation (run', i,' of at most',max_runs,')...')
        start = time.time()
        (theta, std_error, t_ratio) = stochasticApproximation(G, A,
                                                              param_func_list,
                                                              theta, Zobs,
                                                              sampler_func)

        print('Stochastic approximation took',time.time() - start, 's')
        if theta is None:
            print('Failed.')
            break
        print('           ',labels)
        print('theta     =', theta)
        print('std_error =', std_error)
        print('t_ratio   =', t_ratio)

        converged = np.all(np.abs(t_ratio) < 0.1)

    print('Total estimation time (',i,'runs) was',time.time() - estimation_start, 's')
    if converged:
        print('Converged.')
        # A parameter is flagged with '*' when its magnitude exceeds
        # twice its standard error.
        significant = np.abs(theta) > 2 * std_error
        sys.stdout.write(20*' ' + '  Parameter Std.Error t-ratio\n')
        for j in range(len(theta)):
            sys.stdout.write('%20.20s % 6.3f    % 6.3f    % 6.3f %c\n' % (labels[j], theta[j], std_error[j], t_ratio[j], ('*' if significant[j] else ' ')))
        print()

        # Do goodness-of-fit test

        # change stats functions to add to GoF if not already in estimation
        if directed:
            # TODO GoF statistics for directed
            gof_param_func_list = list(param_func_list)
            goflabels = list(labels)
        else:
            statfuncs = [changeTwoStar, changeThreeStar, changePartnerActivityTwoPath,
                         changeTriangleT1, changeContagion,
                         changeIndirectPartnerAttribute,
                         changePartnerAttributeActivity,
                         changePartnerPartnerAttribute,
                         changeTriangleT2,
                         changeTriangleT3]
            statlabels = ['Two-Star', 'Three-Star', 'Alter-2Star1A',
                          'T1', 'Contagion', 'Alter-2Star2A', 'Partner-Activity',
                          'Partner-Resource','T2', 'T3']
            gof_param_func_list = (list(param_func_list) +
                                   [f for f in statfuncs
                                    if f not in param_func_list])
            goflabels = (list(labels) + [f for f in statlabels
                                         if f not in labels])
        n = len(gof_param_func_list)
        assert len(goflabels) == n
        # pad theta vector with zeros for the added parameters
        gof_theta = np.array(list(theta) + (n-len(theta))*[0])

        Ainitial = None # default: use random initialization
        if zone_filename is not None: # conditional estimation
            # For snowball conditional estimation, we must not start with
            # random initial outcome vector, but rather make sure the
            # nodes in the outermost zone have the same outcome attributes
            # as the observed vector
            Ainitial = np.copy(A) # copy of observed vector
            # make vector of 50% ones, size of number of inner nodes
            Arandom_inner = rand_bin_array(int(0.5*len(G.inner_nodes)), len(G.inner_nodes))
            # set the outcome for inner nodes to random values, leaving
            # value of outermost nodes at the original observed values
            Ainitial[G.inner_nodes] = Arandom_inner
        print('Running goodness-of-fit test...')
        start = time.time()
        gofresult = gof(G, A, gof_param_func_list, gof_theta,
                        sampler_func = sampler_func, Ainitial = Ainitial)
        print('GoF took',time.time() - start, 's')
        print('           ',goflabels)
        print('t_ratios = ',gofresult)

        sys.stdout.write(20*' ' + '  t-ratio\n')
        for j in range(n):
            sys.stdout.write('%20.20s % 6.3f\n' % (goflabels[j], gofresult[j]))
        print()
def lastTransitiveClosureTime(numNodes, transaction_list, verbose):
    """
    Given a list of transactions which are in the form of
    a list of (sender, receiver, timestamp) tuples, return the list
    of times at which open two-paths are closed.

    Parameters:
        numNodes         - number of actors (nodes)
        transaction_list - list of (sender, receiver, timestamp) tuples.
                           Sender and receiver are integers in 0..numNodes-1
                           and timestamps are numeric values.
                           The list must be ordered by timestamp ascending.
        verbose          - if True write debug output to stdout

    Return value:
        List of tuples (open_time, delta_time)
        where open_time is the second timestamp in the open two-path and
        delta_time is the time it took the open two-path to be closed (the
        difference in timestamp between the closing transaction (arc) and
        the second (along arc) timestamp in the open two-path), BUT only if
        the second arc in the two-path has a higher timestamp than the first
        (i.e. we don't count a two-path that goes backward in time along
        the path). When one transaction closes several such two-paths, only
        the one with the largest second timestamp contributes a tuple.
    """
    G = Digraph(numNodes)
    lastTime = None
    delta_time_list = []
    for trans in transaction_list:
        sender = trans[TSENDER]
        receiver = trans[TRECEIVER]
        when = trans[TTIME]
        assert (sender >= 0 and sender < numNodes)
        assert (receiver >= 0 and receiver < numNodes)
        assert (lastTime is None or when >= lastTime)
        if verbose:
            # end=' ' keeps the rest of this transaction's report on one line
            print(sender, '->', receiver, ' at time ', when, end=' ')
        closed_2paths_v = closedTwoPaths(G, sender, receiver)
        if len(closed_2paths_v) > 0:
            if verbose:
                print('closed', len(closed_2paths_v), 'two-paths via',
                      closed_2paths_v)
            path_2nd_time_list = []
            for v in closed_2paths_v:
                # values stored on the arcs are the timestamps passed
                # to insertArc() below
                path_1st_time = G.G[sender][v]
                path_2nd_time = G.G[v][receiver]
                if path_2nd_time > path_1st_time:
                    if verbose:
                        print('  path via', v, 'is forward in time (',
                              path_1st_time, ',', path_2nd_time,
                              '), considering')
                    path_2nd_time_list.append(path_2nd_time)
                else:
                    if verbose:
                        print('  path via', v, 'is backwards in time (',
                              path_1st_time, ',', path_2nd_time,
                              '), skipping')
            if len(path_2nd_time_list) > 0:
                path_2nd_time_max = max(path_2nd_time_list)
                delta_time = when - path_2nd_time_max
                if verbose:
                    print('  ', len(path_2nd_time_list),
                          'paths considered as forward in time, max 2nd time is',
                          path_2nd_time_max, ' appending delta_time =',
                          delta_time)
                delta_time_list.append((path_2nd_time_max, delta_time))
            else:
                if verbose:
                    print('  (no paths forward in time)')
        else:
            if verbose:
                # terminate the line started by the transaction report
                print()
        if not G.isArc(sender, receiver):
            # only insert arc if not already one there, to keep first
            # time on transactions, not subsequent times.
            G.insertArc(sender, receiver, when)
        lastTime = when
    return delta_time_list
示例#23
0
    def vid(self, v):
        """Return the entry of self.id recorded for vertex v.

        v is passed to validateVertex() first, so whatever that method
        raises for an out-of-range vertex propagates to the caller.
        """
        self.validateVertex(v)
        ident = self.id[v]
        return ident

    def check(self, G):
        """Placeholder: performs no work on G and returns None."""
        return None

    def validateVertex(self, v):
        """Check that v is a valid index into self.marked.

        Parameters:
            v - vertex number to check

        Raises:
            ValueError - if v is negative or not less than len(self.marked)
        """
        V = len(self.marked)
        if v < 0 or v >= V:
            # Raising a bare string is itself a TypeError in Python 3 and
            # hides the message; raise a real exception instead.
            raise ValueError("vertex %d is not between 0 and %d" % (v, V - 1))


if __name__ == '__main__':
    # Build the digraph from the input named by the first command-line
    # argument and compute its strong components.
    myin = In(sys.argv[1], None)
    G = Digraph(myin)
    scc = KosarajuSharirSCC(G)

    # Report how many strong components were found.
    m = scc.mcount()
    print("%d strong components" % m)

    # Bucket every vertex under the id that scc.vid() reports for it.
    components = [[] for _ in range(m)]
    for v in range(G.Vertex()):
        components[scc.vid(v)].append(v)

    # One text line per component: each vertex followed by a space.
    s = "".join(
        "".join("%d " % v for v in members) + '\n'
        for members in components)
                self.cycle = []
                x = v
                while x != w:
                    self.cycle.append(x)
                    x = self.edgeTo[x]
                self.cycle.append(w)
                self.cycle.append(v)
        self.onStack[v] = False

    def hasCycle(self):
        """Return True unless self.cycle compares equal to the empty list."""
        recorded = self.cycle
        return not (recorded == [])

    def cycle_l(self):
        """Return self.cycle itself (the same object, not a copy)."""
        recorded = self.cycle
        return recorded


if __name__ == '__main__':
    # Build the digraph from the input named by the first command-line
    # argument and search it for a directed cycle.
    myin = In(sys.argv[1])
    G = Digraph(myin)
    finder = DirectedCycle(G)

    s = ""
    if not finder.hasCycle():
        print("No directed cycle")
    else:
        print("Directed cycle: ")
        # Vertices of the cycle on one line, each followed by a space.
        s = "".join("%d " % v for v in finder.cycle_l())
        print(s)
    print("\n")