def DegreeClassify(dataPath='../LinkAnalyticsData/UTK_problem/'):
    """ Finds the distribution of the cumulative node and edge weights.

    NOTE(review): appears unfinished -- `attrs` and `deg` are built but never
    used, and no classification or plotting is performed; confirm intent
    before extending.
    """
    filenames=('Moria_2','Standelf_2')
    # NOTE(review): unused in the current body
    attrs=('calls','texts','degree','secs')
    for f in filenames:
        # Reading in the Graph
        MG = GU.readData(os.path.join(dataPath,f)+'.graph')
        # Degree of every node; currently computed but not consumed.
        deg = nx.degree(MG)
def GetEdgeDistributions(dataPath='../LinkAnalyticsData/UTK_problem/'):
    """Plot a log-frequency histogram of each edge attribute of every graph.

    One PNG per (graph, attribute) pair is written to the working directory
    as "<name>_<attribute>_distribution.png".
    """
    graph_files = ('Moria_1.graph', 'Standelf_1.graph')
    edge_attrs = ('calls', 'texts', 'days', 'secs')
    for fname in graph_files:
        # Load the multigraph from disk
        graph = GU.readData(os.path.join(dataPath, fname))
        # Base name for titles/filenames, e.g. "Moria_1.graph" -> "Moria"
        base = fname.split('.')[0].split('_')[0]
        for attribute in edge_attrs:
            values = GU.GetAttr(graph, attribute)
            # 100-bin histogram with a log-scaled frequency axis
            pyplot.hist(values, 100)
            pyplot.yscale('log')
            pyplot.grid(True)
            pyplot.ylabel("Frequency")
            pyplot.xlabel(attribute)
            pyplot.title("%s %s distribution" % (base, attribute))
            pyplot.savefig("%s_%s_distribution.png" % (base, attribute))
            # Clear the current figure before the next attribute's plot
            pyplot.clf()
def readData(filename='../LinkAnalyticsData/UTK_problem/Moria_1.graph'):
    """ Creates a dataset for ANN training of the formated data supplied by filename """
    """ Currently based on 4x2 inputs of days, calls, call duration, and texts """
    """ Two 'classes' are implemented, either there or not """
    numInputs = 2+4+2
    alldata = ClassificationDataSet(numInputs,1,nb_classes=2)
    MG = GU.readData(filename)
    closeness = nx.closeness_centrality(MG)
    degree = nx.degree(MG)
   
    startTime = datetime.now()
    # Computing the data
    data = [[closeness[u],degree[u],\
            edata['calls'],edata['secs'],edata['texts'],edata['days'],\
            degree[v],closeness[v]] \
            for u,v,edata in MG.edges(data=True)]
    for d in data:
        alldata.addSample(d,[1])

    print "Converted to data in ",(datetime.now()-startTime)
    return alldata
def GetDataDistributions(dataPath='../LinkAnalyticsData/UTK_problem/'):
    """ Finds the distribution of the cumulative node and edge weights.

    Each multigraph is collapsed to a simple graph (GU.ConvertToSingle) whose
    nodes carry cumulative attribute values; a log-binned histogram of each
    attribute is saved as "<name>_<attr>_cum_distribution.png".
    """
    # BUG FIX: the docstring was originally a bare string AFTER the first
    # statement, so it was a discarded no-op; moved to the proper position.
    filenames=('Moria_1.graph','Standelf_1.graph')
    attrs=('calls','texts','degree','secs')
    for f in filenames:
        # Reading in the Graph
        MG = GU.readData(os.path.join(dataPath,f))
        g = GU.ConvertToSingle(MG)
        for attr in attrs:
            # Per-node cumulative value of this attribute.
            x = [g.node[n][attr] for n in g.nodes()]

            # Plotting the Data: log2-spaced bins from 2 up to the
            # 3rd-largest value, trimming the top outliers from the range.
            # NOTE(review): assumes >= 3 nodes with positive values.
            largest = heapq.nlargest(3,x)
            pyplot.figure()
            pyplot.hist(x,bins=np.logspace(1,np.log2(largest[2]),25,base=2))
            pyplot.ylabel("Frequency")
            pyplot.xlabel(attr)
            name = f.split('.')[0].split('_')[0]
            title = name+" "+attr+" distribution"
            pyplot.title(title)
            pyplot.savefig(name+"_"+attr+"_cum_distribution.png")
            # BUG FIX: a new figure was opened every iteration and never
            # released, leaking memory across the loop; close it once saved.
            pyplot.close()