예제 #1
0
def convertNDDSToCDS(options):
    """Print the mean nearest-neighbour distance among the first 1000 data rows.

    Despite its name, this variant writes no output files.  It loads the
    data, query and VP files named after ``options``, prints their lengths,
    derives a per-column symbol weight (one minus the symbol's relative
    frequency over *all* data rows) and then, for each of the first 1000
    data rows, finds the smallest ``geh`` distance to any other row of that
    sample.  The mean of those minima is printed.

    Args:
        options: mapping providing 'numberOfData', 'numberOfDimension',
            'distribution', 'numberOfAlphabet', 'numberOfVP' and 'typeOfVP'.
    """
    size = options['numberOfData']
    dim = options['numberOfDimension']
    distribution = options['distribution']
    cardinality = options['numberOfAlphabet']
    numberOfVP = options['numberOfVP']
    typeOfVP = options['typeOfVP']

    data_path = 'data/data_%d_%d_%s_%d.txt' % (
        size, dim, distribution, cardinality)
    query_path = 'query/query_%d_%d_%s_%d.txt' % (
        size, dim, distribution, cardinality)
    vp_path = 'vp/vp_%d_%d_%d_%s.txt' % (
        dim, numberOfVP, cardinality, typeOfVP)

    datas = utils.getDataInFile(data_path)
    querys = utils.readDataFromFile(query_path)
    vps = utils.readDataFromFile(vp_path)
    print('%d %d %d' % (len(datas), len(querys), len(vps)))

    # weights[column][symbol] starts out as an occurrence count ...
    weights = [{} for _ in range(dim)]
    for row in datas:
        for column, seen in enumerate(weights):
            symbol = row[column]
            seen[symbol] = seen.get(symbol, 0) + 1

    # ... and is then turned into 1 - relative frequency, in place.
    row_count = float(len(datas))
    for seen in weights:
        for symbol in seen:
            seen[symbol] = 1.0 - float(seen[symbol]) / row_count

    def geh(a, b):
        # A mismatch costs 1; a match costs the symbol's weight scaled by
        # 1/dim.  The sum is normalised by dim once more on return.
        acc = 0.0
        for column, symbol in enumerate(a):
            acc += 1.0 if symbol != b[column] else (
                weights[column][symbol] / float(dim))
        return acc / float(dim)

    # Only the first 1000 rows take part in the quadratic pairwise scan; the
    # weights above were still computed from the full data set.
    sample = datas[:1000]
    total = 0.0
    for i, point in enumerate(sample):
        nearest = 98765432  # sentinel; stays as is when no other row exists
        for j, other in enumerate(sample):
            if j == i:
                continue
            nearest = min(nearest, geh(point, other))
        total += nearest
    average = total / len(sample)
    print(average)
예제 #2
0
def fetch_words_by_subjects():
    """Download the word list of every subject and store one JSON file each.

    The subjects are taken from the 'Subjects' entry of
    ``data/subjects.json``.  For each subject its id is sent to the
    minder.vn words endpoint, the response body is decoded and parsed as
    JSON, and the result is handed to ``writeDataToFile`` under
    ``data/subject-<id>.json``.
    """
    subjects = readDataFromFile('data/subjects.json')['Subjects']
    for subject in subjects:
        subject_id = subject['id']
        endpoint = "https://minder.vn/api/words/words?id_subject={0}".format(
            subject_id)
        with urllib.request.urlopen(endpoint) as response:
            payload = json.loads(response.read().decode())
            # Written while the response is still open, as before.
            writeDataToFile('data/subject-{0}.json'.format(subject_id),
                            payload)
예제 #3
0
def calculateAllPairDistance(options):
    """Print the Hamming distance of every unordered pair of VP entries.

    The VP file is located with ``utils.getVPFileName(options)`` and loaded
    with ``utils.readDataFromFile``.  One line ``"<i> <j> <distance>"`` is
    printed for each pair of indices with ``i < j``, in ascending order of
    ``i`` and then ``j``.

    Args:
        options: option mapping forwarded unchanged to
            ``utils.getVPFileName``.
    """
    # Function-scope import: the imports of the enclosing file are not
    # visible from this block.
    import itertools

    vps = utils.readDataFromFile(utils.getVPFileName(options))
    # combinations() over the enumerated entries yields exactly the
    # (i, j) pairs with i < j that the nested index loops produced.
    for (i, first), (j, second) in itertools.combinations(enumerate(vps), 2):
        print('%s %s %s' % (i, j, utils.hammingDistance(first, second)))
예제 #4
0
def convertNDDSToCDS(options):
    """Replace every data and query row by its distances to the VP entries.

    Each row of the data file and of the query file becomes a list holding
    ``utils.hammingDistance(row, vp)`` for every entry ``vp`` of the VP
    file.  The two resulting tables are written to the paths returned by
    ``utils.getCDSDataFileName`` and ``utils.getCDSQueryFileName``, and both
    paths are printed at the end.

    Args:
        options: mapping providing 'numberOfData', 'numberOfDimension',
            'distribution', 'numberOfAlphabet', 'numberOfVP' and 'typeOfVP'.
    """
    # The data and query files share one naming key, the VP file has its own.
    data_key = (options['numberOfData'], options['numberOfDimension'],
                options['distribution'], options['numberOfAlphabet'])
    vp_key = (options['numberOfDimension'], options['numberOfVP'],
              options['numberOfAlphabet'], options['typeOfVP'])

    data_path = 'data/data_%d_%d_%s_%d.txt' % data_key
    query_path = 'query/query_%d_%d_%s_%d.txt' % data_key
    vp_path = 'vp/vp_%d_%d_%d_%s.txt' % vp_key
    cds_data_path = utils.getCDSDataFileName(options)
    cds_query_path = utils.getCDSQueryFileName(options)

    datas = utils.getDataInFile(data_path)
    querys = utils.readDataFromFile(query_path)
    vps = utils.readDataFromFile(vp_path)
    print('%d %d %d' % (len(datas), len(querys), len(vps)))

    def distances_to_vps(records):
        # One output row per record, one column per VP entry.
        return [[utils.hammingDistance(record, vp) for vp in vps]
                for record in records]

    utils.writeDataToFile(cds_data_path, distances_to_vps(datas))
    utils.writeDataToFile(cds_query_path, distances_to_vps(querys))
    print('%s %s' % (cds_data_path, cds_query_path))
예제 #5
0
def convertNDDSToCDS(options):
    """Write Hamming-distance tables for the data set and the query set.

    Loads the data, query and VP files named after ``options``, prints their
    lengths, and converts every data row and every query row into the list
    of its ``utils.hammingDistance`` values against each VP entry.  The data
    table is written first, then the query table; finally both output paths
    (obtained from ``utils.getCDSDataFileName`` and
    ``utils.getCDSQueryFileName``) are printed.

    Args:
        options: mapping providing 'numberOfData', 'numberOfDimension',
            'distribution', 'numberOfAlphabet', 'numberOfVP' and 'typeOfVP'.
    """
    nData, nDim = options['numberOfData'], options['numberOfDimension']
    distName, nAlphabet = options['distribution'], options['numberOfAlphabet']
    nVP, vpKind = options['numberOfVP'], options['typeOfVP']

    dataPath = 'data/data_%d_%d_%s_%d.txt' % (nData, nDim, distName, nAlphabet)
    queryPath = 'query/query_%d_%d_%s_%d.txt' % (nData, nDim, distName, nAlphabet)
    vpPath = 'vp/vp_%d_%d_%d_%s.txt' % (nDim, nVP, nAlphabet, vpKind)
    cdsDataPath = utils.getCDSDataFileName(options)
    cdsQueryPath = utils.getCDSQueryFileName(options)

    datas = utils.getDataInFile(dataPath)
    querys = utils.readDataFromFile(queryPath)
    vps = utils.readDataFromFile(vpPath)
    print('%d %d %d' % (len(datas), len(querys), len(vps)))

    # Data rows -> distance rows.
    dataTable = []
    for record in datas:
        dataTable.append([utils.hammingDistance(record, vp) for vp in vps])
    utils.writeDataToFile(cdsDataPath, dataTable)

    # Query rows -> distance rows.
    queryTable = []
    for record in querys:
        queryTable.append([utils.hammingDistance(record, vp) for vp in vps])
    utils.writeDataToFile(cdsQueryPath, queryTable)

    print('%s %s' % (cdsDataPath, cdsQueryPath))
예제 #6
0
def parse_words():
    """Push a trimmed word list for every subject into the spreadsheet.

    For each entry of 'Subjects' in ``data/subjects.json`` the matching
    ``data/subject-<id>.json`` file is read, every word record is reduced to
    the fields 'id', 'word', 'phonetic' and 'mean' (fields missing from a
    record are skipped), and the result is handed to
    ``spreadsheet.update_cells`` on a worksheet created under the subject's
    name.
    """
    # Fields kept from each raw word record.  Iterating this tuple rather
    # than a set intersection gives every emitted dict the same key order on
    # every run; set iteration order changes with string hash randomisation.
    # NOTE(review): only matters if update_cells depends on key order --
    # confirm against its implementation.
    wanted_fields = ('id', 'word', 'phonetic', 'mean')

    data = readDataFromFile('data/subjects.json')
    for sub in data['Subjects']:
        subject = {
            'id': sub['id'],
            'name': sub['name'],
            'total': sub['total'],
            'words': [],
        }
        subject_data = readDataFromFile('data/subject-{}.json'.format(
            sub['id']))
        # Each output record: word {'id', 'word', 'phonetic', 'mean'}
        subject['words'] = [
            {key: word[key] for key in wanted_fields if key in word}
            for word in subject_data['Words']
        ]

        worksheet = spreadsheet.create_worksheet(subject['name'])
        spreadsheet.update_cells(worksheet, subject)
예제 #7
0
파일: statistic.py 프로젝트: sgzs6721/ssc
def getSplitData(fromDate, toDate, type):
    """Split each record's number into five per-position lists.

    Records come from ``utils.readDataFromFile(type, fromDate, toDate)`` and
    are split on single spaces into date, time and number (fields 0, 1 and
    2).  The result holds five lists; list ``i`` receives the string
    ``"<character i of the number> <date> <time>"`` for every record, in
    input order.

    Returns:
        A list of five lists of strings.
    """
    records = utils.readDataFromFile(type, fromDate, toDate)

    columns = [[] for _ in range(5)]
    for record in records:
        fields = record.split(" ")
        digits, day, clock = fields[2], fields[0], fields[1]
        for position, column in enumerate(columns):
            column.append(" ".join([digits[position], day, clock]))

    return columns
예제 #8
0
파일: statistic.py 프로젝트: sgzs6721/ssc
def getSplitData(fromDate, toDate, type):
    """Return five lists, one per character position of the record number.

    Every record read through ``utils.readDataFromFile(type, fromDate,
    toDate)`` is split on single spaces; field 0 is used as the date,
    field 1 as the time and field 2 as the number.  For positions 0 to 4 the
    character at that position is joined with the date and time by spaces
    and appended to the list of that position.
    """
    perPosition = [[], [], [], [], []]

    for line in utils.readDataFromFile(type, fromDate, toDate):
        parts = line.split(" ")
        number = parts[2]
        stamp = [parts[0], parts[1]]  # date, time
        for slot in range(5):
            entry = " ".join([number[slot]] + stamp)
            perPosition[slot].append(entry)

    return perPosition
예제 #9
0
def calculateAllPairDistance(options):
    """Print all pairwise VP Hamming distances and a histogram of them.

    The VP file is located with ``utils.getVPFileName(options)``.  Output,
    in order:

    * one line ``"<i> <j> <distance>"`` per pair of indices with ``i < j``;
    * one line ``"<distance> <count>"`` for every distance from 0 to the
      'numberOfDimension' option inclusive;
    * the number of distinct distances that occurred.

    Args:
        options: mapping providing 'numberOfDimension'; it is also
            forwarded unchanged to ``utils.getVPFileName``.
    """
    # Function-scope import: the imports of the enclosing file are not
    # visible from this block.
    import itertools

    dim = options['numberOfDimension']
    vps = utils.readDataFromFile(utils.getVPFileName(options))

    # histogram[k] counts the pairs at distance k; distances are used
    # directly as list indices, so they must be integers in 0..dim.
    histogram = [0] * (dim + 1)
    distinct = set()
    # combinations() over the enumerated entries yields exactly the
    # (i, j) pairs with i < j that the nested index loops produced.
    for (i, first), (j, second) in itertools.combinations(enumerate(vps), 2):
        dist = utils.hammingDistance(first, second)
        histogram[dist] += 1
        distinct.add(dist)
        print('%s %s %s' % (i, j, dist))

    for distance, count in enumerate(histogram):
        print('%s %s' % (distance, count))
    print(len(distinct))
예제 #10
0
    utils.createDirectory('ndt_data')
    utils.createDirectory('ndt_query')
    dictionary = makeDictionaryKeyIsAlphabet()

    dataFileNames = glob.glob('data/*.txt')
    for dataFileName in dataFileNames:
        print dataFileName
        onlyFileName = dataFileName.split('.')[0].split('/')[1]
        size        = onlyFileName.split('_')[1]
        dim         = onlyFileName.split('_')[2]
        vptype      = onlyFileName.split('_')[3]
        cardinality = onlyFileName.split('_')[4]

        queryFileName = 'query/query_%s_%s_%s_%s.txt'%(size,dim,vptype,cardinality)
        datas = utils.getDataInFile(dataFileName)
        querys = utils.readDataFromFile(queryFileName)

        ndtDataFileName = 'ndt_data/data_%s_%s_%s_%s.txt'%(size,dim,vptype,cardinality)
        ndtQueryFileName = 'ndt_query/query_%s_%s_%s_%s.txt'%(size,dim,vptype,cardinality)

        if os.path.exists(ndtDataFileName):
            print '%s is exists'%ndtDataFileName
        else :
            with open(ndtDataFileName,'w') as fp:
                for i in xrange(len(datas)):
                    for j in xrange(len(datas[i])):
                        fp.write(str(dictionary[datas[i][j]])+' ')
                    fp.write('\n')
        if os.path.exists(ndtQueryFileName):
            print '%s is exsts'%ndtQueryFileName
        else :
예제 #11
0
def convertNDDSToCDS(options):
    """Write frequency-weighted distance tables for the data and query sets.

    Every data row and every query row is replaced by the list of its
    ``geh`` distances to each entry of the VP file, and the two tables are
    written with ``utils.writeDataToFile`` to ``cds_data/...geh.txt`` and
    ``cds_query/...geh.txt``.  The lengths of the three inputs are printed
    first and the two output paths last.

    The local ``geh`` distance charges 1 for every mismatching position and,
    for a matching position, the symbol's weight divided by the number of
    dimensions; the sum is divided by the number of dimensions again.  A
    symbol's weight in a column is one minus its relative frequency in that
    column over all data rows.

    Args:
        options: mapping providing 'numberOfData', 'numberOfDimension',
            'distribution', 'numberOfAlphabet', 'numberOfVP' and 'typeOfVP'.
    """
    size = options['numberOfData']
    dim = options['numberOfDimension']
    distribution = options['distribution']
    cardinality = options['numberOfAlphabet']
    numberOfVP = options['numberOfVP']
    typeOfVP = options['typeOfVP']

    dataFileName = 'data/data_%d_%d_%s_%d.txt' % (size, dim, distribution,
                                                  cardinality)
    queryFileName = 'query/query_%d_%d_%s_%d.txt' % (size, dim, distribution,
                                                     cardinality)
    vpFileName = 'vp/vp_%d_%d_%d_%s.txt' % (dim, numberOfVP, cardinality,
                                            typeOfVP)
    # Hard-coded "geh" output names are used here instead of the utils
    # file-name helpers.
    cdsDataFileName = 'cds_data/data_%d_%d_%d_%s_%d_%sgeh.txt' % (
        size, dim, numberOfVP, distribution, cardinality, typeOfVP)
    cdsQueryFileName = 'cds_query/query_%d_%d_%d_%s_%d_%sgeh.txt' % (
        size, dim, numberOfVP, distribution, cardinality, typeOfVP)

    datas = utils.getDataInFile(dataFileName)
    querys = utils.readDataFromFile(queryFileName)
    vps = utils.readDataFromFile(vpFileName)
    print('%d %d %d' % (len(datas), len(querys), len(vps)))

    # Count how often each symbol occurs in each column of the data set.
    counts = [{} for _ in range(dim)]
    for row in datas:
        for column, seen in enumerate(counts):
            symbol = row[column]
            seen[symbol] = seen.get(symbol, 0) + 1

    # weights[column][symbol] = 1 - relative frequency of the symbol.
    total = float(len(datas))
    weights = [
        dict((symbol, 1.0 - float(count) / total)
             for symbol, count in seen.items())
        for seen in counts
    ]

    def geh(a, b):
        acc = 0.0
        for column, symbol in enumerate(a):
            if symbol != b[column]:
                acc += 1.0
            else:
                # A query may match a VP on a symbol that never occurs in
                # this column of the data set.  Its frequency is then 0, so
                # the weight formula above gives 1.0; a plain lookup would
                # raise KeyError.
                acc += weights[column].get(symbol, 1.0) / float(dim)
        return acc / float(dim)

    def distances_to_vps(records):
        # One output row per record, one column per VP entry.
        return [[geh(record, vp) for vp in vps] for record in records]

    utils.writeDataToFile(cdsDataFileName, distances_to_vps(datas))
    utils.writeDataToFile(cdsQueryFileName, distances_to_vps(querys))
    print('%s %s' % (cdsDataFileName, cdsQueryFileName))
예제 #12
0
#!/usr/bin/python
#-*- coding:utf-8 -*-
import utils
import os
import string

# Script entry point: for every VP entry, bucket the data rows by their
# Hamming distance to that entry and locate the most populated bucket.
# NOTE(review): the visible excerpt ends right after the bucket scan; mx,
# position, xp and yp are computed but not used in the lines shown, and
# curDatas is never reassigned -- presumably code beyond this excerpt consumes
# them (the 'figure' directory suggests plotting). Confirm against the full file.
if __name__ == '__main__':
    utils.createDirectory('figure')
    options = utils.getOptions()

    dataFileName = utils.getDataFileName(options)
    vpFileName = utils.getVPFileName(options)
    datas = utils.getDataInFile(dataFileName)
    vps = utils.readDataFromFile(vpFileName)

    # Alias, not a copy: curDatas refers to the same object as datas.
    curDatas = datas
    for i in xrange(len(vps)):
        print i
        n = len(curDatas)
        # x[k] collects the indices of the rows at distance k from vps[i];
        # one bucket for each distance 0 .. len(vps[i]).
        x = [[] for j in xrange(len(vps[i]) + 1)]
        for j in xrange(n):
            nextPosition = utils.hammingDistance(vps[i], curDatas[j])
            x[nextPosition].append(j)
        # mx: size of the largest bucket so far; position: its distance.
        mx, position = (0, 0)
        # xp: the distances, yp: the bucket size at each distance.
        xp = []
        yp = []
        for j in xrange(len(vps[i]) + 1):
            xp.append(j)
            yp.append(len(x[j]))
            # Strict comparison: on ties the smallest distance is kept.
            if mx < len(x[j]):
                mx, position = (len(x[j]), j)
예제 #13
0
#!/usr/bin/python
#-*- coding:utf-8 -*-
import utils
import os
import string
import numpy as np

if __name__ == '__main__':
    utils.createDirectory('figure_pair')
    options = utils.getOptions()
    dim     = options['numberOfDimension']

    dataFileName    = utils.getDataFileName(options)
    vpFileName      = utils.getVPFileName(options)
    datas           = utils.getDataInFile(dataFileName)
    vps             = utils.readDataFromFile(vpFileName)

    for i in xrange(len(vps)):
        for j in xrange(i+1,len(vps)):
            cc = utils.calculateCorrelationCoefficient(vps[i],vps[j],datas)
            cc = abs(cc)
            imageFileName = utils.getFigurePairName(options,i,j,cc)
            print imageFileName
            if os.path.exists(imageFileName):
                print '%s is exists'%imageFileName
                continue
            xp = []
            yp = []
            zp = []
            zcnt = [ [ 0 for ii in xrange(dim+1) ] for jj in xrange(dim+1) ]
            for k in xrange(len(datas)):
예제 #14
0
from GA import GA
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import warnings
import math

from utils import readDataFromFile, modularity, printData

# Community encoding: nodes that belong to the same community get the same
# value at their positions in a vector.

# The network this script reads from.
net = readDataFromFile('polbooks.gml')

# Genetic algorithm parameters:
# 300 chromosomes in the population.
gaParam = {"popSize": 300, "noGen": 15, "network": net}
# Problem parameters: the modularity function and the network
# ('retea' is Romanian for 'network').
problParam = {'function': modularity, 'retea': net}


def afisare_graf(network):
    """Draw the network with a spring layout and show the figure.

    Args:
        network: mapping whose ``"matrix"`` entry holds the matrix the
            graph is built from (presumably a square adjacency matrix --
            confirm against ``readDataFromFile``).
    """
    # Silence library warnings only while drawing: catch_warnings() restores
    # the caller's warning filters on exit instead of leaving every warning
    # in the process ignored from here on.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')

        adjacency = np.asarray(network["matrix"])
        # from_numpy_matrix was removed in networkx 3.0; prefer its
        # replacement and fall back only on releases that predate it.
        build_graph = getattr(nx, "from_numpy_array", None)
        if build_graph is None:
            build_graph = nx.from_numpy_matrix
        G = build_graph(adjacency)

        pos = nx.spring_layout(G)  # compute graph layout
        plt.figure(figsize=(8, 8))  # image is 8 x 8 inches
        nx.draw_networkx_nodes(G, pos, node_size=600, cmap=plt.cm.RdYlBu)
        nx.draw_networkx_edges(G, pos, alpha=0.3)
        plt.show()