示例#1
0
def process(data, overwrite=False):
    """Generate and convert the data set, then benchmark every fold.

    Calls ``prog.genGauss`` followed by the ``vec2bin``, ``vec2hdf5`` and
    ``vec2vect`` converters on ``data``. Afterwards ``runkbench`` is run
    once per fold: for each index in ``range(data.cfg['nfolds'])`` the
    index is written to ``data.cfg.F`` and a fresh ``dh.Data`` built from
    that config is handed to ``runkbench``.

    Args:
        data: Data descriptor exposing a ``cfg`` with an ``F`` attribute
            and an ``'nfolds'`` item.
        overwrite: Forwarded unchanged to every generation, conversion
            and benchmark step.

    Side effects:
        ``data.cfg.F`` is modified while the folds run. Its previous value
        is restored afterwards, including when a fold raises.
    """
    prog.genGauss(data, overwrite=overwrite, printcmd=True)
    prog.vec2bin(data, overwrite, printcmd=True)
    prog.vec2hdf5(data, overwrite, printcmd=True)
    prog.vec2vect(data, overwrite, printcmd=True)

    # Kept as a function-scope import (as in the original), but done once
    # instead of on every loop iteration.
    from runlsh import runkbench

    oldfold = data.cfg.F
    try:
        for i in range(data.cfg['nfolds']):
            data.cfg.F = i
            nd = dh.Data(data.cfg)
            runkbench(nd, overwrite)
    finally:
        # Always hand the caller's config back on its original fold, even
        # if one of the benchmark runs failed.
        data.cfg.F = oldfold

    # NOTE(review): the names below are not used in the visible part of this
    # function; they are kept because the rest of the function may lie
    # outside this view. Confirm before deleting.
    # confName = "gaussoraConfig_nclus=%d_dim=%d_var=%s.txt" %(nclus,dim,var)
    gcprog = "gaussoraConf.pl"
    # confFile = "%s/%s" %(confDir,confName)

    gprog = "gaussora"
    # dataName = "gaussian_nclus=%d_dim=%d_var=%s_size=%d.vec" %(nclus,dim,var,size)
    # dataFile = "%s/%s" %(dataDir,dataName)

    # dataName2 = "gaussian_nclus=%d_dim=%d_var=%s_size=%d.vect" %(nclus,dim,var,size)
    # dataFile2 = "%s/%s" %(dataDir,dataName2)

    # convertProg1 = "vec2hdf5"
    # hdf5Name = "gaussian_nclus=%d_dim=%d_var=%s_size=%d.hdf5" %(nclus,dim,var,size)
    # hdf5File = "%s/%s" %(dataDir,hdf5Name)

    # convertProg2 = "vec2bin"
    # vbinName = "gaussian_nclus=%d_dim=%d_var=%s_size=%d.lbin" %(nclus,dim,var,size)
    # vbinFile = "%s/%s" %(dataDir,vbinName)

    createQueryProy = "createQueries"
示例#2
0
def getlsh(data):
    """Collect one statistics object per valid LSH type.

    For every type yielded by ``dh.LSHTypeEnum.getValidTypes()`` the type is
    stored in ``data.cfg['lshtype']``, a fresh ``dh.Data`` is built from that
    config, and an ``LSHStatter`` is created from its folded
    ``'lshbenchfilepath'`` files. If building the statter fails, a
    ``NOStatter`` labelled with the upper-cased type name is used instead.

    Args:
        data: Data descriptor whose ``cfg`` is updated in place; after the
            call ``data.cfg['lshtype']`` holds the last type iterated.

    Returns:
        A list with one statter per valid type, in iteration order.
    """
    allv = []
    for t in dh.LSHTypeEnum.getValidTypes():
        data.cfg['lshtype'] = t
        nd = dh.Data(data.cfg)
        try:
            fs = LSHStatter(nd.getFoldedFiles('lshbenchfilepath'), nd)
        except Exception:
            # Best-effort fallback for types without usable results.
            # ``Exception`` (not a bare ``except``) so that Ctrl-C and
            # interpreter shutdown are not swallowed.
            fs = NOStatter(t.name.upper(), nd)
        allv.append(fs)
    return allv
示例#3
0
def fullprocess(data,
                overwritedata=False,
                overwriteindex=False,
                overwritebench=False):
    """Run the whole pipeline using the best configuration from ``findbest``.

    Steps, in order: pass the index/query/result/conf directories of
    ``data`` to ``dh.Data.mkdirs``, run the ``vec2bin`` and ``vec2hdf5``
    conversions, search for the best configuration, then run ``process``
    on a ``dh.Data`` built from that configuration and print its stats.

    Args:
        data: Data descriptor for the input data set.
        overwritedata: Forwarded to the converters, ``findbest`` and
            ``process``.
        overwriteindex: Forwarded to ``findbest`` and ``process``.
        overwritebench: Forwarded to ``findbest`` and ``process``.

    Returns:
        A ``(processed, stats)`` tuple: the object returned by ``process``
        and the ``LSHStatter`` built from its folded benchmark files.
    """
    dh.Data.mkdirs(data.indexdir, data.querydir, data.resultdir, data.confdir)
    vec2bin(data, overwritedata)
    vec2hdf5(data, overwritedata)

    # Pick the winning configuration and switch over to it.
    best = findbest(data, overwritedata, overwriteindex, overwritebench)
    best_data = dh.Data(best)
    print("Best M, L ", best['lshM'], best['lshL'])

    processed = process(best_data, overwritedata, overwriteindex,
                        overwritebench)
    bench_files = processed.getFoldedFiles('lshbenchfilepath')
    stats = LSHStatter(bench_files, processed)
    sprinter.printstats(stats)
    return processed, stats
示例#4
0
def findbest(data,
             overwritedata=False,
             overwriteindex=False,
             overwritebench=False):
    """Grid-search the LSH parameters and return the best-scoring config.

    Each ``(M, L, N)`` candidate is written (as ints) into a working copy
    of ``data.cfg`` under ``'lshM'``, ``'lshL'`` and ``'lshN'``. The working
    copy uses 2 folds; ``process`` is run for each fold and the result of
    the last fold is used to build an ``LSHStatter``. A candidate's score is
    the mean of ``precision``, ``recall`` and ``1 - cost``; the highest
    score wins.

    A candidate that raises is reported with ``traceback.print_exc()`` and
    skipped, so one failing combination does not abort the search.

    Args:
        data: Data descriptor; ``data.S`` seeds the ``M`` candidates and
            ``data.cfg`` is deep-copied, never modified.
        overwritedata: Accepted for interface compatibility.
        overwriteindex: Accepted for interface compatibility.
        overwritebench: Accepted for interface compatibility.

    Returns:
        A deep copy of the best configuration with ``'nfolds'`` reset to
        the value in ``data.cfg``. If no candidate scores above 0, this is
        a plain copy of ``data.cfg``.
    """
    # NOTE(review): the three overwrite flags are not forwarded to
    # process(); confirm whether that is intentional.
    sq = math.sqrt(data.S)
    cfg = copy.deepcopy(data.cfg)
    bestcfg = copy.deepcopy(data.cfg)
    cfg['nfolds'] = 2
    bestcost = 0

    # Candidate grid, in the same (M outer, L, N inner) order as before.
    grid = [(M, L, N) for M in [sq / 8] for L in [4] for N in [6]]

    for M, L, N in grid:
        try:
            cfg['lshM'] = int(M)
            cfg['lshL'] = int(L)
            cfg['lshN'] = int(N)
            print("cfg", cfg['lshM'])

            for i in range(cfg['nfolds']):
                cfg.F = i
                rundata = process(dh.Data(cfg))

            st = LSHStatter(rundata.getFoldedFiles('lshbenchfilepath'), data)
            weightedcost = (st.precision + st.recall + (1 - st.cost)) / 3
            if weightedcost > bestcost:
                bestcost = weightedcost
                bestcfg = copy.deepcopy(cfg)
        except Exception:
            # Keep searching after a failed candidate, but let
            # KeyboardInterrupt / SystemExit propagate so the search can
            # still be stopped.
            traceback.print_exc()

    bestcfg['nfolds'] = data.cfg['nfolds']
    return bestcfg
示例#5
0
             printcmd=True)


def gendata(data, overwrite=False):
    """Prepare the HDF5 inputs for ``data``.

    Passes the bench/conf/index/query directories to ``dh.Data.mkdirs``,
    runs ``vec2hdf5`` on ``data``, runs ``genGauss.process`` when
    ``data.cfg.synthetic`` is truthy, and finally converts the query vector
    file (``data.qvecfilepath``) to ``data.qhdf5filepath`` via ``_vec2hdf5``.

    Args:
        data: Data descriptor providing the directories, file paths and
            ``cfg`` used above.
        overwrite: Forwarded unchanged to every step.
    """
    dh.Data.mkdirs(data.benchdir, data.confdir, data.indexdir, data.querydir)
    vec2hdf5(data, overwrite=overwrite)

    synthetic = data.cfg.synthetic
    if synthetic:
        genGauss.process(data, overwrite)

    # Query vectors are converted separately from the main data set.
    query_vec = data.qvecfilepath
    query_hdf5 = data.qhdf5filepath
    _vec2hdf5(query_vec, query_hdf5, overwrite=overwrite, printcmd=True)


if __name__ == "__main__":
    # Script entry point: build the configuration from the command line and
    # run the full pipeline.

    # When started without arguments, substitute the argument list that
    # sysarg.args() provides for this file.
    if len(sys.argv) == 1:
        sys.argv = sysarg.args(__file__)
    # The overwrite switches are detected by plain membership in sys.argv.
    overwrited = '--overwritedata' in sys.argv
    overwritei = '--overwriteindex' in sys.argv
    overwriteb = '--overwritebench' in sys.argv

    args, unknown = sysarg.getParsed(sys.argv, True)
    print(args)
    cfg = config.Config(vars(args))
    # NOTE(review): ocfg is not used in the visible part of this script.
    ocfg = copy.deepcopy(cfg)
    data = dh.Data(cfg)
    addLogFile(data.logfile)

    # NOTE(review): overwritei is computed above but never passed, and
    # overwriteb is given as the second positional flag. Confirm this
    # matches the parameter order of fullprocess() in this file.
    fullprocess(data, overwrited, overwriteb)
示例#6
0
            traceback.print_exc()

    bestcfg['nfolds'] = data.cfg['nfolds']
    return bestcfg


if __name__ == "__main__":
    # Script entry point: parse the command line, optionally generate
    # synthetic data, run the full pipeline and build statistics from the
    # folded benchmark files.

    # When started without arguments, substitute the argument list that
    # sysarg.args() provides for this file.
    if len(sys.argv) == 1:
        sys.argv = sysarg.args(__file__)
    # The overwrite switches are detected by plain membership in sys.argv.
    overwritei = '--overwriteindex' in sys.argv
    overwrited = '--overwritedata' in sys.argv
    overwriteb = '--overwritebench' in sys.argv
    ap = sysarg.getArgParse(sys.argv, needsquerydata=True)
    # Unrecognised arguments are collected in `unknown` rather than
    # rejected.
    args, unknown = ap.parse_known_args()
    runcfg = config.Config(vars(args))
    rundata = dh.Data(runcfg)
    addLogFile(rundata.logfile)

    if runcfg.synthetic:
        genGauss.process(rundata, overwrited)

    # NOTE(review): rundata is rebound to whatever fullprocess() returns and
    # getFoldedFiles() is called on it below, so the return value must be a
    # Data-like object (not a tuple). Confirm against fullprocess() in this
    # file.
    rundata = fullprocess(rundata,
                          overwritedata=overwrited,
                          overwriteindex=overwritei,
                          overwritebench=overwriteb)

    # printl('config', 'avgcalcs', 'meanquerytime', 'precision', 'recall', 'cost')
    files = rundata.getFoldedFiles('lshbenchfilepath')
    # print(rundata.lshbenchfilepath)
    # print(files)
    st = LSHStatter(files, rundata)