Exemplo n.º 1
0
    # Failure path: print every key of class_options (presumably the accepted
    # class names -- confirm against the enclosing condition), then abort.
    # NOTE(review): dict.iteritems() exists only on Python 2. If class_options
    # is a plain dict and this runs under Python 3, the loop header raises
    # AttributeError before the intended exception below is reached.
    for key, val in class_options.iteritems():
        print(key)
    raise Exception('wrong class selection')

# Reject inputs that do not look like an old-format collection file. This is
# a substring test, so any path containing ".dc" is accepted.
if ".dc" not in infile:
    raise Exception('wrong input file '+infile)

# Directory prefix of the input file. worker() builds sample paths as
# `dir + <sample name>`, so the prefix must end in a path separator:
# os.path.join(d, '') appends one to a non-empty directory and leaves ''
# unchanged. A bare os.path.dirname() result would fuse the directory and
# the sample name whenever infile is not in the current working directory.
# NOTE: the name shadows the builtin dir(); it is kept because worker() reads
# this module-level variable.
dir = os.path.join(os.path.dirname(infile), '')

# Load the old-format collection.
dcold = DCOld()
dcold.readRawFromFile(infile)


# Assemble the new-format collection from the old one's metadata.
dcnew = DataCollection()
dcnew.dataclass = traind()
# Swap the last four characters of every sample name for 'djctd'
# (presumably replacing a "meta" extension -- confirm against the old format).
dcnew.samples = [s[:-4]+'djctd' for s in dcold.samples]
print(dcnew.samples)
dcnew.sourceList = dcold.originRoots
# leave traindata undefined no way to convert.
# NOTE(review): outside a class body no name mangling is applied, so this
# creates an attribute literally called `__nsamples`. If DataCollection keeps
# its counter as `self.__nsamples` internally (i.e. `_DataCollection__nsamples`),
# this assignment does not reset it -- confirm.
dcnew.__nsamples = 0 # determine again, also check

# Output name: the trailing "dc" of the input name is replaced by "djcdc".
outfile = infile[:-2] +'djcdc'
print("infile: ", infile, " outfile", outfile)

def worker(i):

    td = TDOld()
    tdnew = TrainData()
    print("converting",dcold.samples[i])
    
    td.readIn(dir + dcold.samples[i])
Exemplo n.º 2
0
# The same collection file is loaded twice: `dc` supplies the input sample
# list, `dc2` is written back out with its sample list replaced.
dc = DataCollection(infile)
dc2 = DataCollection(infile)
samples = dc.samples

# Prefix every sample with the collection's data directory, falling back to
# the current directory when none is set.
# NOTE: `dir` shadows the builtin; the name is kept in case code outside this
# excerpt reads it.
dir = dc.dataDir
if not dir:
    dir = '.'
insamples = [dir+'/'+s for s in samples]

# Configure the generator that re-slices the input files into batches of
# `nbatch`.
gen = TrainDataGenerator()
gen.setBatchSize(nbatch)
gen.setSkipTooLargeBatches(False)
gen.setFileList(insamples)

if randomise:
    gen.shuffleFileList()

nbatches = gen.getNBatches()

# Write every batch to its own file. All output names derive from the first
# input sample with its last six characters removed (presumably the ".djctd"
# extension -- confirm) and are written relative to the working directory.
newsamples = []
for i in range(nbatches):
    newname = str(samples[0][:-6]+"_n_"+str(i)+".djctd")
    newsamples.append(newname)
    ntd = gen.getBatch()
    print(newname)
    ntd.writeToFile(newname)
    print('..written')

dc2.samples = newsamples

# Strip the complete ".djcdc" suffix (six characters) before appending the new
# one. Slicing off only five characters left the dot behind and produced
# "<name>._n.djcdc", inconsistent with the six-character strip used for the
# sample names above. A name without the suffix is used unchanged.
collection_suffix = ".djcdc"
if infile.endswith(collection_suffix):
    collection_stem = infile[:-len(collection_suffix)]
else:
    collection_stem = infile
dc2.writeToFile(collection_stem+"_n.djcdc")
Exemplo n.º 3
0
        # Register removeTmp to run at interpreter exit, in addition to the
        # explicit removeTmp() call after the prediction below.
        atexit.register(removeTmp)

        #reduce memory footprint here
        # NOTE(review): the positional argument "pred_tmp.djctd" follows the
        # keyword argument istraining=False. Python rejects this at compile
        # time ("positional argument follows keyword argument"), so this call
        # must be rewritten before the file can be imported. The correct form
        # depends on writeFromSourceFile's signature, which is not visible here.
        # NOTE(review): dc.samples below reads tmpdir + "/pred_tmp.djctd",
        # while the name passed here has no tmpdir prefix -- confirm the
        # intended output path.
        td.writeFromSourceFile(inputdir + "/" + inputfile,
                               dc.weighterobjects,
                               istraining=False,
                               "pred_tmp.djctd")

        # NOTE(review): the names look crossed -- y receives the weight list
        # and w the truth list. Whether that matters depends on how y and w
        # are used further down (not visible here) -- confirm.
        x = td.transferFeatureListToNumpy()
        y = td.transferWeightListToNumpy()
        w = td.transferTruthListToNumpy()

        td.clear()

        # Point the collection at the single temporary file and let its
        # generator supply the batches for prediction.
        dc.samples = [tmpdir + "/pred_tmp.djctd"]
        dc.setBatchSize(batchsize)
        dc.invokeGenerator()
        nbatches = dc.generator.getNBatches()

        print('predicting ' + inputfile)
        # One prediction step per generator batch.
        # NOTE(review): if `model` is a Keras model, predict_generator is
        # deprecated in recent TensorFlow releases in favour of
        # Model.predict -- confirm the targeted Keras version.
        predicted = model.predict_generator(dc.generatorFunction(),
                                            steps=nbatches,
                                            max_queue_size=1,
                                            use_multiprocessing=False,
                                            verbose=1)

        removeTmp()

        if not type(
                predicted