Exemplo n.º 1
0
 def test(self):
     """Run the generator for ten epochs and validate every batch it yields.

     A DataCollection is built from ``self.files.filenames`` with
     ``TrainData_test`` and written to ``self.dcoutdir.path``. Its generator
     is set to a batch size of ``self.n_per_batch``; each batch is then
     handed to ``raggedtester.checkData``.

     Returns:
         True if no batch was rejected by ``raggedtester.checkData``,
         False otherwise. A batch whose final ``rs`` entry exceeds
         ``self.n_per_batch`` is only reported on stdout; it does not
         change the return value.
     """
     collection = DataCollection()
     collection.dataclass = TrainData_test
     collection.sourceList = list(self.files.filenames)
     collection.createDataFromRoot(TrainData_test, outputDir=self.dcoutdir.path)

     generator = collection.invokeGenerator()
     generator.setBatchSize(self.n_per_batch)

     all_ok = True
     for epoch in range(10):
         generator.prepareNextEpoch()
         print("epoch",epoch,'batches',generator.getNBatches())
         for b in range(generator.getNBatches()):
             features, _ = next(generator.feedNumpyData())
             data = features[0]
             # presumably row splits of the ragged batch -- TODO confirm
             splits = np.array(features[1][:,0],dtype='int')
             # The last entry is used as the number of leading entries to keep.
             splits = splits[:splits[-1]]

             if raggedtester.checkData(data, splits):
                 # Batch is consistent; an oversize batch is reported but
                 # not treated as a failure.
                 if splits[-1] > self.n_per_batch:
                     print('maximum batch size exceeded for batch ',b, 'epoch', epoch)
                 continue

             # First inconsistent batch: record the failure and abandon the
             # rest of this epoch (later epochs still run).
             print('epoch',epoch, 'batch',b,'broken')
             all_ok = False
             break

         print('shuffling')
         generator.shuffleFilelist()

     return all_ok
Exemplo n.º 2
0
    raise Exception('wrong class selection')

# The output name below is derived by stripping the trailing "dc" from
# infile, so the path must actually END in ".dc". A plain substring test
# would accept e.g. "x.dc.bak" and silently produce a garbled output name.
if not infile.endswith(".dc"):
    raise Exception('wrong input file '+infile)

# NOTE: this shadows the builtin dir(); the name is kept because worker()
# below reads it when building the path of each old sample file.
dir = os.path.dirname(infile)

# Load the old-format data collection that is being converted.
dcold = DCOld()
dcold.readRawFromFile(infile)


# Build the new-format collection from the old one's metadata.
dcnew = DataCollection()
dcnew.dataclass = traind()
# Replace the last four characters of every old sample name with 'djctd'.
dcnew.samples = [s[:-4]+'djctd' for s in dcold.samples]
print(dcnew.samples)
dcnew.sourceList = dcold.originRoots
# The train data is left undefined: there is no way to convert it.
# NOTE(review): outside a class body no name mangling happens, so this sets
# an attribute literally called "__nsamples". If DataCollection keeps its
# counter as a private "self.__nsamples", this line does not reset it --
# confirm against the class definition.
dcnew.__nsamples = 0 # determine again, also check

# "<name>.dc" -> "<name>.djcdc": drop the trailing "dc", keep the dot.
outfile = infile[:-2] +'djcdc'
print("infile: ", infile, " outfile", outfile)

def worker(i):

    td = TDOld()
    tdnew = TrainData()
    print("converting",dcold.samples[i])
    
    td.readIn(dir + dcold.samples[i])
    x = td.x
    y = td.y