示例#1
0
文件: evaluation.py 项目: aidanf/Elie
 def __init__(self,corpus,datadir,field,ntrials=10,train_proportion=0.5):
     """Build ``ntrials`` experiments over random train/test splits of ``corpus``.

     A description of the run is written to ``parameters.txt`` inside
     ``datadir``.  For every trial the corpus is shuffled and cut at
     ``ceil(len(corpus) * train_proportion)``; the items before the cut
     form the training set and the rest form the test set.  The split is
     recorded in ``elie.<field>.<n>.split`` inside ``datadir`` as the base
     names of the training files, a line of 20 asterisks, then the base
     names of the test files.  One ``Experiment`` per trial is appended
     to ``self.experiments``.

     Note: ``corpus`` is shuffled in place, so the caller's list is
     reordered as a side effect.

     Arguments:
     corpus -- mutable sequence of file paths to split
     datadir -- directory receiving ``parameters.txt`` and the split files
     field -- field name; used in the split file names and passed to
              each ``Experiment``
     ntrials -- number of random splits to generate (default 10)
     train_proportion -- fraction of the corpus used for training
                         (default 0.5)
     """
     self.datadir = datadir
     self.field = field
     self.corpus = corpus
     self.experiments = []

     desc_file = open(os.path.join(self.datadir,"parameters.txt"),'w')
     try:
         config.describe("%s trials of %s split"%(ntrials,train_proportion),outstream=desc_file)
     finally:
         # Release the handle even if describing the configuration fails.
         desc_file.close()

     for n in range(ntrials):
         # A single shuffle already yields a uniformly random ordering.
         random.shuffle(self.corpus)
         split_point = int(ceil(len(self.corpus)*train_proportion))
         train = self.corpus[:split_point]
         test = self.corpus[split_point:]

         split_path = os.path.join(datadir,'elie.'+field+'.'+str(n)+'.split')
         split_file = open(split_path,'w')
         try:
             for t in train:
                 split_file.write(os.path.basename(t)+'\n')
             # A line of 20 asterisks separates the training names from the test names.
             split_file.write(20*'*'+'\n')
             for t in test:
                 split_file.write(os.path.basename(t)+'\n')
         finally:
             # Close (and flush) the split file before the experiment is built.
             split_file.close()

         new_exper = Experiment(train,test,self.datadir,self.field,number=n)
         self.experiments.append(new_exper)
示例#2
0
文件: evaluation.py 项目: aidanf/Elie
 def __init__(self,corpusdir, datadir, field, splitfiles):
     """Build one experiment per pre-defined split file.

     A description listing the split files is written to
     ``parameters.txt`` inside ``datadir``.  Each split file is expected
     to hold training file names, a separator of 20 asterisks, then test
     file names, one name per line.  Blank lines are ignored.  Every
     name is joined onto ``corpusdir`` and given the ``.elie.pre``
     suffix when it does not already end with it.

     The trial identifier ``n`` is the third dot-separated component of
     the split file's base name (it stays a string).  The parsed split
     is logged to ``elie.<field>.<n>.split`` inside ``datadir`` and an
     ``Experiment`` for it is appended to ``self.experiments``.

     Arguments:
     corpusdir -- directory containing the corpus files
     datadir -- directory receiving ``parameters.txt`` and the split logs
     field -- field name; used in the log file names and passed to each
              ``Experiment``
     splitfiles -- paths of the split files to load

     Raises:
     ValueError -- if a split file does not break into exactly two parts
                   on the 20-asterisk separator
     IndexError -- if a split file's base name has fewer than three
                   dot-separated components
     """
     self.datadir = datadir
     self.field = field
     self.corpusdir = corpusdir
     self.splitfiles = splitfiles
     self.experiments = []

     def corpus_paths(listing):
         # Turn a newline-separated block of names into full corpus paths.
         paths = []
         for name in listing.split('\n'):
             name = name.strip()
             if name:
                 path = os.path.join(self.corpusdir,name)
                 if not path.endswith('.elie.pre'):
                     path = path + '.elie.pre'
                 paths.append(path)
         return paths

     desc_str = "Using pre-defined splits:\n" + "".join([sf + "\n" for sf in self.splitfiles])
     desc_file = open(os.path.join(self.datadir,"parameters.txt"),'w')
     try:
         config.describe(desc_str,outstream=desc_file)
     finally:
         # Release the handle even if describing the configuration fails.
         desc_file.close()

     for sf in self.splitfiles:
         n = os.path.split(sf)[-1].split('.')[2]

         f = open(sf)
         try:
             contents = f.read()
         finally:
             f.close()
         # Exactly one separator is expected: training names come before it, test names after.
         train_part,test_part = contents.split(20*'*')
         train = corpus_paths(train_part)
         test = corpus_paths(test_part)

         split_log = open(os.path.join(datadir,'elie.'+field+'.'+str(n)+'.split'),'w')
         try:
             for t in train:
                 split_log.write(os.path.basename(t)+'\n')
             split_log.write(20*'*'+'\n')
             for t in test:
                 split_log.write(os.path.basename(t)+'\n')
         finally:
             # Close (and flush) the log before the experiment is built.
             split_log.close()

         new_exper = Experiment(train,test,self.datadir,self.field,number=n)
         self.experiments.append(new_exper)