def main(argv):
    """Train and evaluate the MEMM tagger on the given data files.

    argv[0] is the training file and argv[1] the test file.  Any third
    argument switches on the '-print' option of the Java MEMM program.
    With fewer than two arguments the usage line is printed and the
    interpreter exits with status 0.
    """
    if len(argv) < 2:
        print('USAGE: python NER.py trainFile testFile')
        exit(0)

    # An empty string is handed to Java when printing was not requested.
    printOp = '-print' if len(argv) > 2 else ''

    factory = FeatureFactory()

    # read the train and test data
    train_set = factory.readData(argv[0])
    test_set = factory.readData(argv[1])

    # add the features
    train_featured = factory.setFeaturesTrain(train_set)
    test_featured = factory.setFeaturesTest(test_set)

    # write the updated data into JSON files
    factory.writeData(train_featured, 'trainWithFeatures')
    factory.writeData(test_featured, 'testWithFeatures')

    # run MEMM
    # java -cp classes -Xmx1G MEMM trainWithFeatures.json testWithFeatures.json
    # java -cp ../java/classes -Xmx1G MEMM trainWithFeatures.json testWithFeatures.json
    command = [
        'java', '-cp', '../java/classes', '-Xmx2G', 'MEMM',
        'trainWithFeatures.json', 'testWithFeatures.json', printOp,
    ]
    memm = Popen(command, stdout=PIPE)
    print(memm.communicate()[0])
def test():
    """Exercise address features from each of the three feeds.

    One address is fetched per feed factory, the change-feed and
    resolution-feed addresses are populated through their setters, all
    three are printed, and the two conversions are pretty-printed.
    """
    import pprint
    from FeatureFactory import FeatureFactory

    # Features feed: fetched but left unpopulated.
    feat_factory = FeatureFactory.getInstance(FeatureType.ADDRESS, FeedType.FEATURES)
    feat_addr = feat_factory.get(ref='one_feat')

    # Change feed address.
    chg_factory = FeatureFactory.getInstance(FeatureType.ADDRESS, FeedType.CHANGEFEED)
    chg_addr = chg_factory.get(ref='two_chg')
    chg_addr.setVersion(100)
    chg_addr.setObjectType('Parcel')
    chg_addr.setAddressNumber(100)
    chg_addr.setAddressId(100)
    chg_addr.setRoadName('Smith Street')

    # Resolution feed address.
    res_factory = FeatureFactory.getInstance(FeatureType.ADDRESS, FeedType.RESOLUTIONFEED)
    res_addr = res_factory.get(ref='three_res')
    res_addr.setChangeId(200)
    res_addr.setVersion(200)
    res_addr.setAddressNumber(200)
    res_addr.setRoadName('Jones Road')

    print('%s %s %s' % (feat_addr, chg_addr, res_addr))

    chg_request = chg_factory.convert(chg_addr, ActionType.UPDATE)
    res_request = res_factory.convert(res_addr, ApprovalType.UPDATE)
    pprint.pprint(chg_request)
    pprint.pprint(res_request)
def test():
    """Print three sample addresses and the converted change/resolution forms.

    The features-feed address is only fetched; the change-feed and
    resolution-feed addresses are filled in via setters before being
    converted with an UPDATE action/approval.
    """
    import pprint
    from FeatureFactory import FeatureFactory

    factory_one = FeatureFactory.getInstance(FeatureType.ADDRESS, FeedType.FEATURES)
    address_one = factory_one.get(ref='one_feat')

    factory_two = FeatureFactory.getInstance(FeatureType.ADDRESS, FeedType.CHANGEFEED)
    address_two = factory_two.get(ref='two_chg')
    address_two.setVersion(100)
    address_two.setObjectType('Parcel')
    address_two.setAddressNumber(100)
    address_two.setAddressId(100)
    address_two.setRoadName('Smith Street')

    factory_three = FeatureFactory.getInstance(FeatureType.ADDRESS, FeedType.RESOLUTIONFEED)
    address_three = factory_three.get(ref='three_res')
    address_three.setChangeId(200)
    address_three.setVersion(200)
    address_three.setAddressNumber(200)
    address_three.setRoadName('Jones Road')

    print('%s %s %s' % (address_one, address_two, address_three))

    # Both conversions are computed before either is shown.
    converted = (
        factory_two.convert(address_two, ActionType.UPDATE),
        factory_three.convert(address_three, ApprovalType.UPDATE),
    )
    for request in converted:
        pprint.pprint(request)
def output(partId, ch_aux):
    """Uses the student code to compute the output for test cases.

    Trains on "../data/train", tags the test data carried in ``ch_aux`` and
    returns whatever the Java MEMM program writes to stdout in '-submit'
    mode.  ``partId`` is accepted but not used in this function.

    The two intermediate JSON files are removed on the way out even when
    one of the steps raises, so a failed run does not leave them behind.
    """
    print('== Running your code ...')
    json_files = ('trainWithFeaturesSubmit.json', 'testWithFeaturesSubmit.json')
    featureFactory = FeatureFactory()
    try:
        # read the train and test data
        trainData = featureFactory.readData("../data/train")
        testData = featureFactory.readTestData(ch_aux)

        # add the features
        trainDataWithFeatures = featureFactory.setFeaturesTrain(trainData)
        testDataWithFeatures = featureFactory.setFeaturesTest(testData)

        # write the updated data into JSON files
        featureFactory.writeData(trainDataWithFeatures, "trainWithFeaturesSubmit")
        featureFactory.writeData(testDataWithFeatures, "testWithFeaturesSubmit")

        # run MEMM
        memm_output = Popen([
            'java', '-cp', 'classes', '-Xmx1G', 'MEMM',
            'trainWithFeaturesSubmit.json', 'testWithFeaturesSubmit.json',
            '-submit'
        ], stdout=PIPE).communicate()[0]
        # print output[:100]
    finally:
        # Remove only what exists, so an early failure is not masked by a
        # second error raised from os.remove.
        for path in json_files:
            if os.path.exists(path):
                os.remove(path)

    print('== Finished running your code')
    return memm_output
def setUp(self):
    """Prepare the address feed references, one factory per feed and a DataManager."""
    address = FeatureType.ADDRESS
    self.af = FeedRef((address, FeedType.FEATURES))
    self.ac = FeedRef((address, FeedType.CHANGEFEED))
    self.ar = FeedRef((address, FeedType.RESOLUTIONFEED))

    # One factory for each of the references created above.
    make_factory = FeatureFactory.getInstance
    self.aff = make_factory(self.af)
    self.afc = make_factory(self.ac)
    self.afr = make_factory(self.ar)

    self.dm = DataManager()
def setUp(self):
    """Build feed references (af/ac/ar), their factories (aff/afc/afr) and a DataManager."""
    feeds = (FeedType.FEATURES, FeedType.CHANGEFEED, FeedType.RESOLUTIONFEED)

    # Address feed references, in features / change / resolution order.
    self.af, self.ac, self.ar = [
        FeedRef((FeatureType.ADDRESS, feed)) for feed in feeds
    ]

    # Matching factories, created in the same order.
    self.aff, self.afc, self.afr = [
        FeatureFactory.getInstance(ref) for ref in (self.af, self.ac, self.ar)
    ]

    self.dm = DataManager()
def setUp(self):
    """Create the DataManager, feed references, two factories and a test address.

    No factory is created for the features feed here; only the change and
    resolution feeds get one.
    """
    # NOTE(review): `ref_int` and `af` are not defined in this method;
    # presumably module-level fixtures - confirm against the rest of the file.
    self.dm = DataManager(ref_int)

    address = FeatureType.ADDRESS
    self.af = FeedRef((address, FeedType.FEATURES))
    self.ac = FeedRef((address, FeedType.CHANGEFEED))
    self.ar = FeedRef((address, FeedType.RESOLUTIONFEED))

    self.afc = FeatureFactory.getInstance(self.ac)
    self.afr = FeatureFactory.getInstance(self.ar)

    features_entry = af[FeedType.FEATURES]
    self.addr_r = _getTestAddress(features_entry)
def setUp(self):
    """Set up a DataManager, the three address feed references, the change and
    resolution factories, and a test address looked up via the features feed.
    """
    # NOTE(review): `ref_int` and `af` come from outside this method
    # (presumably module globals) - verify they exist where this runs.
    self.dm = DataManager(ref_int)

    def address_ref(feed):
        return FeedRef((FeatureType.ADDRESS, feed))

    self.af = address_ref(FeedType.FEATURES)
    self.ac = address_ref(FeedType.CHANGEFEED)
    self.ar = address_ref(FeedType.RESOLUTIONFEED)

    get_factory = FeatureFactory.getInstance
    self.afc = get_factory(self.ac)
    self.afr = get_factory(self.ar)

    self.addr_r = _getTestAddress(af[FeedType.FEATURES])
def output(partId, ch_aux):
    """Uses the student code to compute the output for test cases.

    The training data is read from "../data/train" and the test data from
    ``ch_aux``; both are written out with features as JSON, the Java MEMM
    program is run on them with '-submit', and its stdout is returned.
    ``partId`` is not used in this function.

    The intermediate JSON files are deleted in a ``finally`` clause so
    that they do not survive a failed run.
    """
    print('== Running your code ...')

    train_json = 'trainWithFeaturesSubmit.json'
    test_json = 'testWithFeaturesSubmit.json'
    featureFactory = FeatureFactory()
    try:
        # read the train and test data
        trainData = featureFactory.readData("../data/train")
        testData = featureFactory.readTestData(ch_aux)

        # add the features
        trainDataWithFeatures = featureFactory.setFeaturesTrain(trainData)
        testDataWithFeatures = featureFactory.setFeaturesTest(testData)

        # write the updated data into JSON files
        featureFactory.writeData(trainDataWithFeatures, "trainWithFeaturesSubmit")
        featureFactory.writeData(testDataWithFeatures, "testWithFeaturesSubmit")

        # run MEMM
        command = ['java', '-cp', 'classes', '-Xmx1G', 'MEMM',
                   train_json, test_json, '-submit']
        memm_output = Popen(command, stdout=PIPE).communicate()[0]
        # print output[:100]
    finally:
        # Only delete files that are really there: a failure before the
        # files were written must not be hidden by an OSError from here.
        if os.path.exists(train_json):
            os.remove(train_json)
        if os.path.exists(test_json):
            os.remove(test_json)

    print('== Finished running your code')
    return memm_output
def main(argv):
    """Run the feature pipeline and the Java MEMM on a train/test file pair.

    argv[0] and argv[1] name the training and test files.  If a third
    argument is present, '-print' is passed to the MEMM program; otherwise
    an empty string is passed in its place.
    """
    if len(argv) < 2:
        print('USAGE: python NER.py trainFile testFile')
        exit(0)

    if len(argv) > 2:
        printOp = '-print'
    else:
        printOp = ''

    ff = FeatureFactory()

    # read the train and test data
    raw_train = ff.readData(argv[0])
    raw_test = ff.readData(argv[1])

    # add the features
    featured_train = ff.setFeaturesTrain(raw_train)
    featured_test = ff.setFeaturesTest(raw_test)

    # write the updated data into JSON files
    ff.writeData(featured_train, 'trainWithFeatures')
    ff.writeData(featured_test, 'testWithFeatures')

    # run MEMM and echo its stdout
    java_args = ['java', '-cp', 'classes', '-Xmx1G', 'MEMM',
                 'trainWithFeatures.json', 'testWithFeatures.json', printOp]
    stdout_data = Popen(java_args, stdout=PIPE).communicate()[0]
    print(stdout_data)
def main(argv):
    """Run the feature pipeline and the Java MEMM tagger.

    With no arguments the defaults "../data/train" and "../data/dev" are
    used.  With exactly one argument the usage line is printed and the
    interpreter exits with status 0.  Otherwise argv[0] is the training
    file, argv[1] the test file, and any third argument turns on the
    '-print' option of the MEMM program.

    The caller's list is never modified: defaults are applied to a copy.
    """
    args = list(argv)

    # defaults
    if not args:
        args = ["../data/train", "../data/dev"]
    elif len(args) < 2:
        print('USAGE: python NER.py trainFile testFile')
        exit(0)

    # Set this to -print to print
    printOp = '-print' if len(args) > 2 else ''

    featureFactory = FeatureFactory()

    # read the train and test data
    trainData = featureFactory.readData(args[0])
    testData = featureFactory.readData(args[1])

    # add the features
    trainDataWithFeatures = featureFactory.setFeaturesTrain(trainData)
    testDataWithFeatures = featureFactory.setFeaturesTest(testData)

    # write the updated data into JSON files
    featureFactory.writeData(trainDataWithFeatures, 'trainWithFeatures')
    featureFactory.writeData(testDataWithFeatures, 'testWithFeatures')

    # run MEMM
    output = Popen(['java', '-cp', 'classes', '-Xmx1G', 'MEMM',
                    'trainWithFeatures.json', 'testWithFeatures.json', printOp],
                   stdout=PIPE).communicate()[0]
    print(output)
def test(self):
    """Create the feed factories and run test1 inside a DataManager context.

    Resets the module-level ``refsnap`` to None for keys 0, 1 and 2, builds
    a dict of address factories keyed by feed type (plus key 3 for the
    groups change feed), then registers this object with the DataManager
    and calls ``self.test1(dm, af)``.
    """
    global refsnap
    refsnap = dict.fromkeys((0, 1, 2))

    af = {}
    for ft in (FeedType.FEATURES, FeedType.CHANGEFEED, FeedType.RESOLUTIONFEED):
        af[ft] = FeatureFactory.getInstance(FeedRef((FeatureType.ADDRESS, ft)))

    # gf is not used further down; it is still built because getInstance
    # is called for each groups feed.
    gf = {}
    for ft in (FeedType.CHANGEFEED, FeedType.RESOLUTIONFEED):
        gf[ft] = FeatureFactory.getInstance(FeedRef((FeatureType.GROUPS, ft)))

    groups_change = FeedRef((FeatureType.GROUPS, FeedType.CHANGEFEED))
    af[3] = FeatureFactory.getInstance(groups_change)

    #with DataManager(start=None) as dm:
    #    dm.start(FeedType.CHANGEFEED)
    with DataManager() as dm:
        dm.registermain(self)
        self.test1(dm, af)
def test():
    """Convert sample addresses to every change/resolution form and print them.

    A features-feed address is converted with the change-feed factory
    (ADD, RETIRE, UPDATE) and a change-feed address with the
    resolution-feed factory (ACCEPT, DECLINE, UPDATE).  All six
    conversions are made first and then pretty-printed under a heading.
    """
    from pprint import pprint as pp

    def factory_for(feed):
        return FeatureFactory.getInstance(FeedRef(FeatureType.ADDRESS, feed))

    af_f = factory_for(FeedType.FEATURES)
    af_c = factory_for(FeedType.CHANGEFEED)
    af_r = factory_for(FeedType.RESOLUTIONFEED)

    #axx = af_r.get()
    source = af_f.get()
    #source._addressedObject_externalObjectId = 1000
    source._components_addressType = 'Road'
    source._components_addressNumber = 100
    source._components_roadName = 'The Terrace'
    source._version = 1
    source._components_addressId = 100
    source._workflow_sourceUser = '******'

    chg_add = af_c.convert(source, ActionType.ADD)
    chg_retire = af_c.convert(source, ActionType.RETIRE)
    chg_update = af_c.convert(source, ActionType.UPDATE)

    #------------------------------------------------

    change = af_c.get()
    change._version = 100
    change._changeId = 100
    change._components_addressType = 'Road'
    change._components_addressNumber = 100
    change._components_roadName = 'The Terrace'

    res_accept = af_r.convert(change, ApprovalType.ACCEPT)
    res_decline = af_r.convert(change, ApprovalType.DECLINE)
    res_update = af_r.convert(change, ApprovalType.UPDATE)

    report = (
        ('CHGF-ADD', chg_add),
        ('CHGF-RET', chg_retire),
        ('CHGF-UPD', chg_update),
        ('RESF-ACC', res_accept),
        ('RESF-DEC', res_decline),
        ('RESF-UPD', res_update),
    )
    for heading, converted in report:
        print(heading)
        pp(converted)
def _processResolutionGroup(self,feat,cid,etft):
    '''Builds a group feature and attaches its resolution-feed addresses.
    The group's addresses are requested through the API; each one is
    instantiated, given its own populated sub entities, and the resulting
    list is set as the entities of the group.
    @param feat: dict representation of feature before object processing
    @type feat: Dict
    @param cid: Change ID or group change ID
    @type cid: Integer
    @param etft: Feed/Feature identifier
    @type etft: FeedRef
    @return: Instantiated feature object
    '''
    group = self.factory.get(model=feat['properties'])

    #HACK subst cid for cid+count string
    request = '{}/address?count={}'.format(cid,MAX_FEATURE_COUNT)
    ce,feat2 = self.api.getOneFeature(etft,request)#group entity/adr list
    if any(ce.values()):
        # A failed request is logged only; processing carries on.
        aimslog.error('Single-feature request failure {}'.format(ce))

    address_ref = FeedRef((FeatureType.ADDRESS,FeedType.RESOLUTIONFEED))
    address_factory = FeatureFactory.getInstance(address_ref)

    members = []
    for entry in feat2['entities']:
        address = address_factory.get(model=entry['properties'])
        address._setEntities([self._populateEntity(sub) for sub in entry['entities']])
        members.append(address)

    group._setEntities(members)
    return group
def test():
    """Print the change-feed and resolution-feed conversions of sample addresses.

    The features-feed address is converted by the change-feed factory for
    each of ADD/RETIRE/UPDATE; a change-feed address is converted by the
    resolution-feed factory for each of ACCEPT/DECLINE/UPDATE.  Printing
    starts only after all conversions have been made.
    """
    from pprint import pprint as pp

    af_f = FeatureFactory.getInstance(FeedRef(FeatureType.ADDRESS, FeedType.FEATURES))
    af_c = FeatureFactory.getInstance(FeedRef(FeatureType.ADDRESS, FeedType.CHANGEFEED))
    af_r = FeatureFactory.getInstance(FeedRef(FeatureType.ADDRESS, FeedType.RESOLUTIONFEED))

    #axx = af_r.get()
    feature_address = af_f.get()
    #feature_address._addressedObject_externalObjectId = 1000
    feature_address._components_addressType = 'Road'
    feature_address._components_addressNumber = 100
    feature_address._components_roadName = 'The Terrace'
    feature_address._version = 1
    feature_address._components_addressId = 100
    feature_address._workflow_sourceUser = '******'

    change_results = [
        ('CHGF-ADD', af_c.convert(feature_address, ActionType.ADD)),
        ('CHGF-RET', af_c.convert(feature_address, ActionType.RETIRE)),
        ('CHGF-UPD', af_c.convert(feature_address, ActionType.UPDATE)),
    ]

    #------------------------------------------------

    change_address = af_c.get()
    change_address._version = 100
    change_address._changeId = 100
    change_address._components_addressType = 'Road'
    change_address._components_addressNumber = 100
    change_address._components_roadName = 'The Terrace'

    resolution_results = [
        ('RESF-ACC', af_r.convert(change_address, ApprovalType.ACCEPT)),
        ('RESF-DEC', af_r.convert(change_address, ApprovalType.DECLINE)),
        ('RESF-UPD', af_r.convert(change_address, ApprovalType.UPDATE)),
    ]

    for label, result in change_results + resolution_results:
        print(label)
        pp(result)
def _processAddressEntity(self,feat):
    '''Turns feature data into an address object by handing the
    resolution-feed address factory's get method to _processSimpleEntity
    @param feat: dict representation of feature before object processing
    @type feat: Dict
    @return: Instantiated Address entity
    '''
    #return EntityAddress.getInstance(feat)
    etft = FeedRef((FeatureType.ADDRESS,FeedType.RESOLUTIONFEED))
    getter = FeatureFactory.getInstance(etft).get
    return self._processSimpleEntity(getter,feat)
def __init__(self, language):
    """Start with empty label, class and sentence lists plus fresh helpers.

    ``language`` is stored on the instance; a FeatureFactory and a
    Viterbi object are created for later use.
    """
    self.language = language

    # Containers that are filled in later.
    self.total_labels = []
    self.klasses = []
    self.train_sentences = []
    self.test_sentenses = []

    # Helper objects.
    self.factory = FeatureFactory()
    self.viterbi = Viterbi()
def castTo(self,requiredtype,address):
    '''Convenience wrapper around the address factory's cast function, used
    to cast address objects to the format a given feed requires
    @param requiredtype: Feed type to cast to; must be one of the keys of FeedType.reverse
    @param address: Address object being cast
    @type address: Address
    @return: Address
    @raise Exception: if requiredtype is not a key of FeedType.reverse
    '''
    known_types = FeedType.reverse.keys()
    if requiredtype not in known_types:
        raise Exception('unknown feed/address type')
    etft = FeedRef((FeatureType.ADDRESS,requiredtype))
    factory = FeatureFactory.getInstance(etft)
    return factory.cast(address)
def getInstance(data,etft=FeedRef((FeatureType.ADDRESS,FeedType.FEATURES))):
    '''Builds an Entity object from a dict, using the factory for etft
    (the Address/Features feed unless another feedref is given)
    @param data: Dict containing AF Entity object attributes
    @param etft: Address Entity feedref
    @type etft: FeedRef
    @return: Populated Entity object
    '''
    # Imported here rather than at module level.
    from FeatureFactory import FeatureFactory
    return FeatureFactory.getInstance(etft).get(model=data)
def getInstance(data, etft=FeedRef((FeatureType.ADDRESS, FeedType.FEATURES))):
    '''Returns an Entity object populated from data; the factory is chosen
    by etft, which defaults to the Address/Features feed
    @param data: Dict containing AF Entity object attributes
    @param etft: Address Entity feedref
    @type etft: FeedRef
    @return: Populated Entity object
    '''
    from FeatureFactory import FeatureFactory
    entity_factory = FeatureFactory.getInstance(etft)
    entity = entity_factory.get(model=data)
    return entity
def clone(a,b=None):
    '''Copies the instance attributes of A onto B, creating B from A's type
    when no B is supplied
    @param a: Feature object to-be cloned
    @type a: Feature
    @param b: Feature object being overwritten (optional)
    @type b: Feature
    @return: B carrying A's attributes. The copy is shallow: attribute
    values are shared with A, not duplicated
    '''
    # Compare against None, not truthiness: a supplied target that happens
    # to evaluate false must still be the object that is written to.
    if b is None:
        # Imported only when a new target really has to be built.
        from FeatureFactory import FeatureFactory
        b = FeatureFactory.getInstance(a.type).get()
    #duplicates only attributes set in source object
    for attr in list(a.__dict__):
        setattr(b,attr,getattr(a,attr))
    return b
def clone(a, b=None):
    '''Clones the attributes set on A to B; B is instantiated from A's type
    if it is not provided
    @param a: Feature object to-be cloned
    @type a: Feature
    @param b: Feature object being overwritten (optional)
    @type b: Feature
    @return: B with A's attributes set on it. This is a shallow copy, the
    attribute values themselves are not duplicated
    '''
    if b is None:
        # Only "no target given" triggers creation; a target object that
        # tests false is kept. The import is deferred to this branch.
        from FeatureFactory import FeatureFactory
        b = FeatureFactory.getInstance(a.type).get()
    #duplicates only attributes set in source object
    for name in list(vars(a)):
        setattr(b, name, getattr(a, name))
    return b
def main():
    """Run the tagger on "../data/train" and "../data/dev" with '-print' on.

    The usage line is always printed first; no arguments are read.
    """
    print('USAGE: python NER.py trainFile testFile')

    factory = FeatureFactory()

    # read the train and test data
    train_set = factory.readData("../data/train")
    dev_set = factory.readData("../data/dev")

    # add the features
    train_featured = factory.setFeaturesTrain(train_set)
    dev_featured = factory.setFeaturesTest(dev_set)

    # write the updated data into JSON files
    factory.writeData(train_featured, 'trainWithFeatures')
    factory.writeData(dev_featured, 'testWithFeatures')

    # run MEMM and echo its stdout
    command = ['java', '-cp', 'classes', '-Xmx1G', 'MEMM',
               'trainWithFeatures.json', 'testWithFeatures.json', '-print']
    process = Popen(command, stdout=PIPE)
    print(process.communicate()[0])
def main(argv):
    """Run the feature pipeline and the Java MEMM tagger.

    No arguments: the defaults "../data/train" and "../data/dev" are used.
    One argument: the usage line is printed and the interpreter exits with
    status 0.  Two or more: argv[0] is the training file, argv[1] the test
    file, and a third argument turns on the MEMM '-print' option.

    Defaults are filled into a private copy, so the list passed in by the
    caller is left untouched.
    """
    args = list(argv)

    # defaults
    if len(args) == 0:
        args.extend(["../data/train", "../data/dev"])
    elif len(args) < 2:
        print('USAGE: python NER.py trainFile testFile')
        exit(0)

    # Set this to -print to print
    printOp = ''
    if len(args) > 2:
        printOp = '-print'

    featureFactory = FeatureFactory()

    # read the train and test data
    trainData = featureFactory.readData(args[0])
    testData = featureFactory.readData(args[1])

    # add the features
    trainDataWithFeatures = featureFactory.setFeaturesTrain(trainData)
    testDataWithFeatures = featureFactory.setFeaturesTest(testData)

    # write the updated data into JSON files
    featureFactory.writeData(trainDataWithFeatures, 'trainWithFeatures')
    featureFactory.writeData(testDataWithFeatures, 'testWithFeatures')

    # run MEMM
    output = Popen([
        'java', '-cp', 'classes', '-Xmx2G', 'MEMM', 'trainWithFeatures.json',
        'testWithFeatures.json', printOp
    ], stdout=PIPE).communicate()[0]
    print(output)
def output(partId, ch_aux):
    """Uses the student code to compute the output for test cases.

    Reads the training data from "../data/train" and the test data from
    ``ch_aux``, writes both with features to JSON, runs the Java MEMM
    program in '-submit' mode and returns its stdout.  ``partId`` is not
    used in this function.

    The JSON files are cleaned up whether or not the run succeeds.
    """
    print("== Running your code ...")

    submit_files = (
        "trainWithFeaturesSubmit.json",
        "testWithFeaturesSubmit.json",
    )
    featureFactory = FeatureFactory()
    try:
        # read the train and test data
        trainData = featureFactory.readData("../data/train")
        testData = featureFactory.readTestData(ch_aux)

        # add the features
        trainDataWithFeatures = featureFactory.setFeaturesTrain(trainData)
        testDataWithFeatures = featureFactory.setFeaturesTest(testData)

        # write the updated data into JSON files
        featureFactory.writeData(trainDataWithFeatures, "trainWithFeaturesSubmit")
        featureFactory.writeData(testDataWithFeatures, "testWithFeaturesSubmit")

        # run MEMM
        memm_output = Popen(
            [
                "java",
                "-cp",
                "classes",
                "-Xmx1G",
                "MEMM",
                "trainWithFeaturesSubmit.json",
                "testWithFeaturesSubmit.json",
                "-submit",
            ],
            stdout=PIPE,
        ).communicate()[0]
        # print output[:100]
    finally:
        # Skip files that were never written so that the original error,
        # if any, is the one the caller sees.
        for leftover in submit_files:
            if os.path.exists(leftover):
                os.remove(leftover)

    print("== Finished running your code")
    return memm_output
def testgrpresfeedAUD(self,dm,af):
    '''Accepts a test resolution-feed group and waits for its response.
    self.testresp is polled every 5 seconds until it yields a
    resolution-feed response; the request id that was sent is then printed
    next to the request id found on the first response. af is not used.
    '''
    ver = 6977370
    #cid = 4117724
    cid = 4117720
    #pull address from features (map)
    group_ref = FeedRef((FeatureType.GROUPS,FeedType.RESOLUTIONFEED))
    grp_r = self.gettestgroup(FeatureFactory.getInstance(group_ref))

    aimslog.info('*** GROUP Resolution ACCEPT '+str(time.clock()))
    rqid1 = 4321234
    dm.acceptGroup(grp_r,rqid1)

    # No timeout: this loops until a response turns up.
    while True:
        _first,resp,_last = self.testresp(dm,FeedType.RESOLUTIONFEED)
        if not resp:
            time.sleep(5)
            continue
        print('%s %s' % (rqid1,resp[0].meta.requestId))
        break
    ver += 1
def testuseractions(self,dm):
    '''create and submit user actions, add/update/delete
    After each action the admin feed is polled every 5 seconds until a
    response arrives, and the request id sent is printed next to the one
    on the first response. The request id is incremented between actions.
    '''
    def await_response(request_id):
        # Blocks until self.testresp returns an admin response.
        while True:
            _a,_b,resp = self.testresp(dm,FeedType.ADMIN)
            if resp:
                print('%s %s' % (request_id,resp[0].meta.requestId))
                return
            time.sleep(5)

    ver = 100
    uid = 100
    rqid = 100
    etft = FeedRef((FeatureType.USERS,FeedType.ADMIN))
    uf = FeatureFactory.getInstance(etft)

    user = uf.get('local_test_user')
    user.setUserId(uid)
    user._version = ver
    user._userName = '******'
    user._email = '*****@*****.**'
    user._requiresProgress = 'False'
    user._organisation = 'LINZ'
    user._role = 'follower'

    # add
    dm.addUser(user,rqid)
    await_response(rqid)

    # update
    rqid += 1
    user._userName = '******'
    dm.updateUser(user,rqid)
    await_response(rqid)

    # delete
    rqid += 1
    dm.deleteUser(user,rqid)
    await_response(rqid)
def __init__(self,params,queues):
    '''Sets up a new DataSync object from its configuration parameters
    @param params: Four values, unpacked in order into ref, etft, ftracker and conf
    @type params: List<?>
    @param queues: IOR queues, read under the keys 'in', 'out' and 'resp'
    @type queues: Dict<String,Queue.Queue>
    '''
    #from DataManager import FEEDS
    super(DataSync,self).__init__()
    self.start_time = time.time()
    self.updater_running = False

    #thread reference, ft to AD/CF/RF, config info
    self.ref,self.etft,self.ftracker,self.conf = params

    # One zeroed entry per value of FEEDS.
    self.data_hash = dict.fromkeys(FEEDS.values(),0)

    etft = self.etft
    self.factory = FeatureFactory.getInstance(etft)
    self.updater = DataUpdater.getInstance(etft) # unevaluated class

    self.inq = queues['in']
    self.outq = queues['out']
    self.respq = queues['resp']
def test_cluster(w, auto):
    """Call FeatureFactory.cluster_100(w, auto) if outputpath has any entries.

    Nothing is returned and the clustering result is not kept.
    """
    # NOTE(review): the original comment read "check if directory is empty",
    # yet clustering runs only when the listing is NOT empty - confirm which
    # one is intended.  should implement try/except later.
    entries = os.listdir(outputpath)
    if not entries:
        return
    FeatureFactory.cluster_100(w, auto)
class Perceptron:
    """Perceptron sequence tagger that decodes sentences with Viterbi.

    Data files hold one token per line with at least four
    whitespace-separated fields (the fourth is the label); sentences are
    separated by blank lines.  Sentences containing '-DOCSTART-' are
    dropped.
    """

    def __init__(self, language):
        """Create an empty tagger; ``language`` is stored on the instance."""
        self.total_labels = []     # labels in the order first seen in training
        self.klasses = []          # one Klass per entry of total_labels
        self.language = language
        self.train_sentences = []
        self.test_sentenses = []   # (sic) name kept, it is part of the interface
        self.factory = FeatureFactory()
        self.viterbi = Viterbi()

    def read_data(self, train_file, test_file):
        """Read the training data, then the testing data."""
        self.read_training_data(train_file)
        self.read_testing_data(test_file)

    def _keep_sentence(self, sentence, sentences):
        """Append ``sentence`` to ``sentences`` unless it is a -DOCSTART- marker."""
        if '-DOCSTART-' not in sentence.full_sentence:
            sentences.append(sentence)

    def _load_sentences(self, data_file, sentences, collect_labels):
        """Parse ``data_file`` into ``sentences`` and return the instance count.

        When ``collect_labels`` is true, unseen labels are appended to
        ``self.total_labels``.
        """
        instance_count = 0
        current = Sentence()
        for line in data_file:
            split = line.strip().split()
            if split:
                current.add(EngInstance(split[0], split[1], split[2], split[3]))
                instance_count += 1
                if collect_labels and split[3] not in self.total_labels:
                    self.total_labels.append(split[3])
                continue
            # Blank line: close the running sentence if there is one.
            # Leading or repeated blank lines are simply skipped; they used
            # to fall through to the field parsing and raise IndexError.
            if current.size() != 0:
                self._keep_sentence(current, sentences)
                current = Sentence()
        # A file without a trailing blank line still ends a sentence.
        if current.size() != 0:
            self._keep_sentence(current, sentences)
        return instance_count

    def read_training_data(self, train_file):
        """Load training sentences and labels, then set up classes and Viterbi."""
        instance_count = self._load_sentences(
            train_file, self.train_sentences, True)
        print('total number of training instances %d '
              'total number of training sentences %d'
              % (instance_count, len(self.train_sentences)))
        self.klasses_init()
        self.viterbi.train(self.total_labels, self.train_sentences)

    def klasses_init(self):
        """Append one Klass per known label to ``self.klasses``."""
        for label in self.total_labels:
            self.klasses.append(Klass(label))

    def tag_klass(self, tag):
        """Return the first class whose ``tag`` equals ``tag``, or None."""
        for klass in self.klasses:
            if klass.tag == tag:
                return klass
        return None

    def read_testing_data(self, test_file):
        """Load testing sentences; labels found here are not recorded."""
        instance_count = self._load_sentences(
            test_file, self.test_sentenses, False)
        print('total number of testing instances %d '
              'total number of testing sentences %d'
              % (instance_count, len(self.test_sentenses)))

    def computeFeatures(self):
        """Compute features for every training and testing sentence."""
        for sentence in self.train_sentences:
            self.factory.compute_sentence_features_eng(sentence)
        for sentence in self.test_sentenses:
            self.factory.compute_sentence_features_eng(sentence)

    def train(self):
        """Run ten passes over the training sentences, then average the weights.

        For every wrongly predicted instance the guessed class is adjusted
        with '-' and the gold class with '+'.
        """
        iteration = 0
        while iteration < 10:
            error = 0
            for sentence in self.train_sentences:
                path = self.classify(sentence)
                for index, instance in enumerate(sentence.instances):
                    if path[index] == instance.label:
                        instance.predicted_label = instance.label
                    else:
                        guess = self.tag_klass(path[index])
                        instance.predicted_label = path[index]
                        gold = self.tag_klass(instance.label)
                        error += 1
                        guess.adjust(instance.features, '-')
                        gold.adjust(instance.features, '+')
                self.factory.features_update(sentence)
                # NOTE(review): the nesting of this loop was not recoverable
                # from the source layout; it is assumed to run once per
                # sentence - confirm against the original file.
                for klass in self.klasses:
                    klass.update()
            iteration += 1
            print('Iteration %d: number of errors %d' % (iteration, error))
        for klass in self.klasses:
            klass.average_weights()

    def classify(self, sentence):
        """Return the label path Viterbi finds for ``sentence``."""
        return self.viterbi.viterbi(sentence, self.klasses)

    def test(self):
        """Tag the test sentences and print the evaluation tables.

        Training sentences are re-tagged first and their features updated.
        Output: a header row of labels, one row of (gold, guess) counts per
        label (rows end in a LaTeX line break and \\hline), the correct and
        wrong counts, and one precision/recall/F row per class.
        """
        correct = 0
        wrong = 0
        report_summary = defaultdict(int)

        for sentence in self.train_sentences:
            path = self.classify(sentence)
            for index, instance in enumerate(sentence.instances):
                instance.predicted_label = path[index]
            self.factory.features_update(sentence)

        for sentence in self.test_sentenses:
            path = self.classify(sentence)
            for index, instance in enumerate(sentence.instances):
                guess = self.tag_klass(path[index])
                gold = self.tag_klass(instance.label)
                report_summary[(gold.tag, guess.tag)] += 1
                if guess.tag != gold.tag:
                    gold.FN += 1
                    guess.FP += 1
                    wrong += 1
                else:
                    gold.TP += 1
                    # Matches on the 'O' tag are not counted as correct.
                    if guess.tag != 'O':
                        correct += 1

        # Header row: "A & B &"
        print(' '.join('%s &' % label for label in self.total_labels))
        # One row per gold label: "A & n & m \\ \hline"
        for label_1 in self.total_labels:
            cells = ''.join(
                ' & %s' % report_summary[(label_1, label_2)]
                for label_2 in self.total_labels)
            print('%s%s \\\\ \\hline' % (label_1, cells))
        print('%s %s' % (correct, wrong))

        for klass in self.klasses:
            # A class that was never predicted / never present scores 0.
            try:
                P = float(klass.TP) / (klass.TP + klass.FP)
            except ZeroDivisionError:
                P = 0
            try:
                R = float(klass.TP) / (klass.TP + klass.FN)
            except ZeroDivisionError:
                R = 0
            try:
                F = 2 * P * R / (P + R) * 100
            except ZeroDivisionError:
                F = 0
            print("%s & %.2f & %.2f & %.2f" % (klass.tag, P * 100, R * 100, F))
#write past feature name to file
with open('last', 'w') as f:
    f.write(feature)

# Pick the feature constant for the chosen key; any other key leaves `fe`
# as it was before this point.
if feature == '1':
    fe = features.FEATURE
elif feature == '2':
    fe = features.FEATURE2
elif feature == '3':
    fe = features.FEATURE3
elif feature == 'stress':
    fe = features.STRESSTEST

#options for cluster:
# An empty answer selects the default shown after `or`.
k = raw_input('Number of clusters (k): ') or 'auto'
if k != 'auto':
    k = int(k)
t = int(raw_input('Number of iterations: ') or 1)
weightlist = raw_input('enter weight vector, or auto: ') or '1 0 0'
if weightlist == 'auto':
    w = 'auto'
else:
    w = [float(value) for value in weightlist.split(" ")]
euclidean = raw_input('Use Euclidean distance? [Y/N]: ') or 'N'
# Always bind `euclid`: it used to be set only for a Y answer, which left
# the name unbound for N unless it was defined outside the code visible here.
# NOTE(review): if an earlier default for `euclid` exists, confirm it is False.
euclid = euclidean in ('Y', 'y')

# The class name is deliberately rebound to the instance: test_cluster
# reaches cluster_100 through the name `FeatureFactory`.
FeatureFactory = FeatureFactory(44100, fe, mp3list, k, t, run_before, euclid)
# NOTE(review): `auto` is not defined in the code visible here - verify.
test_cluster(w, auto)