def mainline(train=False):
    """Run the feature-generation and SVM pipeline over the demo XML files.

    The feature file for the selected mode is deleted and regenerated from
    every ``*.xml`` file in the matching demo directory. In training mode a
    model is then trained and scored against its own training features; in
    test mode the existing model is applied to the test features.

    Args:
        train: Truthy selects training mode (``demo/train/`` and
            ``train-features.txt``); falsy selects test mode (``demo/test/``
            and ``test-features.txt``). Any value is accepted and is
            interpreted by its truthiness.

    Returns:
        None. Results are written to files under ``DIR["DATA"]`` by the
        helpers this function calls.
    """
    # Normalise once so every branch agrees on the mode. The previous code
    # compared with ``is True`` in one place and ``is False`` in another, so a
    # truthy non-bool such as ``1`` generated *test* features and then ran the
    # *training* branch on them.
    train = bool(train)

    datadir = DIR["BASE"] + "data/"
    if train:
        featurefile = datadir + "train-features.txt"
        xmldir = DIR["BASE"] + "demo/train/"
    else:
        featurefile = datadir + "test-features.txt"
        xmldir = DIR["BASE"] + "demo/test/"

    # Remove any feature file left over from an earlier run before the
    # generators are handed the same path again.
    deleteFiles([featurefile])

    # NOTE(review): the connection is never released in this function;
    # confirm whether getConnection() expects the caller to close it.
    client_socket = getConnection()

    for infile in glob(xmldir + "*.xml"):
        try:
            # Single-argument parenthesised form prints the same text under
            # the Python 2 print statement and is a valid call in Python 3.
            print(infile + " is being processed.")
            if train:
                generateTrainFeatures(client_socket, infile, featurefile)
            else:
                generateTestFeatures(client_socket, infile, featurefile)
        except Exception as e:
            # Deliberate best-effort: one bad input file must not abort the
            # whole batch, so report it (with traceback) and carry on.
            print("Some Exception in the main pipeline")
            print(str(type(e)))
            print(str(e))
            logging.exception("Something awfull !!")

    model = DIR["DATA"] + "sec-tfidf-model.txt"
    if train:
        # TRAINING: fit the model, then score it on its own training data.
        trainSvm(featurefile, model)
        outfile = DIR["DATA"] + "sec-tfidf-train-out.txt"
        predictSvm(featurefile, model, outfile)
        outstring = "Default values"
        analyze(featurefile, outfile, outstring=outstring)
    else:
        # TESTING: apply the existing model and post-process its output.
        outfile = DIR["DATA"] + "sec-tfidf-test-out.txt"
        predictSvm(featurefile, model, outfile)
        extractValues(outfile)
        outstring = "Default values Test results"
        analyze(featurefile, outfile, outstring=outstring)
        pickleIt()
def mainline(train=False):
    """Generate features from the demo XML files and train or test the SVM.

    The mode's feature file is removed and rebuilt from each ``*.xml`` file
    found in the mode's demo directory. Training mode then trains the model
    and predicts on the training features; test mode predicts with the
    existing model on the test features, extracts values from the prediction
    output and pickles the result.

    Args:
        train: Mode selector, interpreted by truthiness. Truthy uses
            ``demo/train/`` and ``train-features.txt``; falsy uses
            ``demo/test/`` and ``test-features.txt``.

    Returns:
        None. All output goes to files under ``DIR['DATA']`` via the helper
        functions called here.
    """
    # Collapse the flag to a real bool up front. Mixing ``train is True`` and
    # ``train is False`` checks meant values like ``1`` or ``None`` matched
    # neither test, so feature generation and the SVM stage could disagree
    # about the mode (test features fed to trainSvm).
    train = bool(train)

    datadir = DIR['BASE'] + "data/"
    if train:
        featurefile = datadir + 'train-features.txt'
        xmldir = DIR['BASE'] + "demo/train/"
    else:
        featurefile = datadir + 'test-features.txt'
        xmldir = DIR['BASE'] + "demo/test/"

    # Clear any stale feature file from a previous run before regenerating.
    deleteFiles([featurefile])

    # NOTE(review): nothing here closes the connection; verify whether
    # getConnection() hands ownership of the socket to the caller.
    client_socket = getConnection()

    for infile in glob(xmldir + "*.xml"):
        try:
            # print(<one expression>) behaves identically as a Python 2
            # statement and as a Python 3 function call.
            print(infile + " is being processed.")
            if train:
                generateTrainFeatures(client_socket, infile, featurefile)
            else:
                generateTestFeatures(client_socket, infile, featurefile)
        except Exception as e:
            # Best-effort batch processing: log the failure with its
            # traceback and move on to the next file.
            print("Some Exception in the main pipeline")
            print(str(type(e)))
            print(str(e))
            logging.exception("Something awfull !!")

    model = DIR['DATA'] + "sec-tfidf-model.txt"
    if train:
        # TRAINING
        trainSvm(featurefile, model)
        outfile = DIR['DATA'] + "sec-tfidf-train-out.txt"
        predictSvm(featurefile, model, outfile)
        outstring = "Default values"
        analyze(featurefile, outfile, outstring=outstring)
    else:
        # TESTING
        outfile = DIR['DATA'] + "sec-tfidf-test-out.txt"
        predictSvm(featurefile, model, outfile)
        extractValues(outfile)
        outstring = "Default values Test results"
        analyze(featurefile, outfile, outstring=outstring)
        pickleIt()
set = [] for k in range(11): curr = choice(bucket) set.append(curr) bucket.remove(curr) all_sets.append(set) for i in range(07): test_set = all_sets[i] train_set = [] for set in [all_sets[z] for z in range(07) if z != i]: train_set.extend(set) for key in train_set: writeToFile(featurefile, data[key]['features'] + '\n', 'a') trainSvm(featurefile, model, gamma=1) predictSvm(featurefile, model, outfile) outstring = "Training Fold : " + str(i) print "************* " + outstring + " *************" analyze(featurefile, outfile, resfile, outstring) deleteFiles([featurefile, outfile]) for key in test_set: writeToFile(featurefile, data[key]['features'] + '\n', 'a') predictSvm(featurefile, model, outfile) outstring = "Testing Fold : " + str(i) pre, rec = analyze(featurefile, outfile, resfile, outstring) precision.append(pre) recall.append(rec) print precision