def __init__(self):
    """Load the rt-polarity corpora, build the labeled training set,
    initialise the per-class n-gram count tables, and train.

    Labels in ``self.trainset`` are 1 for positive, -1 for negative.
    """
    data = GetDataLocation()
    self.data_path = data.get_path()
    pos_path = os.path.join(self.data_path, "rt-polarity.pos")
    neg_path = os.path.join(self.data_path, "rt-polarity.neg")
    # Read each corpus once and derive both the per-line and per-word
    # views from the same text (the original re-opened each file twice
    # and never closed any handle).
    with open(pos_path, "r") as f:
        pos_text = f.read()
    with open(neg_path, "r") as f:
        neg_text = f.read()
    self.pos_lines = pos_text.splitlines()
    self.neg_lines = neg_text.splitlines()
    self.pos_word_count = len(pos_text.split())  # total positive tokens
    self.neg_word_count = len(neg_text.split())  # total negative tokens
    # Labeled tuples (sentence, label): 1 = positive, -1 = negative.
    self.trainset = ([(x, 1) for x in self.pos_lines] +
                     [(x, -1) for x in self.neg_lines])
    # n-gram frequency tables (orders 1 through 5) for each class.
    self.pos_1gram = {}
    self.pos_2gram = {}
    self.pos_3gram = {}
    self.pos_4gram = {}
    self.pos_5gram = {}  # BUG FIX: original assigned pos_4gram twice and never pos_5gram
    self.neg_1gram = {}
    self.neg_2gram = {}
    self.neg_3gram = {}
    self.neg_4gram = {}
    self.neg_5gram = {}
    self.train()
def __init__(self, n):
    """Load the training corpora, stop-word list and smiley lexicons,
    build the labeled training set, and train the n-gram model.

    n -- order of the n-gram model passed through to ``self.train``.
    """
    data = GetDataLocation()
    self.data_path = data.get_path()
    # Punctuation symbols handled during tokenisation.
    self.symbols = ['!', '@', '#', '$', '%', '^', '&', '*', '(', ')', '-',
                    '_', '+', '=', ',', '.', '<', '>', '?', '/']
    # Positive / negative training sentences, one per line.  Handles are
    # closed deterministically (the original leaked all five of them) and
    # paths are joined portably instead of via '/' concatenation.
    with open(os.path.join(self.data_path, 'rt-polarity.pos'), 'r') as f:
        poslines = f.read().splitlines()
    with open(os.path.join(self.data_path, 'rt-polarity.neg'), 'r') as f:
        neglines = f.read().splitlines()
    with open(os.path.join(self.data_path, 'stopwordslist_1.file'), 'r') as f:
        self.stop_list = f.read().splitlines()
    self.poslinesTrain = poslines[:]
    self.neglinesTrain = neglines[:]
    # Emoticon lexicons used when checking for emotions.
    with open(os.path.join(self.data_path, 'positive_smile.pos'), 'r') as f:
        self.pos_smiley = f.read().split()
    with open(os.path.join(self.data_path, 'negative_smile.neg'), 'r') as f:
        self.neg_smiley = f.read().split()
    # Labeled tuples (sentence, label): 1 = positive, -1 = negative.
    self.trainset = ([(x, 1) for x in self.poslinesTrain] +
                     [(x, -1) for x in self.neglinesTrain])
    self.poswords = {}           # unigram word counts
    self.negwords = {}
    self.poswords_bigram = {}    # bigram counts
    self.negwords_bigram = {}
    self.poswords_ngram = {}     # general n-gram counts
    self.negwords_ngram = {}
    self.n = n
    self.train(self.n)
def __init__(self):
    """Build the spell-check word-frequency model from the training corpus."""
    data = GetDataLocation()
    path = data.get_path()
    file_name = os.path.join(path, "spellcheck")
    # open() replaces the deprecated Python-2-only file() builtin, and the
    # with-block closes the handle deterministically (it was leaked before).
    with open(file_name) as f:
        self.NWORDS = self.train(self.words(f.read()))
    self.alphabet = 'abcdefghijklmnopqrstuvwxyz'
    self.pos = []
    self.neg = []
def __init__(self):
    """Default settings for the SVM pipeline: train/test split percentages
    and the locations of the data, model, dictionary and lexicon files."""
    self.training_percent = 100  # in 0 to 100 range
    self.testing_percent = 20    # in 0 to 100 range
    data = GetDataLocation()
    data_path = data.get_path()
    # os.path.join instead of '/' concatenation, consistent with the other
    # loaders in this file and portable across platforms.
    self.train_file = os.path.join(data_path, "svm.train")
    self.test_file = os.path.join(data_path, "svm.test")
    self.model_file = os.path.join(data_path, "model", "svm.modeluni")
    self.dict_file = os.path.join(data_path, "svm.dictuni")
    self.pos_file = os.path.join(data_path, "svm.pos")
    self.neg_file = os.path.join(data_path, "svm.neg")
    self.stop_words_file = os.path.join(data_path, "stop")
def __init__(self):
    """Load the stop-word list (one word per line)."""
    data = GetDataLocation()
    path = data.get_path()
    file_name = os.path.join(path, "stopwords")
    # Close the handle deterministically (the original left it to the GC).
    with open(file_name) as f:
        self.stop_word_list = f.read().splitlines()
# NOTE(review): this chunk starts mid-try/except inside a parameter-sweep
# loop whose `for`/`try` headers are outside the visible source; the
# indentation below is reconstructed and must be confirmed against the
# full file.
print "Classification Done!"
# Append this parameter string and its accuracy to the cumulative log.
file = open("result", 'a')
file.write(param + ";" + str(p_acc) + '\n')
file.close()
except:
    # On any failure, log the parameter with an ERROR marker and move on
    # to the next parameter combination (bare except is deliberate
    # best-effort here — NOTE(review): consider narrowing it).
    file = open("result", 'a')
    file.write(param + ";ERROR\n")
    file.close()
    continue

# Script entry: configure an 80/20 train/test split and the data-file
# locations, then run the analysis.
settings = Settings()
data = GetDataLocation()
data_path = data.get_path()
settings.training_percent = 80  # in 0 to 100 range
settings.testing_percent = 20   # in 0 to 100 range
settings.train_file = data_path + "/svm.train"
settings.test_file = data_path + "/svm.test"
settings.model_file = data_path + "/model/svm.modeluni"
settings.dict_file = data_path + "/svm.dictuni"
settings.pos_file = data_path + "/svm.pos"
settings.neg_file = data_path + "/svm.neg"
settings.stop_words_file = data_path + "/stop"
run = Analysis(settings)
#run = Run(settings, False)