def main():
    """Build the global ``top_words`` vocabulary and print sample features.

    Steps, in order:

    1. Read the user's input with ``parser.getInput()`` and resolve the files
       listed under its ``'train'`` key.
    2. Parse those files and concatenate element 3 of every parsed record
       (presumably the sentence text -- confirm against ``parser``).
    3. Count the lower-cased, non-stopword tokens of that text and store up
       to 500 of the most frequent ones in the module-level ``top_words``.
    4. Print the first ten items returned by ``featureAggregator``.

    Single-argument ``print(...)`` is used throughout; it produces the same
    output under the Python 2 print statement this file is written for.
    """
    global top_words

    userInput = parser.getInput()
    fileList = parser.getFiles(userInput['train'])
    pdata = parser.parseFiles(fileList)

    # Records are concatenated without a separator, exactly as before.
    # NOTE(review): if element 3 carries no trailing whitespace, the last
    # word of one record fuses with the first word of the next -- confirm
    # against the parser's output.
    allsent = ''.join(f[3] for f in pdata)

    # Load the stopword list once instead of once per token, and keep it in
    # a set so each membership test is O(1).
    english_stopwords = set(stopwords.words('english'))

    # Lower-case *before* the stopword test: the stopword list is lower-case,
    # so testing the raw token let capitalised stopwords ("The", "And")
    # through and they were then counted as "the", "and".
    lowered = (w.lower() for w in word_tokenize(allsent))
    all_words = FreqDist(w for w in lowered if w not in english_stopwords)

    if hasattr(all_words, 'most_common'):
        # Counter-based FreqDist: keys() carries no frequency order, so ask
        # for the most common entries explicitly.
        top_words = [word for word, _count in all_words.most_common(500)]
    else:
        # NOTE(review): presumably an older NLTK whose FreqDist.keys() is
        # already sorted by decreasing frequency -- confirm installed version.
        top_words = all_words.keys()[:500]

    featdata = featureAggregator(pdata)
    print(featdata[:10])
def main():
    """Build ``top_words``, show one sample data item, and report accuracy.

    Steps, in order:

    1. Read the user's input with ``parser.getInput()`` and resolve and parse
       the files listed under its ``'train'`` key.
    2. Count the lower-cased, non-stopword tokens found in element 3 of every
       parsed record and store up to 500 of the most frequent ones in the
       module-level ``top_words``.
    3. Run ``extractor.featureAggregator`` on the parsed data and print one
       sample item (elements 0-3 as a table row, element 4 as key/value
       feature pairs).
    4. Pass the feature data to ``splitfeatdata`` and print the returned
       accuracy values together with their mean.

    Single-argument ``print(...)`` is used throughout; it produces the same
    output under the Python 2 print statement this file is written for.
    """
    global top_words

    # Index of the item shown as the sample; falls back to the last item
    # when fewer items are available.
    sample_index = 20

    userInput = parser.getInput()
    fileList = parser.getFiles(userInput['train'])
    parsedata = parser.parseFiles(fileList)

    # Records are concatenated without a separator, exactly as before.
    # NOTE(review): if element 3 carries no trailing whitespace, words at
    # record boundaries fuse together -- confirm against the parser's output.
    allsent = ''.join(f[3] for f in parsedata)

    # Load the stopword list once instead of once per token.
    english_stopwords = set(stopwords.words('english'))

    # Lower-case *before* the stopword test so capitalised stopwords are
    # filtered too (the stopword list itself is lower-case).
    lowered = (w.lower() for w in word_tokenize(allsent))
    all_words = FreqDist(w for w in lowered if w not in english_stopwords)

    if hasattr(all_words, 'most_common'):
        # Counter-based FreqDist: keys() carries no frequency order.
        top_words = [word for word, _count in all_words.most_common(500)]
    else:
        # NOTE(review): presumably an older NLTK whose FreqDist.keys() is
        # already sorted by decreasing frequency -- confirm installed version.
        top_words = all_words.keys()[:500]

    featdata = extractor.featureAggregator(parsedata)

    item_count = len(featdata)
    if item_count:
        # Guard against short inputs: the original indexed item 20
        # unconditionally and raised IndexError on fewer than 21 items.
        sample = featdata[min(sample_index, item_count - 1)]
        print("Sample Data Item:\n\n")
        print("%20s %4s %4s %20s" % ("FILENAME", "LINENUM", "VOTE", "SENTENCE"))
        print("-" * 79)
        print("%10s %4s %4s %20s" % (sample[0], sample[1], sample[2], sample[3]))
        print("\n\nFeatures of this Data Item")
        print("-" * 79)
        for key, val in sample[4].items():
            print("%50s : %10s" % (key, val))

    allacc = splitfeatdata(featdata)
    print("\n\n")
    print("-" * 60)
    # One-element tuple so the value is formatted as a whole even if
    # splitfeatdata ever returns a tuple.
    print("Accuracy Values: %s" % (allacc,))
    print("==" * 60)
    if len(allacc):
        # float() avoids Python 2 integer truncation when the values are ints.
        mean_accuracy = float(sum(allacc)) / len(allacc)
        print("Overall Classifier Accuracy %4.4f " % (mean_accuracy))
    else:
        # No values to average; avoid dividing by zero.
        print("Overall Classifier Accuracy unavailable (no accuracy values)")
def main():
    """Interactive entry point for the functional-dependency tools.

    Prompts for a SQLite database file, opens it, and dispatches on a menu
    choice:

    * ``1`` -- print the result of ``getAttributeClosure(cursor)``.
    * ``2`` -- print the result of ``compareFDs(cursor)``.
    * ``3`` -- repeatedly prompt for an input table and its FD table, load
      them with ``getInput``, and convert the table to BCNF or 3NF.  The loop
      runs at most once per (input table, FD table) pair in the database and
      ends early when the user enters ``q`` as the table name or picks
      anything other than ``1``/``2`` at the conversion prompt.

    Any other menu choice does nothing.  The database connection is closed
    on every exit path.

    Uses ``raw_input`` (Python 2); single-argument ``print(...)`` produces
    the same output under the Python 2 print statement.
    """
    database = raw_input("Enter name of .db file >")
    conn = sqlite3.connect(database)
    try:
        cursor = conn.cursor()

        # Count only real tables: sqlite_master also lists indexes, views
        # and triggers, which would inflate the number of table pairs.
        cursor.execute(
            "SELECT COUNT(*) FROM sqlite_master WHERE type = 'table';")
        # Each relation is stored as an input table plus an FD table, hence
        # the floor division by two.
        nTables = cursor.fetchone()[0] // 2

        initScreen = raw_input("Please select an option:\n"
                               "1. Compute attribute closure\n"
                               "2. Compare FD tables\n"
                               "3. Normalize a table\n")

        if initScreen == '1':
            print(getAttributeClosure(cursor))
        elif initScreen == '2':
            print(compareFDs(cursor))
        elif initScreen == '3':
            # One pass per table pair counted when the program started.
            for _ in range(nTables):
                inTable = raw_input(
                    "Enter name of input table or q to quit >")
                if inTable == 'q':
                    break

                inFDtable = raw_input("Enter name of FD table >")
                inRows, inFDs, fdDict = getInput(inTable, inFDtable, cursor)
                os.system("clear")

                choice = raw_input(
                    "Please choose one of the following "
                    "(entering anything else will terminate the program):\n"
                    "1. Convert to BCNF\n2. Convert to 3NF\n")

                # inTable[5:] drops the first five characters of the name
                # (presumably a fixed prefix such as "Input" -- confirm).
                if choice == '1':
                    convertbcnf(inRows, inFDs, cursor, conn, fdDict,
                                inTable[5:])
                elif choice == '2':
                    convert3nf(inRows, inFDs, cursor, conn, fdDict,
                               inTable[5:])
                else:
                    return
    finally:
        # Release the database handle however the function exits.
        conn.close()
def main():
    """Compute the global ``top_words`` list and print the first features.

    Reads the user's input through ``parser.getInput()``, parses the files
    named under its ``'train'`` key, builds a frequency distribution over the
    lower-cased, non-stopword tokens found in element 3 of each parsed record
    (presumably the sentence text -- confirm against ``parser``), publishes
    up to 500 of the most frequent tokens as the module-level ``top_words``,
    and finally prints the first ten items produced by ``featureAggregator``.

    Single-argument ``print(...)`` is used; it produces the same output under
    the Python 2 print statement this file is written for.
    """
    global top_words

    userInput = parser.getInput()
    fileList = parser.getFiles(userInput['train'])
    pdata = parser.parseFiles(fileList)

    # Same separator-free concatenation as before, without the quadratic
    # cost of repeated string +=.
    # NOTE(review): words at record boundaries fuse if element 3 has no
    # trailing whitespace -- confirm against the parser's output.
    allsent = ''.join(record[3] for record in pdata)

    # The stopword list is loop-invariant: fetch it once and use a set for
    # constant-time lookups.
    stop_set = set(stopwords.words('english'))

    # Normalise case first, then filter; the stopword list is lower-case, so
    # filtering raw tokens missed capitalised stopwords such as "The".
    tokens = (token.lower() for token in word_tokenize(allsent))
    all_words = FreqDist(token for token in tokens if token not in stop_set)

    if hasattr(all_words, 'most_common'):
        # Counter-based FreqDist: keys() is not ordered by frequency.
        top_words = [word for word, _count in all_words.most_common(500)]
    else:
        # NOTE(review): presumably an older NLTK whose FreqDist.keys() is
        # already sorted by decreasing frequency -- confirm installed version.
        top_words = all_words.keys()[:500]

    featdata = featureAggregator(pdata)
    print(featdata[:10])