def main():
    """Run the interactive count for the WordNet 'Usability' signifiers.

    The signifiers are flattened into one space-separated string, last
    signifier first and with a trailing space, and handed to get_counts().
    """
    taxonomy = Taxonomy()
    # Alternative source (extended lists): taxonomy.get_signifiers('Efficiency')
    wordnet_terms = taxonomy.get_signifiers_wn('Usability')  # wordnet lists

    # Each term carries its own trailing separator; reversing puts the last
    # signifier at the front of the final string.
    pieces = [term + ' ' for term in wordnet_terms]
    pieces.reverse()
    get_counts(''.join(pieces))
def main():
    """Run the interactive count for the WordNet 'Usability' signifiers.

    The signifiers are flattened into one space-separated string, last
    signifier first and with a trailing space, and handed to get_counts().
    """
    taxonomy = Taxonomy()
    # Alternative source (extended lists): taxonomy.get_signifiers('Efficiency')
    wordnet_terms = taxonomy.get_signifiers_wn('Usability')  # wordnet lists

    # Each term carries its own trailing separator; reversing puts the last
    # signifier at the front of the final string.
    pieces = [term + ' ' for term in wordnet_terms]
    pieces.reverse()
    get_counts(''.join(pieces))
def main():
    """Run the interactive count for the WordNet "Usability" signifiers.

    Taxonomy is imported locally from the ``names`` module. The signifiers
    are joined into one string, last signifier first, each followed by a
    single space, and that string is passed to get_counts().
    """
    from names import Taxonomy

    taxonomy = Taxonomy()
    # Alternative source (extended lists): taxonomy.get_signifiers('Efficiency')
    wordnet_terms = taxonomy.get_signifiers_wn("Usability")  # wordnet lists

    # Walk the terms back to front so the last one ends up first.
    joined = "".join(term + " " for term in reversed(list(wordnet_terms)))
    get_counts(joined)
def _prompt_quality(position, total, choices):
    """Ask the reviewer for a quality code until a recognised one is typed.

    Args:
        position: 1-based index of the event being reviewed.
        total: Sample size shown as the denominator of the progress counter.
        choices: Collection of accepted single-letter answers.

    Returns:
        The accepted answer, stripped of surrounding whitespace and
        lower-cased.
    """
    prompt = ("\n******\n" + str(position) + "/" + str(total)
              + " Quality: [u/r/m/e/p/f/n]: ")
    while True:
        answer = raw_input(prompt).strip().lower()  # wait for user input
        if answer in choices:
            return answer
        # Re-ask instead of continuing with unbound or stale signifier lists.
        print("Unrecognised answer %r; expected one of u/r/m/e/p/f/n" % answer)


def get_counts(keyword):
    """Interactively measure how often the signifier lists miss an event.

    A random sample of events is read from ``data_objects.refsq_data``. Each
    event is printed and the reviewer tags it with a quality letter
    (u/r/m/e/p/f) or ``n`` for none. For every tagged event, check_list() is
    asked whether the WordNet (wn) and extended (ext) signifier lists for
    that quality match the event; a miss counts as a false negative for the
    corresponding list. The counts and rates are printed at the end.

    Args:
        keyword: Not used by the current random-sample query; kept so
            existing callers continue to work (the earlier full-text query
            used it).

    Returns:
        None. All results are written to standard output. If a ValueError is
        raised while querying or reviewing, 'Error in query syntax' is
        printed instead of the summary.
    """
    quality_by_key = {
        'u': 'Usability',
        'r': 'Reliability',
        'm': 'Maintainability',
        'e': 'Efficiency',
        'p': 'Portability',
        'f': 'Functionality',
    }
    valid_answers = set(quality_by_key)
    valid_answers.add('n')

    store_cursor = connect_corpus("data_objects")
    t = Taxonomy()
    total = 50
    query_string = (
        " select event from data_objects.refsq_data"
        " order by rand() limit %(total)d" % {"total": total}
    )
    # Earlier variants filtered on product / msr_type, or used a boolean-mode
    # full-text match against `keyword`.

    try:
        store_cursor.execute(query_string)
        yes = 0.0  # events the reviewer tagged with some quality
        wn_fn_count = 0
        ext_fn_count = 0
        for i, result in enumerate(store_cursor.fetchall(), 1):
            # Rows come back as single-column dicts; take the only value.
            event = list(result.values())[0]
            print(event)
            answer = _prompt_quality(i, total, valid_answers)
            if answer == 'n':
                # TODO could extend this to see whether the standard query
                # matches it even so
                continue

            yes += 1.0
            # check against our algorithm
            quality = quality_by_key[answer]
            signifiers_ext = t.get_signifiers(quality)
            signifiers_wn = t.get_signifiers_wn(quality)
            found_wn, found_ext = check_list(event, signifiers_wn,
                                             signifiers_ext)
            if not found_wn:  # false negative
                wn_fn_count += 1
                print(wn_fn_count)
            if not found_ext:
                ext_fn_count += 1
                print(ext_fn_count)

        # Of the ones we marked, how many were not found? With nothing
        # marked the rate is undefined, so report NaN rather than dividing
        # by zero.
        if yes:
            wn_rate = wn_fn_count / yes
            ext_rate = ext_fn_count / yes
        else:
            wn_rate = ext_rate = float('nan')
        print(ext_fn_count)
        print("Number of matches was %s" % (yes))
        print("False negative rate was wn: %s and ext: %s"
              % (str(wn_rate), str(ext_rate)))
    except ValueError:
        print('Error in query syntax')
def _prompt_quality(position, total, choices):
    """Ask the reviewer for a quality code until a recognised one is typed.

    Args:
        position: 1-based index of the event being reviewed.
        total: Sample size shown as the denominator of the progress counter.
        choices: Collection of accepted single-letter answers.

    Returns:
        The accepted answer, stripped of surrounding whitespace and
        lower-cased.
    """
    prompt = ("\n******\n" + str(position) + "/" + str(total)
              + " Quality: [u/r/m/e/p/f/n]: ")
    while True:
        answer = raw_input(prompt).strip().lower()  # wait for user input
        if answer in choices:
            return answer
        # Re-ask instead of continuing with unbound or stale signifier lists.
        print("Unrecognised answer %r; expected one of u/r/m/e/p/f/n" % answer)


def get_counts(keyword):
    """Interactively measure how often the signifier lists miss an event.

    A random sample of events is read from ``data_objects.refsq_data``. Each
    event is printed and the reviewer tags it with a quality letter
    (u/r/m/e/p/f) or ``n`` for none. For every tagged event, check_list() is
    asked whether the WordNet (wn) and extended (ext) signifier lists for
    that quality match the event; a miss counts as a false negative for the
    corresponding list. The counts and rates are printed at the end.

    Args:
        keyword: Not used by the current random-sample query; kept so
            existing callers continue to work (the earlier full-text query
            used it).

    Returns:
        None. All results are written to standard output. If a ValueError is
        raised while querying or reviewing, 'Error in query syntax' is
        printed instead of the summary.
    """
    quality_by_key = {
        'u': 'Usability',
        'r': 'Reliability',
        'm': 'Maintainability',
        'e': 'Efficiency',
        'p': 'Portability',
        'f': 'Functionality',
    }
    valid_answers = set(quality_by_key)
    valid_answers.add('n')

    store_cursor = connect_corpus("data_objects")
    t = Taxonomy()
    total = 50
    query_string = (
        " select event from data_objects.refsq_data"
        " order by rand() limit %(total)d" % {"total": total}
    )
    # Earlier variants filtered on product / msr_type, or used a boolean-mode
    # full-text match against `keyword`.

    try:
        store_cursor.execute(query_string)
        yes = 0.0  # events the reviewer tagged with some quality
        wn_fn_count = 0
        ext_fn_count = 0
        for i, result in enumerate(store_cursor.fetchall(), 1):
            # Rows come back as single-column dicts; take the only value.
            event = list(result.values())[0]
            print(event)
            answer = _prompt_quality(i, total, valid_answers)
            if answer == 'n':
                # TODO could extend this to see whether the standard query
                # matches it even so
                continue

            yes += 1.0
            # check against our algorithm
            quality = quality_by_key[answer]
            signifiers_ext = t.get_signifiers(quality)
            signifiers_wn = t.get_signifiers_wn(quality)
            found_wn, found_ext = check_list(event, signifiers_wn,
                                             signifiers_ext)
            if not found_wn:  # false negative
                wn_fn_count += 1
                print(wn_fn_count)
            if not found_ext:
                ext_fn_count += 1
                print(ext_fn_count)

        # Of the ones we marked, how many were not found? With nothing
        # marked the rate is undefined, so report NaN rather than dividing
        # by zero.
        if yes:
            wn_rate = wn_fn_count / yes
            ext_rate = ext_fn_count / yes
        else:
            wn_rate = ext_rate = float('nan')
        print(ext_fn_count)
        print("Number of matches was %s" % (yes))
        print("False negative rate was wn: %s and ext: %s"
              % (str(wn_rate), str(ext_rate)))
    except ValueError:
        print('Error in query syntax')