#!/usr/bin/python # -*- coding: utf-8 -*- # from wfMySQL import MySQLConnector from filter import dimension from wfDatabase import wfdb import codecs mysql = MySQLConnector() # Instance Database List Class obj_db = wfdb() # Get category and segment View name list view_list = obj_db.getViewList() def generateGexf(list_terms, category, chi_kind): len_term = len(list_terms) outfile = codecs.open('gexf/' + chi_kind + '_' + str(category) + '.gexf', 'w', 'utf-8') outfile.write('<?xml version="1.0" encoding="UTF-8"?>\n') outfile.write('<gexf xmlns="http://www.gexf.net/1.2draft" version="1.2">\n') outfile.write('<graph mode="static" defaultedgetype="undirected">\n') outfile.write('<nodes>\n') for i in range(len_term): outfile.write('<node id="%s" label="%s" />\n' % (str(i), list_terms[i])) outfile.write('</nodes>\n') x = 0 outfile.write('<edges>\n') for i in range(5): print 'Building edge in category-"' + str(category) + '", Dataset: ' + view_list[i]
# Get confidence interval def mean_confidence_interval(data, confidence=0.95): a = 1.0 * np.array(data) n = len(a) m, se = np.mean(a), sp.sem(a) h = se * sp.t._ppf((1 + confidence) / 2., n - 1) return [m, h, m - h, m + h] # Instance dimension class obj_dims = dimension() # Instance Database list class obj_db = wfdb() # Instance mysql connector class mysql = MySQLConnector() # Get Average CHI Score Dictionary dict_avg_chi = obj_dims.getAvgCHIList() # Get Maxiunm CHI Score Dictionary dict_max_chi = obj_dims.getMaxCHIList() # Filter CHI Score dictionary by threshold dict_flted_avgchi = obj_dims.doFilteredList(avg_filter_threshold, dict_avg_chi) dict_flted_maxchi = obj_dims.doFilteredList(max_filter_threshold, dict_max_chi) # Get Database View List tfidf_viewlist = obj_db.getTFIDFViewList() for source_db in range(5):
# Get most common element in list
def most_common(list_term):
    """Return the element that occurs most often in *list_term*.

    Args:
        list_term: Iterable of hashable items (they are used as dict keys).

    Returns:
        The item with the highest occurrence count.  When several items share
        the highest count, the one the counting dict yields first is returned.

    Raises:
        ValueError: If *list_term* is empty (``max`` of an empty sequence).
    """
    dict_count = {}
    for item in list_term:
        dict_count[item] = dict_count.get(item, 0) + 1
    # Iterating the dict yields its keys in the same order as its items, so
    # this picks the same element as max(items, key=count) while avoiding the
    # Python-2-only ``iteritems``.
    return max(dict_count, key=dict_count.get)


# Instance dimension class
obj_dims = dimension()
# Instance Database list class
obj_db = wfdb()
# Instance mysql connector class
mysql = MySQLConnector()
# Get Average CHI Score Dictionary
dict_avg_chi = obj_dims.getAvgCHIList()
# Get Maximum CHI Score Dictionary
dict_max_chi = obj_dims.getMaxCHIList()
# Filter CHI Score dictionary by threshold
# (avg_filter_threshold / max_filter_threshold are defined outside this view)
dict_flted_avgchi = obj_dims.doFilteredList(avg_filter_threshold, dict_avg_chi)
dict_flted_maxchi = obj_dims.doFilteredList(max_filter_threshold, dict_max_chi)
# Reset all values to 0.0: new dictionaries with the same keys as the
# filtered ones, every value initialised to 0.0
dict_avg_zero = dict.fromkeys(dict_flted_avgchi, 0.0)
dict_max_zero = dict.fromkeys(dict_flted_maxchi, 0.0)
# NOTE(review): the statements down to `return` are the tail of a function
# whose `def` line (and the creation of new_feature) lie above this view.
    new_category = []
    # Copy the entries selected by random_list, in that order, into the new
    # feature/category lists.
    for i in random_list:
        new_feature.append(feature_dataset[i])
        new_category.append(category_dataset[i])
    # Slice assignment empties the original lists in place, so any other
    # reference to these list objects sees them emptied as well.
    feature_dataset[:] = []
    category_dataset[:] = []
    return new_feature, new_category


# Instance dimension class
obj_dims = dimension()
# Instance Database list class
obj_db = wfdb()
# Instance mysql connector class
mysql = MySQLConnector()
# Get Average CHI Score Dictionary list
avg_chi_list = obj_dims.getAvgCHIList()
# Get Max CHI Score dictionary list
max_chi_list = obj_dims.getMaxCHIList()
# Filter list by CHI score
# (avg_filter_num / max_filter_num are defined outside this view)
filted_avg_list = obj_dims.doFilteredList(avg_filter_num, avg_chi_list)
filted_max_list = obj_dims.doFilteredList(max_filter_num, max_chi_list)
# Get database View List
tfidf_viewlist = obj_db.getTFIDFViewList()
# Length of vector dimension
#!/usr/bin/python # -*- coding: utf-8 -*- # from wfMySQL import MySQLConnector from wfDatabase import wfdb import codecs import re import enchant # How many terms were you want to get limit = 300 mysql = MySQLConnector() # Instance Database List Class obj_db = wfdb() ptn1 = re.compile("^[\w\d]*$") ptn2 = re.compile("^[\d]*$") dic = enchant.Dict("en_US") def verifyEngNum(term): if ptn1.match(term): if ptn2.match(term): return 1 else: if dic.check(term): return 2 else: return 3
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
from wfMySQL import MySQLConnector

# Instance Database connector class
mysql = MySQLConnector()


def pairReliablility(user1, user2, coder_clsfi):
    """Print the mutual agreement between two coders.

    Args:
        user1: Key/index of the first coder inside each entry of coder_clsfi.
        user2: Key/index of the second coder inside each entry of coder_clsfi.
        coder_clsfi: Dict whose values can be indexed by user1 and user2 to
            obtain the class each coder assigned.

    Returns:
        None.  Four report lines are printed: the two users, M (the number of
        entries on which both coders agree), N (the number of entries) and
        the agreement ratio 2M / (N1 + N2) with N1 = N2 = N.

    Raises:
        ZeroDivisionError: If coder_clsfi is empty.
    """
    len_n = len(coder_clsfi)
    # Count the entries on which both coders chose the same class.
    M = sum(1 for value in coder_clsfi.values() if value[user1] == value[user2])
    rebilty = (2.0 * M) / (2.0 * len_n)
    # Single-argument parenthesised prints give the same output on Python 2
    # and also parse on Python 3.  The first line had lost its
    # "+ str(user1) +" part (it read "User: "******", "), which was a syntax
    # error; it is restored here.
    print("User: " + str(user1) + ", " + str(user2))
    print("M (Number of totally agreement): " + str(M))
    print("N1,N2 (Should agree with number): " + str(len_n))
    print("Mutual consent degree = 2M/(N1+N2): " + str(rebilty) + "\n\n")


# Fetch every coder decision, ordered by sampling number.
sql_get = "SELECT `SamplingNo`, `ClsNo1`, `UserId` FROM `CoderCompare` ORDER BY `CoderCompare`.`SamplingNo` ASC"
result = mysql.queryrows(sql_get)
coder_clsfi = {}
# Columns follow the SELECT order: SamplingNo, ClsNo1, UserId.
# The remainder of the loop body lies beyond this view.
for row in result:
    sno = int(row[0])
    cls = int(row[1])
    user = int(row[2])