"""Count how many predictions in 'result' equal the labels of the test file."""
from jobs import utils
import pdb

test_file = 'd:/jobs/dctree/degree/test.csv'

# NOTE(review): the second argument presumably selects the label column --
# confirm against utils.get_labels.
labels = utils.get_labels(test_file, 0)
plabel = utils.read_rst('result')

# No debugger breakpoint is set here, so the script can run unattended.
# Indexing plabel (instead of zip) keeps a too-short prediction list a loud
# IndexError rather than a silently truncated score.
rst = 0
for i, label in enumerate(labels):
    if plabel[i] == label:
        rst += 1

print(rst)
"""Count, per prediction list, how many entries equal the test labels."""
from jobs import utils
import pdb

test_file = 'd:/jobs/dctree/random/test.csv'
# NOTE(review): the second argument presumably selects the label column --
# confirm against utils.get_labels.
labels = utils.get_labels(test_file, 7)

# plabel is a list of prediction lists and rst the parallel list of hit
# counters; only the single 'finalrut' result is scored here.
plabel = []
rst = [0]
plabel.append(utils.read_rst('finalrut'))

for row, label in enumerate(labels):
    for col in range(1):
        if plabel[col][row] == label:
            rst[col] += 1

print(rst)
try: conn = MySQLdb.connect(host='localhost', user='******', passwd='123456', db='jobs', use_unicode=True, charset='utf8') cur = conn.cursor() cur.execute('set character_set_client=utf8') cur.execute('set character_set_connection=utf8') cur.execute('set character_set_database=utf8') cur.execute('set character_set_results=utf8') cur.execute('set character_set_server=utf8') file = open('d:/jobs/dctree/position/train.csv', 'w+') sql = 'select userid, age, bstart_year, gender, shortmar from jobs_uinfo' cur.execute(sql) userdlst = cur.fetchall() sqlze = 'select wk.salary, wk.industry, wk.position_name from work_size as wk' position_dct = get_position_meta() major_dct = read_rst('sharemajor') cur.execute(sqlze) sizelst = cur.fetchall() i = 0 for userd in userdlst: sizes = sizelst[i:i+3] i += 3 if not position_dct.has_key(sizes[1][2]): continue # print userd userid = [] userd = list(userd) if int(userd[1]) <= 20: userd[1] = '18' elif int(userd[1]) >= 60: userd[1] = '60'
passwd='123456', db='jobs', use_unicode=True, charset='utf8') cur = conn.cursor() cur.execute('set character_set_client=utf8') cur.execute('set character_set_connection=utf8') cur.execute('set character_set_database=utf8') cur.execute('set character_set_results=utf8') cur.execute('set character_set_server=utf8') sql = 'select name, degreer0, degreer1, degreer2 from major ;' cur.execute(sql) majorlst = cur.fetchall() majordct = {} pdb.set_trace() treelst = utils.read_rst('result.txt') resultlst = [] for major in majorlst: majordct[major[0]] = [major[1], major[2], major[3]] sq = 'select userid, major from jobs_uinfotest' cur.execute(sq) usertst = cur.fetchall() i = 0 for user in usertst: if majordct.has_key(user[1]): majorat = majordct[user[1]] mnu = max(majorat) index = majorat.index(mnu) resultlst.append(index) else: resultlst.append(0)
"""Score sixteen stored prediction files against the size test labels.

Prints a list with one hit count per prediction file, in load order.
"""
from jobs import utils
import pdb

test_file = 'd:/jobs/dctree/size/test.csv'
# NOTE(review): the second argument presumably selects the label column --
# confirm against utils.get_labels.
labels = utils.get_labels(test_file, 5)

# result050.txt .. result059.txt followed by result060.txt .. result065.txt.
result_files = ['result05' + str(j) + '.txt' for j in range(10)]
result_files += ['result06' + str(j) + '.txt' for j in range(6)]
plabel = [utils.read_rst(name) for name in result_files]

# One counter per loaded file; sized from plabel so the count cannot drift
# from the number of files actually read.
rst = [0] * len(plabel)
for i, label in enumerate(labels):
    for j, predictions in enumerate(plabel):
        # Indexing keeps a too-short prediction list a loud IndexError.
        if predictions[i] == label:
            rst[j] += 1

print(rst)
"""Count how many entries of 'sizeresult.txt' equal the test labels."""
from jobs import utils
import pdb

test_file = 'd:/jobs/dctree/size/test.csv'
# NOTE(review): the second argument presumably selects the label column --
# confirm against utils.get_labels.
labels = utils.get_labels(test_file, 5)
plabel = utils.read_rst('sizeresult.txt')

# Walk the labels by position and tally every exact match.
rst = 0
for position, expected in enumerate(labels):
    if plabel[position] == expected:
        rst += 1

print(rst)
"""Count positions where 'sizeresult.txt' and 'sizeresult1.txt' agree."""
from jobs import utils
import pdb

test_file = 'd:/jobs/dctree/salary/test.csv'
# The labels are loaded but do not take part in the comparison below.
labels = utils.get_labels(test_file, 6)

plabel1 = utils.read_rst('sizeresult.txt')
plabel2 = utils.read_rst('sizeresult1.txt')

# plabel1 drives the iteration; plabel2 is indexed at the same position.
rst = 0
for position, first in enumerate(plabel1):
    if plabel2[position] == first:
        rst += 1

print(rst)
postdct = get_position.get_pos() try: conn = MySQLdb.connect(host="localhost", user="******", passwd="123456", db="jobs", use_unicode=True, charset="utf8") cur = conn.cursor() cur.execute("set character_set_client=utf8") cur.execute("set character_set_connection=utf8") cur.execute("set character_set_database=utf8") cur.execute("set character_set_results=utf8") cur.execute("set character_set_server=utf8") # sql = 'select userid from jobs_uinfotest' # pdb.set_trace() position_dct = {} industryr = utils.read_rst("industryr.txt") with codecs.open("position_meta.txt") as file: lines = file.readlines() for linet in lines: line = linet[:-2] uline = unicode(line) position_dct[uline] = "1" sql = "select position_name, industry from work_sizetest" cur.execute(sql) positionlst = cur.fetchall() positions = [] industrys = [] result = [] flag = False i = 0 for position in positionlst:
conn = MySQLdb.connect(host='localhost', user='******', passwd='123456', db='jobs', use_unicode=True, charset='utf8') cur = conn.cursor() cur.execute('set character_set_client=utf8') cur.execute('set character_set_connection=utf8') cur.execute('set character_set_database=utf8') cur.execute('set character_set_results=utf8') cur.execute('set character_set_server=utf8') sql = 'select industry, position_name from work_sizetest' cur.execute(sql) workprobdct = utils.read_rst('workprobdct') worklst = cur.fetchall() i = 0 result = [] for j in xrange(20000): tworks = worklst[i:i + 2] i += 2 position_prob = {} for key in position_dct.keys(): # pdb.set_trace() position_prob[key] = get_position_prob(key, workprobdct, tworks) # pdb.set_trace() sortedprob = sorted(position_prob.iteritems(), key=lambda jj: jj[1], reverse=True)
# coding:utf8
"""Count positions where 'position.txt' and 'positionlet' agree."""
import sys
from jobs import utils
import pdb

# Python 2 idiom: reload(sys) re-exposes setdefaultencoding so the default
# str/unicode codec can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')

test_file = 'd:/jobs/dctree/salary/test.csv'
# The labels are loaded but do not take part in the comparison below.
labels = utils.get_labels(test_file, 6)

plabel1 = utils.read_rst('position.txt')
plabel2 = utils.read_rst('positionlet')

# plabel1 drives the iteration; plabel2 is indexed at the same position.
rst = 0
for position, first in enumerate(plabel1):
    if plabel2[position] == first:
        rst += 1

print(rst)
conn = MySQLdb.connect(host='localhost', user='******', passwd='123456', db='jobs', use_unicode=True, charset='utf8') cur = conn.cursor() cur.execute('set character_set_client=utf8') cur.execute('set character_set_connection=utf8') cur.execute('set character_set_database=utf8') cur.execute('set character_set_results=utf8') cur.execute('set character_set_server=utf8') sql = 'select industry, salary from work_sizetest' cur.execute(sql) salaryprobdct = utils.read_rst('salaryprobdct') worklst = cur.fetchall() i = 0 result = [] for j in xrange(20000): salarys = worklst[i:i + 2] i += 2 salary_prob = {} for key in range(7): # pdb.set_trace() salary_prob[key] = get_salary_prob(key, salaryprobdct, salarys) # pdb.set_trace() sortedprob = sorted(salary_prob.iteritems(), key=lambda jj: jj[1], reverse=True)
cur.execute('set character_set_results=utf8') cur.execute('set character_set_server=utf8') #sql = 'select userid from jobs_uinfotest' position_dct = {} with codecs.open('position_meta.txt') as file: lines = file.readlines() for linet in lines: line = linet[:-2] uline = unicode(line) lintlst = uline.split(',') position_dct[lintlst[0]] = lintlst[1] file = open('d:/jobs/xgboost/data.csv', 'w+') pdb.set_trace() keyshare = utils.read_rst('keyshare') sql = 'select position_name, industry from work_size' cur.execute(sql) datalst = [] positionlst = cur.fetchall() i = 0 for j in xrange(70000): worklst = positionlst[i:i+3] i += 1 if not position_dct.has_key(worklst[1][0]): continue else: rst = [] if keyshare.has_key(worklst[0][0]): rst.append(keyshare[worklst[0][0]])
cur.execute('set character_set_client=utf8') cur.execute('set character_set_connection=utf8') cur.execute('set character_set_database=utf8') cur.execute('set character_set_results=utf8') cur.execute('set character_set_server=utf8') sql = 'select jb.userid,jb.degree,jb.age,jb.start_age,jb.bstart_year,jb.gender,jb.start_salary,wk.size \ from jobs_uinfotest as jb left join workexperiencetest as wk on \ jb.userid = wk.userid and wk.num = 1' cur.execute(sql) file = open('d:/jobs/dctree/salary/salary.csv', 'w+') useridlst = cur.fetchall() # rsultlabel = utils.read_rst('result.txt') # pdb.set_trace() #wsresult = utils.read_rst('wsresult.txt') # sizeresult = utils.read_rst('sizeresult.txt') salaryresult = utils.read_rst('salary') # posresult = utils.read_rst('position13') #degreelst = utils.read_rst('degree.txt') # degreelst = utils.read_rst('result1.txt') file.write('id,degree,size,salary,position_name\n') i = 0 # pdb.set_trace() j = 0 for userid in useridlst: result = [] result.append(userid[0]) print i # result.append(degreelst[i]) # result.append(sizeresult[i]) result.append(8) result.append(8)
# coding:utf8
"""Count how many entries of 'finalrut' equal the test labels."""
import sys
from jobs import utils
import pdb

# Python 2 idiom: reload(sys) re-exposes setdefaultencoding so the default
# str/unicode codec can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')

test_file = 'd:/jobs/dctree/random/test.csv'
# NOTE(review): the second argument presumably selects the label column --
# confirm against utils.get_labels.
labels = utils.get_labels(test_file, 9)

# plabel1 is read but only plabel2 is compared with the labels.
plabel1 = utils.read_rst('result.txt')
plabel2 = utils.read_rst('finalrut')

rst = 0
for position, expected in enumerate(labels):
    if plabel2[position] == expected:
        rst += 1

print(rst)
db='jobs', use_unicode=True, charset='utf8') cur = conn.cursor() cur.execute('set character_set_client=utf8') cur.execute('set character_set_connection=utf8') cur.execute('set character_set_database=utf8') cur.execute('set character_set_results=utf8') cur.execute('set character_set_server=utf8') file = open('d:/jobs/dctree/position/train.csv', 'w+') sql = 'select userid, age, bstart_year, gender, shortmar from jobs_uinfo' cur.execute(sql) userdlst = cur.fetchall() sqlze = 'select wk.salary, wk.industry, wk.position_name from work_size as wk' position_dct = get_position_meta() major_dct = read_rst('sharemajor') cur.execute(sqlze) sizelst = cur.fetchall() i = 0 for userd in userdlst: sizes = sizelst[i:i + 3] i += 3 if not position_dct.has_key(sizes[1][2]): continue # print userd userid = [] userd = list(userd) if int(userd[1]) <= 20: userd[1] = '18' elif int(userd[1]) >= 60: userd[1] = '60'
# coding:utf8
"""Compute a TF-IDF weight matrix for the texts stored in 'letterdct'."""
from jobs import utils
import jieba
import jieba.posseg as pseg
import os
import pdb
import sys
from sklearn import feature_extraction
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer

# Python 2 idiom: reload(sys) re-exposes setdefaultencoding so the default
# str/unicode codec can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')

letter_dct = utils.read_rst('letterdct')
tfidf_dct = {}

# keys[i] is the dictionary key whose joined tokens form corpus[i].
# NOTE(review): value[0] is presumably a list of tokens -- confirm against
# whatever writes 'letterdct'.
keys = []
corpus = []
for name, value in letter_dct.items():
    keys.append(name)
    corpus.append(' '.join(value[0]))

# CountVectorizer converts the texts into a term-frequency matrix whose
# element a[i][j] is the frequency of word j in text i.
vectorizer = CountVectorizer()
# TfidfTransformer computes the tf-idf weight of every term.
transformer = TfidfTransformer()

# Inner step: texts -> term-frequency matrix; outer step: counts -> tf-idf.
_term_counts = vectorizer.fit_transform(corpus)
tfidf = transformer.fit_transform(_term_counts)

# All words of the bag-of-words model.
word = vectorizer.get_feature_names()
# Dense tf-idf matrix; element a[i][j] is the weight of word j in text i.
weight = tfidf.toarray()
total = total*(pos1prob + pos2prob + industry1prob + industry2prob) return total try: conn = MySQLdb.connect(host='localhost', user='******', passwd='123456', db='jobs', use_unicode=True, charset='utf8') cur = conn.cursor() cur.execute('set character_set_client=utf8') cur.execute('set character_set_connection=utf8') cur.execute('set character_set_database=utf8') cur.execute('set character_set_results=utf8') cur.execute('set character_set_server=utf8') sql = 'select industry, position_name from work_sizetest' cur.execute(sql) workprobdct = utils.read_rst('workprobdct') worklst = cur.fetchall() i = 0 result = [] for j in xrange(20000): tworks = worklst[i:i+2] i += 2 position_prob = {} for key in position_dct.keys(): # pdb.set_trace() position_prob[key] = get_position_prob(key, workprobdct, tworks) # pdb.set_trace() sortedprob = sorted(position_prob.iteritems(), key=lambda jj:jj[1], reverse=True) # for prob in sortedprob: # print prob[0] + str(prob[1])
try: conn = MySQLdb.connect(host="localhost", user="******", passwd="123456", db="jobs", use_unicode=True, charset="utf8") cur = conn.cursor() cur.execute("set character_set_client=utf8") cur.execute("set character_set_connection=utf8") cur.execute("set character_set_database=utf8") cur.execute("set character_set_results=utf8") cur.execute("set character_set_server=utf8") file = open("d:/jobs/dctree/position/test.csv", "w+") sql = "select userid, age, bstart_year, gender, shortmar from jobs_uinfo limit 50000, 20000" cur.execute(sql) userdlst = cur.fetchall() sqlze = "select wk.salary, wk.industry, wk.position_name from work_size as wk limit 150000, 60000" position_dct = get_position_meta() major_dct = read_rst("sharemajor") cur.execute(sqlze) sizelst = cur.fetchall() i = 0 for userd in userdlst: sizes = sizelst[i : i + 3] i += 3 if not position_dct.has_key(sizes[1][2]): continue print userd userid = [] userd = list(userd) if int(userd[1]) <= 20: userd[1] = "18" elif int(userd[1]) >= 60: userd[1] = "60"
total = total*(pos1prob + pos2prob + industry1prob + industry2prob) return total try: conn = MySQLdb.connect(host='localhost', user='******', passwd='123456', db='jobs', use_unicode=True, charset='utf8') cur = conn.cursor() cur.execute('set character_set_client=utf8') cur.execute('set character_set_connection=utf8') cur.execute('set character_set_database=utf8') cur.execute('set character_set_results=utf8') cur.execute('set character_set_server=utf8') sql = 'select industry, position_name from work_sizetest' cur.execute(sql) workprobdct = utils.read_rst('workletterprobdct') worklst = cur.fetchall() i = 0 result = [] for j in xrange(20000): tworks = worklst[i:i+2] i += 2 position_prob = {} pdb.set_trace() for key in position_dct.keys(): # pdb.set_trace() position_prob[key] = get_position_prob(key, workprobdct, tworks) # pdb.set_trace() sortedprob = sorted(position_prob.iteritems(), key=lambda jj:jj[1], reverse=True) # for prob in sortedprob:
return total try: conn = MySQLdb.connect(host="localhost", user="******", passwd="123456", db="jobs", use_unicode=True, charset="utf8") cur = conn.cursor() cur.execute("set character_set_client=utf8") cur.execute("set character_set_connection=utf8") cur.execute("set character_set_database=utf8") cur.execute("set character_set_results=utf8") cur.execute("set character_set_server=utf8") sql = "select position_name from work_sizetest" cur.execute(sql) wordprobdct = utils.read_rst("position_word") wordlst = cur.fetchall() i = 0 result = [] # pdb.set_trace() for j in xrange(20000): tworks = wordlst[i : i + 2] i += 2 position_prob = {} for key in position_dct.keys(): position_prob[key] = get_position_prob(key, wordprobdct, tworks) sortedprob = sorted(position_prob.iteritems(), key=lambda jj: jj[1], reverse=True) # for prob in sortedprob: # print prob[0] + str(prob[1]) result.append(sortedprob[0][0])
"""Score twenty stored prediction files against the salary test labels.

Prints a list with one hit count per prediction file, in load order.
"""
from jobs import utils
import pdb

test_file = 'd:/jobs/dctree/salary/test.csv'
# NOTE(review): the second argument presumably selects the label column --
# confirm against utils.get_labels.
labels = utils.get_labels(test_file, 6)

# result10.txt .. result19.txt followed by result20.txt .. result29.txt.
result_files = ['result1' + str(j) + '.txt' for j in range(10)]
result_files += ['result2' + str(j) + '.txt' for j in range(10)]
plabel = [utils.read_rst(name) for name in result_files]

# One counter per loaded file; sized from plabel so the count cannot drift
# from the number of files actually read.
rst = [0] * len(plabel)
for i, label in enumerate(labels):
    for j, predictions in enumerate(plabel):
        # Indexing keeps a too-short prediction list a loud IndexError.
        if predictions[i] == label:
            rst[j] += 1

print(rst)
user='******', passwd='123456', db='jobs', use_unicode=True, charset='utf8') cur = conn.cursor() cur.execute('set character_set_client=utf8') cur.execute('set character_set_connection=utf8') cur.execute('set character_set_database=utf8') cur.execute('set character_set_results=utf8') cur.execute('set character_set_server=utf8') #sql = 'select userid from jobs_uinfotest' # pdb.set_trace() position_dct = {} industryr = utils.read_rst('industryr.txt') with codecs.open('position_meta.txt') as file: lines = file.readlines() for linet in lines: line = linet[:-2] uline = unicode(line) position_dct[uline] = '1' sql = 'select position_name, industry from work_sizetest' cur.execute(sql) positionlst = cur.fetchall() positions = [] industrys = [] result = [] flag = False i = 0 for position in positionlst:
"""Count how many predictions in 'salary' equal the labels of the test file."""
from jobs import utils
import pdb

test_file = 'd:/jobs/dctree/salary/test.csv'

# NOTE(review): the second argument presumably selects the label column --
# confirm against utils.get_labels.
labels = utils.get_labels(test_file, 6)
plabel = utils.read_rst('salary')

# No debugger breakpoint is set here, so the script can run unattended.
# Indexing plabel (instead of zip) keeps a too-short prediction list a loud
# IndexError rather than a silently truncated score.
rst = 0
for i, label in enumerate(labels):
    if plabel[i] == label:
        rst += 1

print(rst)
conn = MySQLdb.connect(host='localhost', user='******', passwd='123456', db='jobs', use_unicode=True, charset='utf8') cur = conn.cursor() cur.execute('set character_set_client=utf8') cur.execute('set character_set_connection=utf8') cur.execute('set character_set_database=utf8') cur.execute('set character_set_results=utf8') cur.execute('set character_set_server=utf8') sql = 'select position_name from work_sizetest' cur.execute(sql) wordprobdct = utils.read_rst('position_word') wordlst = cur.fetchall() i = 0 result = [] # pdb.set_trace() for j in xrange(20000): tworks = wordlst[i:i + 2] i += 2 position_prob = {} for key in position_dct.keys(): position_prob[key] = get_position_prob(key, wordprobdct, tworks) sortedprob = sorted(position_prob.iteritems(), key=lambda jj: jj[1], reverse=True) # for prob in sortedprob: # print prob[0] + str(prob[1])
"""Count positions where 'salaryresult.txt' and 'salaryresult1.txt' agree."""
from jobs import utils
import pdb

test_file = 'd:/jobs/dctree/salary/test.csv'
# The labels are loaded but do not take part in the comparison below; the
# call is kept so a missing or unreadable test file still fails the run.
labels = utils.get_labels(test_file, 6)

plabel1 = utils.read_rst('salaryresult.txt')
plabel2 = utils.read_rst('salaryresult1.txt')

# No debugger breakpoint is set here, so the script can run unattended.
# plabel1 drives the iteration; indexing plabel2 keeps a length mismatch a
# loud IndexError instead of a silently truncated count.
rst = 0
for i, label in enumerate(plabel1):
    if plabel2[i] == label:
        rst += 1

print(rst)
postdct = get_position.get_pos() try: conn = MySQLdb.connect(host='localhost', user='******', passwd='123456', db='jobs', use_unicode=True, charset='utf8') cur = conn.cursor() cur.execute('set character_set_client=utf8') cur.execute('set character_set_connection=utf8') cur.execute('set character_set_database=utf8') cur.execute('set character_set_results=utf8') cur.execute('set character_set_server=utf8') #sql = 'select userid from jobs_uinfotest' import pdb pdb.set_trace() position_dct = {} industryr = utils.read_rst('industryr.txt'); with codecs.open('position_meta.txt') as file: lines = file.readlines() for linet in lines: line = linet[:-2] uline = unicode(line) position_dct[uline] = '1' sql = 'select position_name, industry from work_sizetest' cur.execute(sql) positionlst = cur.fetchall() positions = [] industrys = [] result = [] flag = False i = 0 for position in positionlst:
# coding:utf8
"""Compute a TF-IDF weight matrix for the texts stored in 'letterdct'."""
from jobs import utils
import jieba
import jieba.posseg as pseg
import os
import pdb
import sys
from sklearn import feature_extraction
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer

# Python 2 idiom: reload(sys) re-exposes setdefaultencoding so the default
# str/unicode codec can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')

letter_dct = utils.read_rst('letterdct')
tfidf_dct = {}

# keys[i] is the dictionary key whose joined tokens form corpus[i].
# NOTE(review): entry[0] is presumably a list of tokens -- confirm against
# whatever writes 'letterdct'.
corpus = []
keys = []
for label, entry in letter_dct.items():
    keys.append(label)
    corpus.append(' '.join(entry[0]))

# CountVectorizer converts the texts into a term-frequency matrix whose
# element a[i][j] is the frequency of word j in text i.
vectorizer = CountVectorizer()
# TfidfTransformer computes the tf-idf weight of every term.
transformer = TfidfTransformer()

# Inner step: texts -> term-frequency matrix; outer step: counts -> tf-idf.
_frequency_matrix = vectorizer.fit_transform(corpus)
tfidf = transformer.fit_transform(_frequency_matrix)

# All words of the bag-of-words model.
word = vectorizer.get_feature_names()
# Dense tf-idf matrix; element a[i][j] is the weight of word j in text i.
weight = tfidf.toarray()
total = total*salary1prob*salary2prob # pdb.set_trace() return total try: conn = MySQLdb.connect(host='localhost', user='******', passwd='123456', db='jobs', use_unicode=True, charset='utf8') cur = conn.cursor() cur.execute('set character_set_client=utf8') cur.execute('set character_set_connection=utf8') cur.execute('set character_set_database=utf8') cur.execute('set character_set_results=utf8') cur.execute('set character_set_server=utf8') sql = 'select industry, salary from work_sizetest' cur.execute(sql) salaryprobdct = utils.read_rst('salaryprobdct') worklst = cur.fetchall() i = 0 result = [] for j in xrange(20000): salarys = worklst[i:i+2] i += 2 salary_prob = {} for key in range(7): # pdb.set_trace() salary_prob[key] = get_salary_prob(key, salaryprobdct, salarys) # pdb.set_trace() sortedprob = sorted(salary_prob.iteritems(), key=lambda jj:jj[1], reverse=True) # for prob in sortedprob: # print prob[0] + str(prob[1])
try: conn = MySQLdb.connect(host='localhost', user='******', passwd='123456', db='jobs', use_unicode=True, charset='utf8') cur = conn.cursor() cur.execute('set character_set_client=utf8') cur.execute('set character_set_connection=utf8') cur.execute('set character_set_database=utf8') cur.execute('set character_set_results=utf8') cur.execute('set character_set_server=utf8') sql = 'select jb.userid,jb.degree,jb.age,jb.start_age,jb.bstart_year,jb.gender,jb.start_salary,wk.size \ from jobs_uinfotest as jb left join workexperiencetest as wk on \ jb.userid = wk.userid and wk.num = 1' cur.execute(sql) file = open('d:/jobs/dctree/bresult.csv', 'w+') useridlst = cur.fetchall() rsultlabel = utils.read_rst('result.txt') pdb.set_trace() #wsresult = utils.read_rst('wsresult.txt') sizeresult = utils.read_rst('sizeresult.txt') salaryresult = utils.read_rst('salaryresult.txt') posresult = utils.read_rst('position.txt') #degreelst = utils.read_rst('degree.txt') degreelst = utils.read_rst('result.txt') rsultlabel = map(str, rsultlabel) file.write('id,degree,size,salary,position_name\n') i = 0 print rsultlabel pdb.set_trace() j = 0 for userid in useridlst: result = []
# coding:utf8
"""Count positions where 'salaryresult1.txt' matches str() of 'salary'."""
import sys
from jobs import utils
import pdb

# Python 2 idiom: reload(sys) re-exposes setdefaultencoding so the default
# str/unicode codec can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')

test_file = 'd:/jobs/dctree/salary/test.csv'
# The labels are loaded but do not take part in the comparison below.
labels = utils.get_labels(test_file, 6)

plabel1 = utils.read_rst('salaryresult1.txt')
plabel2 = utils.read_rst('salary')

# Entries of plabel2 are converted with str() before being compared with
# the corresponding plabel1 entry.
rst = 0
for position, first in enumerate(plabel1):
    if str(plabel2[position]) == first:
        rst += 1

print(rst)
# coding:utf8
"""Count positions where "position.txt" and "positionlet" agree."""
import sys
from jobs import utils
import pdb

# Python 2 idiom: reload(sys) re-exposes setdefaultencoding so the default
# str/unicode codec can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding("utf8")

test_file = "d:/jobs/dctree/salary/test.csv"
# The labels are loaded but do not take part in the comparison below.
labels = utils.get_labels(test_file, 6)

plabel1 = utils.read_rst("position.txt")
plabel2 = utils.read_rst("positionlet")

# plabel1 drives the iteration; plabel2 is indexed at the same position.
rst = 0
for index, candidate in enumerate(plabel1):
    if plabel2[index] == candidate:
        rst += 1

print(rst)
try: conn = MySQLdb.connect(host="localhost", user="******", passwd="123456", db="jobs", use_unicode=True, charset="utf8") cur = conn.cursor() cur.execute("set character_set_client=utf8") cur.execute("set character_set_connection=utf8") cur.execute("set character_set_database=utf8") cur.execute("set character_set_results=utf8") cur.execute("set character_set_server=utf8") sql = "select jb.userid,jb.degree,jb.age,jb.start_age,jb.bstart_year,jb.gender,jb.start_salary,wk.size \ from jobs_uinfotest as jb left join workexperiencetest as wk on \ jb.userid = wk.userid and wk.num = 1" cur.execute(sql) file = open("d:/jobs/baysian/position.csv", "w+") useridlst = cur.fetchall() rsultlabel = utils.read_rst("result.txt") pdb.set_trace() # wsresult = utils.read_rst('wsresult.txt') sizeresult = utils.read_rst("sizeresult.txt") salaryresult = utils.read_rst("salaryresult.txt") posresult = utils.read_rst("positionlet") # degreelst = utils.read_rst('degree.txt') degreelst = utils.read_rst("result1.txt") rsultlabel = map(str, rsultlabel) file.write("id,degree,size,salary,position_name\n") i = 0 print rsultlabel # pdb.set_trace() j = 0 for userid in useridlst: result = []
start = time.clock() try: conn = MySQLdb.connect(host='localhost', user='******', passwd='123456', db='jobs', use_unicode=True, charset='utf8') cur = conn.cursor() cur.execute('set character_set_client=utf8') cur.execute('set character_set_connection=utf8') cur.execute('set character_set_database=utf8') cur.execute('set character_set_results=utf8') cur.execute('set character_set_server=utf8') sql = 'select name, degreer0, degreer1, degreer2 from major ;' cur.execute(sql) majorlst = cur.fetchall() majordct = {} pdb.set_trace() treelst = utils.read_rst('result.txt') resultlst = [] for major in majorlst: majordct[major[0]] = [major[1], major[2], major[3]] sq = 'select userid, major from jobs_uinfotest' cur.execute(sq) usertst = cur.fetchall() i = 0 for user in usertst: if majordct.has_key(user[1]): majorat = majordct[user[1]] mnu = max(majorat) index = majorat.index(mnu) resultlst.append(index) else: resultlst.append(0)
"""Score twenty stored prediction files against the salary test labels.

Prints a list with one hit count per prediction file, in load order.
"""
from jobs import utils
import pdb

test_file = 'd:/jobs/dctree/salary/test.csv'
# NOTE(review): the second argument presumably selects the label column --
# confirm against utils.get_labels.
labels = utils.get_labels(test_file, 6)

# result10.txt .. result19.txt followed by result20.txt .. result29.txt.
result_files = ['result1' + str(j) + '.txt' for j in range(10)]
result_files += ['result2' + str(j) + '.txt' for j in range(10)]
plabel = [utils.read_rst(name) for name in result_files]

# One counter per loaded file; sized from plabel so the count cannot drift
# from the number of files actually read.
rst = [0] * len(plabel)
for i, label in enumerate(labels):
    for j, predictions in enumerate(plabel):
        # Indexing keeps a too-short prediction list a loud IndexError.
        if predictions[i] == label:
            rst[j] += 1

print(rst)