def run(self):
    """Connect to the database and announce the outcome via ``postLogin``.

    A ``DB`` object is built from the connection settings stored on this
    instance (``dbHost``, ``dbDatabase``, ``dbTable``, ``dbUser``,
    ``dbPass``). Whatever ``DB.connect()`` returns is emitted together with
    the ``DB`` object through the old-style
    ``postLogin(PyQt_PyObject, PyQt_PyObject)`` signal, so the receiver gets
    both the status and the handle.
    """
    # Build the database handle from the stored settings.
    database = DB(
        self.dbHost,
        self.dbDatabase,
        self.dbTable,
        self.dbUser,
        self.dbPass,
    )

    # Attempt the remote connection and hand the result to the listeners.
    status = database.connect()
    self.emit(SIGNAL("postLogin(PyQt_PyObject, PyQt_PyObject)"), status, database)
def run(self):
    """Connect to the database and emit ``postLoginSignal`` with the result.

    A ``DB`` object is built from the connection settings stored on this
    instance (``dbHost``, ``dbDatabase``, ``dbTable``, ``dbUser``,
    ``dbPass``). The value returned by ``DB.connect()`` and the ``DB``
    object itself are passed, in that order, to ``postLoginSignal.emit``.
    """
    # Build the database handle from the stored settings.
    database = DB(
        self.dbHost,
        self.dbDatabase,
        self.dbTable,
        self.dbUser,
        self.dbPass,
    )

    # Attempt the remote connection and hand the result to the listeners.
    status = database.connect()
    self.postLoginSignal.emit(status, database)
def __init__(self, vec_file, pap, pat, pro):
    """Load the word-vector model, the three Annoy indexes and the helpers.

    Args:
        vec_file: Path of the binary word2vec file loaded into ``self.wm``.
        pap: Path handed to ``AnnoyIndexer.load`` for the paper index.
        pat: Path handed to ``AnnoyIndexer.load`` for the patent index.
        pro: Path handed to ``AnnoyIndexer.load`` for the project index.
    """
    # Alternative loader kept from the original for reference:
    # gensim.models.KeyedVectors.load_word2vec_format(vec_file, binary=True)
    self.wm = gensim.models.word2vec.Word2Vec.load_word2vec_format(
        vec_file, binary=True)

    # One indexer per document kind, created and loaded in the original order.
    for attribute, index_path in (
            ("paper_index", pap),
            ("patent_index", pat),
            ("project_index", pro)):
        indexer = AnnoyIndexer()
        setattr(self, attribute, indexer)
        indexer.load(index_path)

    self.t2v = Convert2Vec(self.wm)
    self.cuttor = FilterCut()
    self.db = DB()
    self.featureIndex = self.buildFeatureIndex()
def start(self):
    """Run the text UI: connect to the database, then show the main menu.

    The user is asked for database settings until a connection succeeds or
    they decline to re-enter them (which exits the process). Ctrl-C is
    swallowed so that the clean-up in ``finally`` always runs and closes the
    database handle when one exists.
    """
    # Compiled once here so the card-reading code can reuse it on every swipe.
    self.regex = re.compile(";(.+)=")

    try:
        while True:
            # Ask for the connection settings and build a handle from them.
            self.getDbInfo()
            self.db = DB(self.dbHost, c.DEFAULT_DATABASE, self.dbTable,
                         self.dbUser, self.dbPass)

            # A successful connection ends the retry loop.
            if not (self.connectToDatabase() != c.SUCCESS):
                break

            # Otherwise offer another attempt; anything but "n" retries.
            answer = raw_input("Failed to connect to database. Re-enter database info? (Y,n) ")
            if answer.lower() == "n":
                print("Bye.")
                sys.exit(0)

        # Connected: hand control to the main menu loop.
        self.displayMenu()
    except KeyboardInterrupt:
        pass
    finally:
        print("Cleaning up and exiting...")
        if self.db is not None:
            self.db.close()
class TextUI:
    """Console front-end for the card check-in system.

    The UI asks for database settings, connects, and then loops over a small
    menu that lets the operator check cards in for points or list points.
    Single-argument ``print(...)`` calls are used throughout; they behave the
    same as the Python 2 print statement this file is written for.
    """

    def __init__(self):
        # Set up front so the clean-up in start() can test it safely.
        self.db = None

    def start(self):
        """Connect to the database (with retries) and run the main menu.

        Ctrl-C is swallowed so the ``finally`` clean-up always closes the
        database handle when one was created.
        """
        # Compile the regex for pulling the card ID from all the data on a
        # card once, so it is not rebuilt for every swipe.
        self.regex = re.compile(";(.+)=")

        try:
            while 1:
                # Get DB info and create the DB object from it.
                self.getDbInfo()
                self.db = DB(self.dbHost, c.DEFAULT_DATABASE, self.dbTable,
                             self.dbUser, self.dbPass)

                connectStatus = self.connectToDatabase()
                if connectStatus == c.SUCCESS:
                    break

                # Failed to connect: offer to re-enter the database info.
                reenter = raw_input("Failed to connect to database. Re-enter database info? (Y,n) ")
                if reenter.lower() == "n":
                    print("Bye.")
                    sys.exit(0)

            # Start the main menu loop.
            self.displayMenu()
        except KeyboardInterrupt:
            pass
        finally:
            print("Cleaning up and exiting...")
            if self.db is not None:
                self.db.close()

    def displayMenu(self):
        """Show the main menu forever, dispatching on the typed option."""
        print("\nType \"back\" at any time to go up a menu level.")

        while 1:
            # Display main menu
            print("\n\t1.) Check-in\n\t2.) Show Points\n\t3.) Exit")

            try:
                option = raw_input("\n>> ")

                if option == "1":
                    self.checkin()
                elif option == "2":
                    self.showPoints()
                elif option == "3":
                    sys.exit(0)
                elif option == "back" or option == "exit":
                    # Top level already: "back" can only mean leaving.
                    confirmExit = raw_input("Exit? (y,N) ")
                    if confirmExit.lower() == "y":
                        sys.exit(0)
                else:
                    self.invalidInput()
            except ValueError:
                self.invalidInput()

    def connectToDatabase(self):
        """Connect ``self.db`` and report progress on stdout.

        Returns:
            The status from ``self.db.connect()`` when it is ``c.SUCCESS`` or
            ``c.BAD_PASSWD``; ``c.FAILURE`` for any other status.
        """
        # Use stdout.write to prevent the newline print would add.
        sys.stdout.write("Connecting to database...")

        status = self.db.connect()

        if status == c.SUCCESS:
            print("done.")
            return status
        elif status == c.BAD_PASSWD:
            print("\nError connecting to database: Bad username or password.")
            return status
        else:
            print("\nUnknown Error connecting to database.")
            return c.FAILURE

    def checkin(self):
        """Ask for a point value, then check in cards until "back" is swiped.

        Typing "back" at the point-value prompt returns to the menu
        immediately instead of being used as the point value.
        """
        # Get and validate the point value for this check-in.
        # Limited to 500 points to prevent bad typos.
        while 1:
            pointValue = SharedUtils.sanitizeInput(
                raw_input("\nPoint Value (" + str(c.DEFAULT_POINTS) + "): "))

            if pointValue == "":
                pointValue = str(c.DEFAULT_POINTS)
                break
            elif pointValue == "back":
                # Go up a menu level rather than checking in "back" points.
                return
            elif pointValue.isdigit() and int(pointValue) <= 500:
                break
            else:
                print("Invalid input. Try again.")

        while 1:
            cardID = self.getCardSwipe()

            # If the user requested to exit the loop, break
            if cardID == c.BACK:
                break
            elif cardID == c.ERROR_READING_CARD:
                print("Error reading card. Swipe card again.")
                continue

            # cardID will be empty if it failed sanitization; skip the
            # check-in in that case.
            cardID = SharedUtils.sanitizeInput(cardID)
            if cardID == "":
                continue

            # Do the checkin
            checkinResult = self.db.checkin(cardID, pointValue)
            checkinStatus = checkinResult["checkinStatus"]

            if checkinStatus == c.SQL_ERROR:
                self.showDatabaseError(checkinResult["sqlError"])
            elif checkinStatus == c.BAD_CHECKIN_TIME:
                print("Error: You may only check-in once per hour.")
            elif checkinStatus == c.FUTURE_CHECKIN_TIME:
                print("Error: Previous check-in time was in the future. Check your local system time.")
            elif checkinStatus == c.CARD_NOT_IN_DB:
                # Ask if user wants to add the card; compared case-insensitively
                # like the other yes/no prompts in this class.
                addCard = raw_input("Error: Card not found in database. Add it now? (Y,n) ")
                if addCard.lower() == "n":
                    continue

                # Get the accessID for the new card
                accessID = SharedUtils.sanitizeInput(raw_input("Access ID: "))

                # Add the card
                addCardResult = self.db.addCard(cardID, accessID, pointValue)

                if addCardResult["addCardStatus"] == c.SUCCESS:
                    self.showCheckinConfirmation(accessID, pointValue)
                elif addCardResult["addCardStatus"] == c.SQL_ERROR:
                    self.showDatabaseError(addCardResult["sqlError"])
            elif checkinStatus == c.SUCCESS:
                self.showCheckinConfirmation(checkinResult["accessID"], pointValue)
            else:
                print("Unknown error checking in.")

    def showPoints(self):
        """Print the points of one access ID, or a table of all of them."""
        accessID = SharedUtils.sanitizeInput(raw_input("\nAccess ID (blank for all): "))
        showPointsResult = self.db.showPoints(accessID)
        status = showPointsResult["showPointsStatus"]

        if status == c.SQL_ERROR:
            self.showDatabaseError(showPointsResult["sqlError"])
        elif status == c.NO_RESULTS:
            print("\nThere were no results to that query.")
        elif status == c.SUCCESS:
            rows = showPointsResult["pointsTuple"]
            if accessID == "":
                # If showing all users, display a pretty table
                print("\n+--------------------+\n| Access ID | Points |\n+--------------------+")
                for row in rows:
                    print("|%10s | %6s |" % (row[0], row[1]))
                print("+--------------------+")
            else:
                # Show a single user's points
                print("\n%s has %s points." % (accessID, str(rows[0][0])))

    def getCardSwipe(self):
        """Read one card swipe and return the card ID it contains.

        Returns:
            The text captured by ``self.regex``; ``c.BACK`` when no ID is
            found and the input contains "exit" or "back"; otherwise
            ``c.ERROR_READING_CARD``.
        """
        # Read the card data as a password so it doesn't show on the screen
        cardID = SharedUtils.sanitizeInput(getpass.getpass("\nWaiting for card swipe..."))

        try:
            # search() returns None when nothing matches, which makes
            # .group raise AttributeError and drops us into the handler.
            return self.regex.search(cardID).group(1)
        except AttributeError:
            # If exit or back, just return to go back
            if "exit" in cardID or "back" in cardID:
                return c.BACK
            # Else, a match wasn't found which probably means there was an
            # error reading the card or the card isn't a PSU ID card, but
            # assume the former.
            else:
                return c.ERROR_READING_CARD

    def getDbInfo(self):
        """Prompt for host, table, user and password, applying defaults.

        Blank answers fall back to ``c.DEFAULT_HOST``, ``c.DEFAULT_TABLE``
        and ``c.DEFAULT_USER``. The password has no default and is asked for
        until it is non-empty.
        """
        self.dbHost = raw_input("Database host: (" + c.DEFAULT_HOST + ") ")
        if self.dbHost == "":
            self.dbHost = c.DEFAULT_HOST

        self.dbTable = raw_input("Database table: (" + c.DEFAULT_TABLE + ") ")
        if self.dbTable == "":
            self.dbTable = c.DEFAULT_TABLE

        self.dbUser = raw_input("Database Username: (" + c.DEFAULT_USER + ") ")
        if self.dbUser == "":
            self.dbUser = c.DEFAULT_USER

        while 1:
            self.dbPass = getpass.getpass("Database Password: ")
            if self.dbPass == "":
                print("Database password cannot be blank.")
            else:
                break

    def showCheckinConfirmation(self, accessID, pointValue):
        """Print the one-line confirmation for a successful check-in."""
        print("%s +%s points" % (accessID, pointValue))

    def showDatabaseError(self, error):
        """Print a database error warning.

        The second element of ``error.args`` is shown when it exists; errors
        carrying fewer arguments are printed as a whole instead of raising
        IndexError.
        """
        errorArgs = getattr(error, "args", ())
        detail = errorArgs[1] if len(errorArgs) > 1 else error
        print("\nWARNING! Database error:\n%s" % (detail,))

    def invalidInput(self):
        """Tell the operator that the menu option was not recognised."""
        print("Invalid option. Try again.")
def get_author_by_sql(self, typee, ids):
    """Return ``DB.getAuthors(typee, ids)`` using a freshly created ``DB``.

    Args:
        typee: Document kind, forwarded unchanged to ``getAuthors``.
        ids: Document ids, forwarded unchanged to ``getAuthors``.
    """
    # Fetch the information through MySQL.
    return DB().getAuthors(typee, ids)
def expertDocsSort(self, expertId, txt, topN):
    """Rank one expert's papers, patents and projects against a text.

    For each kind, every document's slot 3 is overwritten with the vector of
    ``doc[1] + doc[2]`` and then, when a vector exists, with its Annoy
    distance to the query text. Documents are sorted ascending on slot 3 and
    the leading ``topN`` are turned into dicts of UTF-8 encoded fields.

    Args:
        expertId: Passed to ``DB.getPapers`` / ``getPatents`` /
            ``getProjects``.
        txt: Query text, vectorised with ``self.t2v`` and ``self.cuttor``.
        topN: Formatting stops once this many documents were collected.

    Returns:
        dict with the keys ``'papers'``, ``'patents'`` and ``'projects'``,
        each a list of field dicts.
    """
    queryVec = self.t2v.text2v(txt, self.cuttor)
    # NOTE(review): 200 is presumably the vector dimensionality -- confirm
    # it matches the word2vec model in use.
    index = AnnoyIndex(200)

    def seedQuery():
        # Item 0 is always the query text; documents start at item 1.
        index.add_item(0, queryVec)

    def rankByDistance(docs):
        # Score each document in place, then order by that score.
        slot = 1
        for doc in docs:
            doc[3] = self.t2v.text2v(doc[1] + doc[2], self.cuttor)
            if doc[3] is not None:
                index.add_item(slot, doc[3])
                doc[3] = index.get_distance(0, slot)
                slot = slot + 1
        # Documents without a vector keep None in slot 3.
        return sorted(docs, key=lambda doc: doc[3])

    def utf8OrNone(value):
        # None stays None; everything else is encoded to UTF-8.
        if value is None:
            return value
        return value.encode('utf8')

    def formatTop(docs, layout):
        # layout is a sequence of (output key, column in the row).
        formatted = []
        for doc in docs:
            if len(formatted) == topN:
                break
            entry = {}
            for key, column in layout:
                entry[key] = utf8OrNone(doc[column])
            formatted.append(entry)
        return formatted

    seedQuery()
    db = DB()

    papers = rankByDistance(db.getPapers(expertId))
    papersFormated = formatTop(papers, (
        ('paperId', 0),
        ('name', 1),
        ('authors', 4),
        ('journalName', 5),
        ('year', 6),
    ))

    # Drop the paper vectors and re-seed the query before the next kind.
    index.unload()
    seedQuery()
    patents = rankByDistance(db.getPatents(expertId))
    patentsFormated = formatTop(patents, (
        ('patentId', 0),
        ('publicationNo', 4),
        ('name', 1),
        ('inventors', 5),
        ('applicant', 6),
        ('year', 7),
    ))

    index.unload()
    seedQuery()
    projects = rankByDistance(db.getProjects(expertId))
    projectsFormated = formatTop(projects, (
        ('projectId', 0),
        ('name', 1),
        ('member', 4),
        ('unit', 5),
        ('year', 6),
        ('type', 7),
    ))

    return {
        'papers': papersFormated,
        'patents': patentsFormated,
        'projects': projectsFormated,
    }
class Recommander(object):
    """Find documents and experts similar to a piece of text.

    The class holds a word2vec model, one Annoy index each for papers,
    patents and projects, a database handle and a feature index used to
    filter documents by field, type, province and unit. Single-argument
    ``print(...)`` calls behave the same as the Python 2 print statement
    this file is written for.
    """

    # Feature files read by buildFeatureIndex, keyed by document kind.
    _FEATURE_FILES = (
        ('paper',
         "/testdata400/data/recommender/data0828/feature/paper_feature.txt"),
        ('patent',
         "/testdata400/data/recommender/data0828/feature/patent_feature.txt"),
        ('project',
         "/testdata400/data/recommender/data0828/feature/project_feature.txt"),
    )

    # Column of each feature inside a tab-separated feature line
    # (column 0 is the document id). The order also matches the positions
    # of the corresponding values in ``filterParams``.
    _FEATURE_COLUMNS = (('field', 1), ('type', 2), ('province', 3),
                        ('unit', 4))

    def __init__(self, vec_file, pap, pat, pro):
        """Load the model, the three Annoy indexes and the feature index.

        Args:
            vec_file: Path of the binary word2vec file.
            pap: Path loaded into the paper ``AnnoyIndexer``.
            pat: Path loaded into the patent ``AnnoyIndexer``.
            pro: Path loaded into the project ``AnnoyIndexer``.
        """
        # NOTE(review): newer gensim exposes this loader as
        # gensim.models.KeyedVectors.load_word2vec_format -- confirm which
        # gensim version this project pins before switching.
        self.wm = gensim.models.word2vec.Word2Vec.load_word2vec_format(
            vec_file, binary=True)
        self.paper_index = AnnoyIndexer()
        self.paper_index.load(pap)
        self.patent_index = AnnoyIndexer()
        self.patent_index.load(pat)
        self.project_index = AnnoyIndexer()
        self.project_index.load(pro)
        self.t2v = Convert2Vec(self.wm)
        self.cuttor = FilterCut()
        self.db = DB()
        self.featureIndex = self.buildFeatureIndex()

    def buildFeatureIndex(self):
        """Load the paper, patent and project feature files.

        Returns:
            dict mapping ``'paper'`` / ``'patent'`` / ``'project'`` to the
            index built by ``loadFeature``. Each file is closed as soon as it
            has been read.
        """
        featureIndex = {}
        for typee, path in self._FEATURE_FILES:
            with open(path, 'r') as featureFile:
                featureIndex[typee] = self.loadFeature(featureFile)
        return featureIndex

    def loadFeature(self, file):
        """Build the lookup tables for one tab-separated feature file.

        Each non-blank line is split on tabs; column 0 is the document id and
        columns 1-4 are field, type, province and unit. The line terminator
        is stripped first so the last column does not carry a newline.

        Args:
            file: Object providing ``readlines()``.

        Returns:
            dict with the keys ``'field'``, ``'type'``, ``'province'`` and
            ``'unit'``; each maps a feature value to the list of document ids
            having it, in file order.

        Raises:
            IndexError: A non-blank line has fewer than five columns.
        """
        index = dict((name, {}) for name, _ in self._FEATURE_COLUMNS)
        for line in file.readlines():
            line = line.rstrip('\r\n')
            if not line.strip():
                continue
            feature = line.split('\t')
            for name, column in self._FEATURE_COLUMNS:
                index[name].setdefault(feature[column], []).append(feature[0])
        return index

    def filter(self, typee, topDocs, filterParams, topN):
        """Filter papers, patents or projects by their features.

        Args:
            typee: ``'paper'``, ``'patent'`` or ``'project'``.
            topDocs: Sequence of ``(doc id, similarity)`` pairs.
            filterParams: Values for field, type, province and unit, in that
                order. An empty string or ``'-1'`` disables that filter, and
                the field filter is never applied to projects.
            topN: Collection stops once this many documents were kept.

        Returns:
            The entries of ``topDocs`` that pass every active filter, in
            their original order. A filter value that does not occur in the
            feature index matches nothing, so the result is empty.
        """
        allowedIds = set(docId for docId, _ in topDocs)
        for position, (name, _) in enumerate(self._FEATURE_COLUMNS):
            wanted = filterParams[position]
            if wanted == '' or wanted == '-1':
                continue
            if name == 'field' and typee == 'project':
                continue
            # .get() keeps an unknown value from raising KeyError.
            allowedIds &= set(self.featureIndex[typee][name].get(wanted, ()))

        result = []
        for doc in topDocs:
            if doc[0] in allowedIds:
                result.append(doc)
                if len(result) == topN:
                    break
        return result

    def most_similar_paper(self, text, topn=10):
        """Return the ``topn`` paper-index neighbours of ``text``."""
        vec = self.t2v.text2v(text, self.cuttor)
        return self.paper_index.most_similar(vec, topn)

    def most_similar_patent(self, text, topn=10):
        """Return the ``topn`` patent-index neighbours of ``text``."""
        vec = self.t2v.text2v(text, self.cuttor)
        return self.patent_index.most_similar(vec, topn)

    def most_similar_project(self, text, topn=10):
        """Return the ``topn`` project-index neighbours of ``text``."""
        vec = self.t2v.text2v(text, self.cuttor)
        return self.project_index.most_similar(vec, topn)

    def getSimExpertsIds(self, topDocs):
        """Collect weighted similarities per expert from the top documents.

        Args:
            topDocs: dict mapping a document kind to a sequence of
                ``(doc id, similarity)`` pairs.

        Returns:
            ``(expertMap, expertInfoOut)``. ``expertMap`` maps an expert id
            to the list of weighted similarities of their documents.
            ``expertInfoOut`` maps an expert id to a list of
            ``[kind + rank of the document, weighted similarity,
            author position]`` entries.
        """
        expertInfoOut = {}
        expertMap = {}
        # Weight by author position; its length also limits how many
        # authors are considered per document (four).
        authorSeqWeight = [1.0, 0.85, 0.7, 0.5]

        for typee in topDocs:
            docs = topDocs[typee]
            order = {}      # doc id -> rank in docs (last occurrence wins)
            firstSim = {}   # doc id -> similarity (first occurrence wins)
            for rank, (docId, docSim) in enumerate(docs):
                order[docId] = rank
                firstSim.setdefault(docId, docSim)

            docExpertIds = self.db.getAuthors(typee, list(
                docId for docId, _ in docs))
            for docId in docExpertIds:
                if not self.db.idInDB(typee, docId):
                    print("docId:" + docId + "is not in db")
                    continue
                sim = firstSim[docId] * 1.0 if docId in firstSim else 1.0
                for position, (expertId, weight) in enumerate(
                        zip(docExpertIds[docId], authorSeqWeight)):
                    expertInfoOut.setdefault(expertId, []).append(
                        [typee + str(order[docId]), sim * weight, position])
                    expertMap.setdefault(expertId, []).append(sim * weight)
        return expertMap, expertInfoOut

    def most_similar_expert(self, topPapers, topPatents, topProjects,
                            filterParams, expertTopN):
        """Score experts from the filtered top documents.

        ``config.ini`` (relative to the working directory) supplies
        ``[global] len`` -- how many of an expert's documents count -- and
        ``[global] coe`` -- the coefficient applied to every document after
        the best one. The chosen experts are also written to a log file via
        ``printOut``.

        NOTE(review): the original design note says experts should first be
        extracted from documents that are not filtered by region, then
        filtered by region, topping up from region-filtered documents when
        fewer than topN remain. The code here applies all filters, province
        included, up front. The original author expected the requirement to
        change and deferred the refactoring.

        Args:
            topPapers: ``(doc id, similarity)`` pairs for papers.
            topPatents: ``(doc id, similarity)`` pairs for patents.
            topProjects: ``(doc id, similarity)`` pairs for projects.
            filterParams: Passed to ``filter`` for every kind.
            expertTopN: Number of experts to return.

        Returns:
            List of ``(expert id, score)`` tuples, best score first.
        """
        config = ConfigParser.ConfigParser()
        with open("config.ini", 'r') as configFile:
            config.readfp(configFile)
        LEN = int(config.get('global', 'len'))
        COE = float(config.get('global', 'coe'))

        topDocs = {
            'paper': self.filter('paper', topPapers, filterParams, 50),
            'patent': self.filter('patent', topPatents, filterParams, 50),
            'project': self.filter('project', topProjects, filterParams, 15),
        }
        # expertMap: expert id -> similarities of their documents.
        expertMap, expertInfoOut = self.getSimExpertsIds(topDocs)

        expertScoreMap = {}  # expert id -> score
        for expert, sims in expertMap.items():
            sims.sort(reverse=True)
            score = sims[0]
            # max() keeps a non-positive LEN from turning into a negative
            # slice bound; only the best document counts in that case.
            for extra in sims[1:max(LEN, 1)]:
                score = score + COE * extra
            expertScoreMap[expert] = score

        result = sorted(expertScoreMap.items(),
                        key=lambda item: item[1],
                        reverse=True)[0:expertTopN]
        out = [{expertId: expertInfoOut[expertId]}
               for expertId, _ in result if expertId in expertInfoOut]
        self.printOut(out, LEN)
        return result

    def printOut(self, out, l):
        """Write the per-expert document details to a timestamped log file.

        The file is ``log/<YYYY-mm-dd HH-MM-SS>.txt``; its name is printed
        first. For every expert four lines are written (expert id, document
        labels, similarities, author positions), and a blank line follows
        each entry of ``out``.

        Args:
            out: List of dicts mapping an expert id to its document entries;
                ``most_similar_expert`` passes one expert per dict. The
                entries are replaced in place by their ``l`` best.
            l: Maximum number of documents kept per expert.
        """
        name = str('log/' + time.strftime("%Y-%m-%d %H-%M-%S" + ".txt",
                                          time.localtime()))
        print(name)

        # Keep only the l most similar documents of every expert.
        for expert in out:
            for expertId in expert:
                expert[expertId] = sorted(expert[expertId],
                                          key=lambda doc: doc[1],
                                          reverse=True)[0:l]

        with open(name, 'w') as output:
            for expert in out:
                for expertId in expert:
                    docs = expert[expertId]
                    output.write(expertId + '\n')
                    output.write(
                        ''.join(doc[0] + ' ' for doc in docs) + '\n')
                    output.write(
                        ''.join(str(doc[1]) + ' ' for doc in docs) + '\n')
                    output.write(
                        ''.join(str(doc[2]) + ' ' for doc in docs) + '\n')
                output.write("\n")

    def get_model(self):
        """Return the loaded word-vector model."""
        return self.wm

    def get_cuttor(self):
        """Return the ``FilterCut`` instance used when vectorising text."""
        return self.cuttor