示例#1
0
    def find_keywords(self):
        """Choose a keyword for every node of every tree in ``self.trees``.

        For each node the contents of all keywords linked to the papers in
        ``node.id_set`` are appended to ``node.keyword_list``; the keyword
        itself is then picked by ``calc.CalcFatherv2`` and stored in
        ``node.keyword``.

        Roots are processed first and share one set of already-used keywords.
        Afterwards the descendants of each root are visited breadth-first,
        each tree starting from its own copy of the root-level set.

        One progress line is printed per node, and a final line when done.
        """
        # Local import: a plain deque is sufficient for a single-threaded
        # breadth-first walk and needs no module-level dependency.
        from collections import deque

        def assign_keyword(node, used_keywords):
            """Fill node.keyword_list, pick node.keyword and mark it as used."""
            pid_str = ",".join(str(pid) for pid in node.id_set)
            # An empty id set would render as "pid in ()", which is not valid
            # SQL, so the lookup is skipped and keyword_list is left as it is.
            if pid_str:
                sql = "select content from keyword inner join keyword_paper_relation on keyword.id = kid where pid in (%s)" % pid_str
                for row in self.database.executeSQL(sql):
                    node.keyword_list.append(row[0])

            node.keyword = calc.CalcFatherv2(node.keyword_list, used_keywords)
            used_keywords.add(node.keyword)

            # Single-argument print(...) parses the same way under Python 2
            # and Python 3 and produces the same text as the print statement.
            if node.keyword == '':
                print('@@@@ %s' % (node.keyword_list,))
            else:
                print('%s %s %s' % (node.keyword, node.level, node.group_id))

        # Pass 1: roots, all sharing the same set of used keywords.
        root_set = set()
        for root in self.trees:
            assign_keyword(root, root_set)

        # Pass 2: descendants, breadth-first within each tree.
        for root in self.trees:
            # Copy, so keywords used inside one tree do not leak into the next.
            is_used_keywords = set(root_set)

            pending = deque(root.children_nodes)
            while pending:
                node = pending.popleft()
                assign_keyword(node, is_used_keywords)
                pending.extend(node.children_nodes)

        print('find keywords done.')
示例#2
0
def start():
    """Run the whole pipeline from database connect to database close.

    Steps, in order: connect, initialise the similarity data, create the
    source file, call BGLL, collect all results, clear the stored hierarchy
    relations, then call ``updateHierarchyRelation`` once for every
    ``(group, pid_set)`` entry of ``result_paper_cate[total_level]``.

    The connection is closed even when one of the steps raises; the
    exception still propagates to the caller.
    """
    connectDB()
    try:
        calc.init_similarity()

        createSourceFile()
        callBGLL()
        getAllResult()

        clearHierarchyRelation()

        # group: the id of each tree
        for group, pid_set in result_paper_cate[total_level].items():
            print("======================level:%s====================" % group)
            # A fresh set per group is handed to updateHierarchyRelation.
            is_selected = set()
            updateHierarchyRelation(group, pid_set, total_level, -1, is_selected)
    finally:
        # Do not leak the connection when a pipeline step fails.
        closeDB()
示例#3
0
def start():
	"""Run the whole pipeline from database connect to database close.

	Steps, in order: connect, initialise the similarity data, create the
	source file, call BGLL, collect all results, clear the stored hierarchy
	relations, then call ``updateHierarchyRelation`` once for every
	``(group, pid_set)`` entry of ``result_paper_cate[total_level]``.

	The connection is closed even when one of the steps raises; the
	exception still propagates to the caller.
	"""
	connectDB()
	try:
		calc.init_similarity()

		createSourceFile()
		callBGLL()
		getAllResult()

		clearHierarchyRelation()

		# group: the id of each tree
		for group, pid_set in result_paper_cate[total_level].items():
			print("======================level:%s====================" % group)
			# A fresh set per group is handed to updateHierarchyRelation.
			is_selected = set()
			updateHierarchyRelation(group, pid_set, total_level, -1, is_selected)
	finally:
		# Do not leak the connection when a pipeline step fails.
		closeDB()
示例#4
0
    def __init__(self):
        """Build the BGLL and Database helpers, then initialise similarity data.

        Reads ``output_path``, ``is_weighted`` and the four ``db_*`` settings
        from ``self``; those attributes must exist before this runs.
        """
        bgll_args = (self.output_path, self.is_weighted)
        self.BGLL = BGLL(*bgll_args)

        db_args = (self.db_host, self.db_user, self.db_pass, self.db_name)
        self.database = Database(*db_args)

        calc.init_similarity()
示例#5
0
import MySQLdb
import os
import CalcFather as calc

# Connection settings for the database.
db_host, db_user, db_pass, db_name = "localhost", "******", "******", "papernet"

# Module-wide connection and cursor, shared by the statements below.
conn = MySQLdb.connect(host=db_host, user=db_user, passwd=db_pass, db=db_name)
cursor = conn.cursor()

calc.init_similarity()

# Read the paper_paper_relation table and write it into paper.txt.
map1 = {}  # from old to new
map2 = {}  # from new to old

file_path = '/home/cowx/workspace/BGLL/paper/paper.txt'
f = open(file_path, 'w+')

sql = "select pid1, pid2 from paper_paper_relation order by pid1, pid2"
cursor.execute(sql)
result = cursor.fetchall()

# Collect every paper id that appears on either side of a relation.
pid_set = set()
for row in result:
	pid_set.update((row[0], row[1]))
示例#6
0
import MySQLdb
import os
import CalcFather as calc

# Connection settings for the database.
db_host, db_user, db_pass, db_name = "localhost", "******", "******", "papernet"

# Module-wide connection and cursor, shared by the statements below.
conn = MySQLdb.connect(host=db_host, user=db_user, passwd=db_pass, db=db_name)
cursor = conn.cursor()

calc.init_similarity()

# Read the paper_paper_relation table and write it into paper.txt.
map1 = {}  # from old to new
map2 = {}  # from new to old

file_path = '/home/cowx/workspace/BGLL/paper/paper.txt'
f = open(file_path, 'w+')

sql = "select pid1, pid2 from paper_paper_relation order by pid1, pid2"
cursor.execute(sql)
result = cursor.fetchall()

# Collect every paper id that appears on either side of a relation.
pid_set = set()
for row in result:
	pid_set.update((row[0], row[1]))
示例#7
0
            try:
                sql = 'insert into keyword_hierarchy_relation(group_id, father, child) values(%d, %d, %d)' % (
                    int(group), father, kid)
                executeSQL(sql)
            except Exception, e:
                if str(e).find('1062') != -1:  #duplicate key
                    sql = 'update keyword_hierarchy_relation set father = %d where group_id = %d and child = %d' % (
                        father, int(group), kid)
                    executeSQL(sql)
                else:
                    print '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'
                    print sql
                    print e
                    print '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'

    content = calc.CalcFatherv2(content_list, is_selected)

    sql = 'select id from keyword where content = "%s"' % content
    try:
        result = executeSQL(sql)
        is_selected.add(content)
    except Exception, e:
        print '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'
        print sql
        print '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'

    print "%s %d %d" % (content, father, level)
    # if content == "":
    # print "[pids:%s content:%s]" % (pid_set, content_list)

    if level >= 1 and content != "":