def get_features_db_parallel(conn, rank, table_name, feature_name):
    """Run one feature query and return its result column by column.

    The ``##DataTable##`` placeholder in ``feature_name`` is expanded to
    ``table_name``.  If ``data_io.table_view_existence_db`` reports that the
    resulting view/table does not exist, ``data_io.create_table`` is asked to
    create it.  The query text is then read from
    ``feature_parallel/<name>.sql``, where ``<name>`` is the expanded feature
    name with every occurrence of ``table_name`` replaced by ``"_"``, and is
    executed after substituting ``##DataTable##`` with ``table_name``.

    Args:
        conn: Database connection; must provide ``cursor()``.
        rank: Not used by this function; kept so existing callers still work.
        table_name: Name of the data table the feature is computed on.
        feature_name: Feature name, optionally containing ``##DataTable##``.

    Returns:
        A list of lists holding the transpose of the fetched rows, i.e. one
        inner list per result column.
    """
    # str.replace is a no-op when the placeholder is absent, so no guard
    # is needed around it.
    feature_name = feature_name.replace("##DataTable##", table_name)
    if not data_io.table_view_existence_db(feature_name, conn):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("view " + feature_name + " not exist, creating...")
        data_io.create_table(feature_name, conn)

    # The .sql file name uses "_" wherever the table name occurs.
    feature_name = feature_name.replace(table_name, "_")
    print("getting " + feature_name + " ...")

    # Context manager guarantees the file is closed, even if read() fails.
    with open("feature_parallel/" + feature_name + ".sql") as sql_file:
        query = sql_file.read().strip()
    query = query.replace("##DataTable##", table_name)

    cursor = conn.cursor()
    cursor.execute(query)
    # Build real lists (not lazy map objects) so np.array sees a 2-D
    # sequence on Python 3 as well as Python 2.
    rows = [list(row) for row in cursor.fetchall()]
    # Transpose so that each returned inner list is one result column.
    return [list(column) for column in np.array(rows).T]
def get_features_db_parallel(conn, rank, table_name, feature_name):
    """Fetch a feature from the database as a list of column vectors.

    Steps performed:

    1. Expand ``##DataTable##`` in ``feature_name`` to ``table_name``.
    2. If ``data_io.table_view_existence_db`` says that view/table is
       missing, call ``data_io.create_table`` for it.
    3. Replace ``table_name`` inside the expanded name by ``"_"`` and read
       the query from ``feature_parallel/<that name>.sql``.
    4. Substitute ``##DataTable##`` in the query text with ``table_name``,
       execute it and transpose the fetched rows.

    Args:
        conn: Database connection; must provide ``cursor()``.
        rank: Not used by this function; kept for caller compatibility.
        table_name: Name of the data table the feature is computed on.
        feature_name: Feature name, optionally containing ``##DataTable##``.

    Returns:
        A list with one inner list per result column (the transpose of the
        rows returned by the query).
    """
    placeholder = "##DataTable##"

    view_name = feature_name.replace(placeholder, table_name)
    if not data_io.table_view_existence_db(view_name, conn):
        # Parenthesised single-argument print works on Python 2 and 3.
        print("view " + view_name + " not exist, creating...")
        data_io.create_table(view_name, conn)

    sql_name = view_name.replace(table_name, "_")
    print("getting " + sql_name + " ...")

    # Read the query inside a with-block so the file handle is released.
    with open("feature_parallel/" + sql_name + ".sql") as sql_file:
        query = sql_file.read().strip().replace(placeholder, table_name)

    cursor = conn.cursor()
    cursor.execute(query)

    # List comprehensions (instead of map) keep this working on Python 3,
    # where map returns an iterator that np.array cannot turn into a matrix.
    rows = [list(row) for row in cursor.fetchall()]
    return [list(column) for column in np.array(rows).T]
def find_coauthor(conn, authorid, table_name):
    """Return the ``Author2`` entries paired with ``authorid`` in a view.

    The view queried is ``<table_name>CoAuthors``.  If
    ``data_io.table_view_existence_db`` reports that it does not exist,
    ``data_io.create_view`` is called for it before querying.

    Args:
        conn: Database connection; must provide ``cursor()``.
        authorid: Author identifier matched against ``Author1``; it is
            converted with ``str()`` and inserted into the query text.
        table_name: Base table name.  The lower-case spellings
            ``'trainconfirmed'`` and ``'traindeleted'`` are mapped to
            ``'TrainConfirmed'`` and ``'TrainDeleted'``.

    Returns:
        Whatever ``cursor.fetchall()`` yields for the query (one entry per
        matching row).
    """
    # TODO: matching of co-authors is still to be revised.
    if table_name == 'trainconfirmed':
        table_name = 'TrainConfirmed'
    elif table_name == 'traindeleted':
        table_name = 'TrainDeleted'

    feature_view_name = table_name + 'CoAuthors'
    if not data_io.table_view_existence_db(feature_view_name, conn):
        data_io.create_view(feature_view_name, conn)

    # NOTE(review): the statement is assembled by text substitution, so
    # both values must come from trusted code -- confirm against callers.
    query = (
        "\n\t    SELECT Author2 FROM ##table_name##CoAuthors"
        " WHERE Author1 = ##authorid##\n\t    "
    )
    query = query.replace("##table_name##", table_name)
    query = query.replace("##authorid##", str(authorid))

    cursor = conn.cursor()
    cursor.execute(query)
    return cursor.fetchall()
# Example no. 4
# 0
def find_coauthor(conn, authorid, table_name):
    """Look up the co-authors stored for ``authorid``.

    Queries the ``<table_name>CoAuthors`` view for every ``Author2`` whose
    ``Author1`` equals ``authorid``.  The view is created through
    ``data_io.create_view`` when ``data_io.table_view_existence_db`` reports
    it as missing.  The lower-case names ``'trainconfirmed'`` and
    ``'traindeleted'`` are translated to their capitalised forms first.

    Returns the result of ``cursor.fetchall()`` for that query.
    """
    # TODO: matching of co-authors is still to be revised.
    canonical_names = {
        'trainconfirmed': 'TrainConfirmed',
        'traindeleted': 'TrainDeleted',
    }
    table_name = canonical_names.get(table_name, table_name)

    view = table_name + 'CoAuthors'
    view_exists = data_io.table_view_existence_db(view, conn)
    if not view_exists:
        data_io.create_view(view, conn)

    template = (
        "\n\t    SELECT Author2 FROM ##table_name##CoAuthors"
        " WHERE Author1 = ##authorid##\n\t    "
    )
    # Table name is substituted before the author id, in that order.
    statement = template.replace("##table_name##", table_name).replace(
        "##authorid##", str(authorid))

    cur = conn.cursor()
    cur.execute(statement)
    coauthors = cur.fetchall()
    return coauthors
# Example no. 5
# 0
def main(data_dir=None):
    """Load ``sampleTrain.txt`` into ``AP_features`` and return a sample.

    The ``AP_features`` table is created when
    ``data_io.table_view_existence_db`` reports it as missing.  The file
    ``<data_dir>sampleTrain.txt`` is then loaded with a space-delimited
    ``COPY`` statement, the transaction is committed, and the first three
    rows of the table are fetched.

    Args:
        data_dir: Directory prefix of ``sampleTrain.txt``.  The file name is
            appended directly, so the value must end with a path separator.
            Defaults to the original benchmark directory.

    Returns:
        The rows produced by ``SELECT * FROM AP_features LIMIT 3``.
    """
    if data_dir is None:
        data_dir = (
            '/home/yingzhen/Projects/KDDCUP2013/benchmark/PythonBenchmark/')

    conn = data_io.get_db_conn()
    cursor = conn.cursor()

    if not data_io.table_view_existence_db('AP_features', conn):
        query = (
            "\n\t\tCREATE TABLE AP_features (\n"
            "\t\tResult int, authorid bigint, paperid bigint, AP float, "
            "AP_PP float, AP_PJ_JP \t\t\tfloat, AP_PC_CP float, "
            "AP_PJ_JJ_JP float, AP_PC_CC_CP float)\n\t\t"
        )
        cursor.execute(query)
        conn.commit()

    # NOTE(review): the load runs on every call, so repeated calls
    # presumably add the same rows again -- confirm this is intended.
    query = (
        "\n\t    COPY AP_features FROM '##path##sampleTrain.txt'"
        " DELIMITER ' '\n\t    "
    )
    query = query.replace('##path##', data_dir)
    cursor.execute(query)
    conn.commit()

    query = "\n\t    SELECT * FROM AP_features LIMIT 3\n\t    "
    cursor.execute(query)
    return cursor.fetchall()
def main(data_dir=None):
    """Populate ``AP_features`` from ``sampleTrain.txt`` and peek at it.

    Workflow:

    1. Obtain a connection from ``data_io.get_db_conn()``.
    2. Create the ``AP_features`` table if
       ``data_io.table_view_existence_db`` says it is missing, and commit.
    3. ``COPY`` the space-delimited file ``<data_dir>sampleTrain.txt`` into
       the table and commit.
    4. Select and return three rows from the table.

    Args:
        data_dir: Directory prefix for ``sampleTrain.txt``; it is joined to
            the file name by plain concatenation, so include the trailing
            separator.  When omitted, the original benchmark directory is
            used.

    Returns:
        The rows fetched by ``SELECT * FROM AP_features LIMIT 3``.
    """
    default_dir = (
        '/home/yingzhen/Projects/KDDCUP2013/benchmark/PythonBenchmark/')
    source_dir = default_dir if data_dir is None else data_dir

    create_sql = (
        "\n\t\tCREATE TABLE AP_features (\n"
        "\t\tResult int, authorid bigint, paperid bigint, AP float, "
        "AP_PP float, AP_PJ_JP \t\t\tfloat, AP_PC_CP float, "
        "AP_PJ_JJ_JP float, AP_PC_CC_CP float)\n\t\t"
    )
    copy_sql = (
        "\n\t    COPY AP_features FROM '##path##sampleTrain.txt'"
        " DELIMITER ' '\n\t    "
    ).replace('##path##', source_dir)
    sample_sql = "\n\t    SELECT * FROM AP_features LIMIT 3\n\t    "

    conn = data_io.get_db_conn()
    cursor = conn.cursor()

    if not data_io.table_view_existence_db('AP_features', conn):
        cursor.execute(create_sql)
        conn.commit()

    # NOTE(review): executed unconditionally, so calling main() more than
    # once presumably loads the file again -- confirm this is intended.
    cursor.execute(copy_sql)
    conn.commit()

    cursor.execute(sample_sql)
    return cursor.fetchall()