Exemplos de read_csv_list em Python, exemplos de common_utils.read_csv_list em Python

Exemplo n.º 1

0

Exibir arquivo

def plot_network_chartz():
    """Draw the e-mail comparison network and save it as a PNG image.

    Reads at most 999 rows (the first row is skipped) from
    ``results_email_comp.csv``, keeps the rows whose last column is below
    10000, builds a graph whose edges join the first two columns of each
    remaining row and writes the drawing to ``graphs/Graph_Email.png``.

    Node colour and node size both scale with the node degree.
    """
    rows = read_csv_list("results_email_comp.csv")[1:1000]
    rows = [row for row in rows if float(row[-1]) < 10000.0]
    print(len(rows))
    df = pd.DataFrame({
        'from': [row[0] for row in rows],
        'to': [row[1] for row in rows],
    })
    # Build the graph from the edge list.
    G = nx.from_pandas_edgelist(df, 'from', 'to')
    fig = plt.figure(figsize=(50, 50))
    node_color = [100 * G.degree(node) for node in G]
    node_size = [1000 * G.degree(node) for node in G]

    # draw_spring() hands its keyword arguments to the drawing call, not to
    # spring_layout(), so the spacing parameter `k` must be given to the
    # layout function directly for it to have any effect.
    pos = nx.spring_layout(G, k=0.14)
    nx.draw(G,
            pos,
            with_labels=True,
            node_size=node_size,
            node_color=node_color,
            node_shape="o",
            alpha=0.5,
            linewidths=4,
            font_size=25,
            font_color="black",
            font_weight="bold",
            width=2,
            edge_color="grey")
    fig.savefig("graphs/Graph_Email.png", format="PNG")
    # Release the (very large) figure once it has been written to disk.
    plt.close(fig)

Exemplo n.º 2

0

Exibir arquivo

def gen_coincidences(do):
	"""Generate the coincidence output for the first five pairs of multfs.csv.

	For every identifier in ``do`` the three pickled structures stored under
	``<id>_files/`` are loaded. Then, for each of the first five rows of
	``multfs.csv`` (first row skipped, six columns expected per row), a
	directory is created under ``multfs_users/`` and the coincidences found
	for the pair are passed to ``gen_post_coincidences``.
	"""
	base_dir = "multfs_users/"
	all_pairs = read_csv_list("multfs.csv")[1:]
	create_dir(base_dir)
	selected_pairs = all_pairs[:5]

	# One entry per identifier, in the same order as `do`.
	users_per_id = []
	values_per_id = []
	user_index_per_id = []
	for feature_id in do:
		loaded_users = unpickle_object(feature_id + "_files/clean_dictio_of_users.pkl")
		users_per_id.append(loaded_users)
		print(len(loaded_users))
		values_per_id.append(unpickle_object(feature_id + "_files/clean_value_ind.pkl"))
		user_index_per_id.append(unpickle_object(feature_id + "_files/clean_user_ind.pkl"))

	for position, pair in enumerate(selected_pairs):
		u1, u2, _, _, _, _ = pair
		print("Going for %d" % (position), u1, u2)
		# get_username() yields three values; only the name is used here.
		name_a, _, _ = get_username(u1)
		name_b, _, _ = get_username(u2)
		pair_dir = base_dir + "%s(%s)-%s(%s)/" %(u1, name_a, u2, name_b)
		create_dir(pair_dir)
		found = get_coincidences_for_pair(u1, u2, users_per_id, user_index_per_id, values_per_id)
		print(found)
		gen_post_coincidences(found, u1, u2, pair_dir)

Exemplo n.º 3

0

Exibir arquivo

    def join_all_results(self):
        """Merge every result file into one nested dictionary.

        Each ``(filename, prefix)`` entry of ``self.list_files`` is read with
        ``read_csv_list`` (first row skipped). Every row is stored as
        ``result[row[0]][row[1]][prefix] = float(row[2])``.

        Returns:
            dict: nested mapping ``{row[0]: {row[1]: {prefix: value}}}``.
        """
        dictio_of_results = {}
        # Single start time: the per-file message reports the time elapsed
        # since the whole join started.
        tic = time.time()
        for indi, tup in enumerate(self.list_files):
            filename, prefix = tup[0], tup[1]
            print("[-] Going for file: %d - %s" % (indi, filename))

            lst_results = read_csv_list(filename)[1:]
            filelen = len(lst_results)
            print("[+] Sorting list")
            lst_results = sorted(lst_results, key=lambda x: x[0] + x[1])
            status.create_numbar(100, filelen)
            for indj, entry in enumerate(lst_results):
                status.update_numbar(indj, filelen)
                # setdefault creates the missing levels with one lookup each.
                per_first = dictio_of_results.setdefault(entry[0], {})
                per_pair = per_first.setdefault(entry[1], {})
                per_pair[prefix] = float(entry[2])

            status.end_numbar()

            print("[+] Ended with file: %d - %s in %d seconds" %
                  (indi, filename, time.time() - tic))

        return dictio_of_results

Exemplo n.º 4

0

Exibir arquivo

def simplify_list():
    """Write a simplified copy of the complex user-to-trigrams CSV.

    For each row after the first, the first cell is kept and every other
    cell is reduced to the integer found before its first ``':'``. The
    result is written to ``trigram_files/user_to_trigrams.csv``.
    """
    folder = 'trigram_files/'
    rows = read_csv_list(folder + "user_to_trigrams_complex.csv")[1:]
    simplified = []
    for row in rows:
        numbers = tuple([int(cell.split(':')[0]) for cell in row[1:]])
        simplified.append((row[0], ) + numbers)
    gen_csv_from_tuples(folder + 'user_to_trigrams.csv',
                        ['user', 'trigrams_#'], simplified)

Exemplo n.º 5

0

Exibir arquivo

def user_removal_based_on_participation():
    """Return the set of users to keep, based on their participation.

    If ``num_files/keep_users.pkl`` exists it is unpickled and returned.
    Otherwise ``num_files/user_to_num.csv`` is read (first row skipped) and a
    user is kept when the post count is above the median post count, or the
    average characters per post is above the median of those averages. The
    result is pickled to ``num_files/keep_users.pkl`` before returning.

    Returns:
        set: identifiers (first CSV column) of the users to keep.
    """
    keep_users_p = 'num_files/keep_users.pkl'
    if os.path.exists(keep_users_p):
        return unpickle_object(keep_users_p)

    lst = read_csv_list('num_files/user_to_num.csv')[1:]
    users = [row[0] for row in lst]
    # Number of posts per user
    posts = np.array([int(row[1]) for row in lst])
    # Characters per user post
    chars = [np.array([int(cell) for cell in row[2:]]) for row in lst]

    # Average characters per post of a user. Users without any value get
    # None instead of being dropped, so this list stays aligned with `users`
    # and `posts` (dropping them paired averages with the wrong users).
    means = [c.mean() if len(c) > 0 else None for c in chars]

    limi = np.quantile(posts, .50)
    # The median is still computed over the users that do have values.
    limk = np.quantile(np.array([m for m in means if m is not None]), .50)

    keep_users = set()
    for user, n_posts, mean_chars in zip(users, posts, means):
        if n_posts > limi or (mean_chars is not None and mean_chars > limk):
            keep_users.add(user)

    pickle_object(keep_users, keep_users_p)
    return keep_users

Exemplo n.º 6

0

Exibir arquivo

 def read_list_with_format(self, filename):
     """Read a CSV file and convert every cell from the third one on to float.

     The first two cells of each row are left untouched. Rows are replaced
     in the list returned by ``read_csv_list`` and that list is returned.
     """
     rows = read_csv_list(filename)
     for position, row in enumerate(rows):
         converted = list(row)
         converted[2:] = [float(cell) for cell in converted[2:]]
         rows[position] = converted
     return rows

Exemplo n.º 7

0

Exibir arquivo

def plot_scoring_data2():
    """Plot the distribution of the last column of results_ip_comp.csv.

    The first row is skipped, the first ten values are printed and the
    histogram (with rug marks, without KDE) is saved to ``output.png``.
    """
    rows = read_csv_list("results_ip_comp.csv")[1:]
    scores = []
    for row in rows:
        scores.append(float(row[-1]))
    print(scores[:10])
    axes = sns.distplot(scores, kde=False, rug=True)
    axes.get_figure().savefig("output.png")

Exemplo n.º 8

0

Exibir arquivo

def extract_all_users():
	"""Collect the distinct users and the leading triples of weighted_average.csv.

	Returns:
		tuple: a list with every distinct value found in the first two
		columns (first row skipped), and a list with the first three cells
		of every row.
	"""
	print("[-] Extracting users")
	rows = read_csv_list("weighted_average.csv")[1:]
	users = set()
	for row in rows:
		users.update((row[0], row[1]))
	triples = [row[:3] for row in rows]
	return list(users), triples

Exemplo n.º 9

0

Exibir arquivo

def plot_connection_graph():
	"""Build a chord diagram from the first rows of results_ip_comp.csv.

	Reads at most 999 rows (first row skipped), keeps those whose last
	column is below 10000 and uses the first two columns as source/target
	links. The configured chord object is not returned by this function.
	"""
	rows = read_csv_list("results_ip_comp.csv")[1:1000]
	rows = [row for row in rows if float(row[-1]) < 10000.0]
	sources = [row[0] for row in rows]
	targets = [row[1] for row in rows]
	links = pd.DataFrame({'source': sources, 'target': targets})
	chord = hv.Chord(links).select(value=(5, None))
	chord_options = opts.Chord(
		cmap='Category20',
		edge_cmap='Category20',
		edge_color=dim('source').str(),
		labels='name',
		node_color=dim('index').str())
	chord.opts(chord_options)

Exemplo n.º 10

0

Exibir arquivo

def basic_solve_find_solutions(filepath):
    """Score every unordered pair of rows of the CSV file at ``filepath``.

    The first cell of a row is taken as the user and the remaining cells as
    its values. Each row is compared with every row after it.

    Returns:
        list: ``(user1, user2, score)`` tuples, where ``score`` is the
        result of ``basic_score`` on the two rows' values.
    """
    rows = read_csv_list(filepath)
    scored_pairs = []
    for next_pos, first in enumerate(rows, start=1):
        first_user = first[0]
        first_values = first[1:]
        for second in rows[next_pos:]:
            pair_score = basic_score(first_values, second[1:])
            scored_pairs.append((first_user, second[0], pair_score))
    return scored_pairs

Exemplo n.º 11

0

Exibir arquivo

def gen_new_dataset():
    """Load the rows of skype_files/user_to_skype.csv, skipping the first row.

    The number of rows loaded is printed before they are returned.
    """
    dataset_path = "skype_files/user_to_skype.csv"
    rows = read_csv_list(dataset_path)[1:]
    print("Length of the Dataset: %d" % (len(rows)))
    return rows

Exemplo n.º 12

0

Exibir arquivo

def gen_ip_values():
    """Assign every entry of ip_files/ip_count.csv a bucket from 1 to 254.

    Rows (first row skipped) are ordered by their second column, highest
    first, and split into consecutive groups of ``ceil(len / 254)`` rows.
    The first cell of each row is mapped to the 1-based number of its group.

    Returns:
        dict: first cell of each row -> bucket number.
    """
    lst_ips = read_csv_list("ip_files/ip_count.csv")[1:]
    # Compare the counts as numbers. The cells are presumably text (other
    # callers of read_csv_list convert them with int()/float()), and text
    # ordering would rank "9" above "10".
    lst_ips = sorted(lst_ips, key=lambda x: float(x[1]), reverse=True)
    print(lst_ips[:3])
    divisions = int(math.ceil(float(len(lst_ips)) / 254.0))
    dictio = {}
    # Position p falls in group p // divisions; with this group size the
    # group index never exceeds 253, so the buckets stay within 1..254.
    for position, elem in enumerate(lst_ips):
        dictio[elem[0]] = position // divisions + 1
    print(dictio)
    return dictio

Exemplo n.º 13

0

Exibir arquivo

def get_most_important():
    """Write croos_val.csv with the shared values of the first 100 pairs.

    For every name in ``lst_res2`` the matching ``<name>_files/user_to_<name>.csv``
    file is loaded (first row skipped). Then, for each of the first 100 rows
    of ``weighted_average.csv``, the first three cells are extended with the
    intersection of the two results returned by ``find_user_list`` for every
    name; a name is skipped when either result is ``None``.
    """
    rows = read_csv_list("weighted_average.csv")[1:]

    per_feature = {}
    feature_total = len(lst_res2)
    status.create_numbar(100, feature_total)
    for position, feature in enumerate(lst_res2):
        status.update_numbar(position, feature_total)
        per_feature[feature] = read_csv_list(
            feature + "_files/user_to_" + feature + ".csv")[1:]
    status.end_numbar()

    limit = 100
    output_rows = []
    status.create_numbar(100, limit)
    for position, row in enumerate(rows[:limit]):
        status.update_numbar(position, limit)
        merged = [row[0], row[1], row[2]]
        for feature in lst_res2:
            first, second = find_user_list(row[0], row[1],
                                           per_feature[feature])
            if first is not None and second is not None:
                merged.extend(first.intersection(second))
        output_rows.append(tuple(merged))
    status.end_numbar()

    gen_csv_from_tuples("croos_val.csv",
                        ['user_a', 'user_b', 'metric', 'similar_vals'],
                        output_rows)

Exemplo n.º 14

0

Exibir arquivo

def gen_skype_values():
    """Assign every entry of skype_files/skype_count.csv a bucket from 1 to 254.

    Rows (first row skipped) are ordered by their second column, highest
    first, and split into consecutive groups of ``ceil(len / 254)`` rows.
    The first cell of each row is mapped to the 1-based number of its group.

    Returns:
        dict: first cell of each row -> bucket number.
    """
    lst_skypes = read_csv_list("skype_files/skype_count.csv")[1:]
    print("Lenght Skype Count: %d" % (len(lst_skypes)))
    # Compare the counts as numbers. The cells are presumably text (other
    # callers of read_csv_list convert them with int()/float()), and text
    # ordering would rank "9" above "10".
    lst_skypes = sorted(lst_skypes, key=lambda x: float(x[1]), reverse=True)
    print(lst_skypes[:3])
    divisions = int(math.ceil(float(len(lst_skypes)) / 254.0))
    dictio = {}
    # Position p falls in group p // divisions; with this group size the
    # group index never exceeds 253, so the buckets stay within 1..254.
    for position, elem in enumerate(lst_skypes):
        dictio[elem[0]] = position // divisions + 1
    return dictio

Exemplo n.º 15

0

Exibir arquivo

def generate_graph():
    """Build a weighted graph from weighted_average.csv and export it as GEXF.

    Rows (first row skipped) whose third column is below 1.0 become edges
    between the first two columns, weighted by the third column. The graph
    is written to ``graph.gexf``.

    Returns:
        networkx.Graph: the graph that was written.
    """
    print("[-] Extracting data")
    lst = read_csv_list("weighted_average.csv")[1:]
    print("[-] Generating list")
    elist = []
    for row in lst:
        # Store the weight as a number rather than the raw CSV cell, so the
        # edges carry a usable numeric weight.
        weight = float(row[2])
        if weight < 1.0:
            elist.append((row[0], row[1], weight))
    print("[-] Generating graph")
    G = nx.Graph()
    G.add_weighted_edges_from(elist)
    print("[-] Pickling")
    nx.write_gexf(G, "graph.gexf")
    return G

Exemplo n.º 16

0

Exibir arquivo

def simplify():
    """Write filtered '_simple' copies of the trigram and timestamp files.

    Only rows whose first cell is in ``adhoc_removal.keep_users`` and that
    have more than one cell are kept. The row counts before and after the
    filter are printed for each file.
    """
    import adhoc_removal
    from functional import seq

    keep_users = adhoc_removal.keep_users

    def is_kept_user(row):
        return row[0] in keep_users

    def has_features(row):
        return len(row) > 1

    paths = [
        'trigram_files/user_to_trigram.csv',
        'timestamp_files/user_to_timestamp.csv'
    ]
    for path in paths:
        rows = read_csv_list(path)
        print("Initial length: %d" % (len(rows)))
        filtered = seq(rows).filter(is_kept_user).filter(has_features)
        kept = [tuple(row) for row in filtered]
        print("Final length: %d" % (len(kept)))
        gen_csv_from_tuples(path + '_simple', ['IdAuthor', 'Features'], kept)

Exemplo n.º 17

0

Exibir arquivo

    def get_joined_results(self, filename):
        """Load a joined results file into a nested dictionary.

        The first row is the header; its cells from the third one on name
        the value columns. Every following row is stored as
        ``result[row[0]][row[1]][column_name] = float(cell)``.

        Returns:
            tuple: the nested dictionary and the list of rows without the
            header.
        """
        table = read_csv_list(filename)
        head = table[0]
        lst_results = table[1:]
        dictio_of_results = {}
        for entry in lst_results:
            user0, user1 = entry[0], entry[1]
            for column, prefix in enumerate(head[2:], start=2):
                by_partner = dictio_of_results.setdefault(user0, dict())
                by_prefix = by_partner.setdefault(user1, dict())
                by_prefix[prefix] = float(entry[column])

        return dictio_of_results, lst_results

Exemplo n.º 18

0

Exibir arquivo

def gen_dictio_from_csv():
    """Fill the global ``dictio_of_results`` from combination.csv.

    The header cells from the third one on are used as keys; every other
    row is stored as ``dictio_of_results[row[0]][row[1]][header] = cell``
    (cells are stored as read, without conversion).

    Returns:
        list: the rows of the file without the header.
    """
    global dictio_of_results
    table = read_csv_list("combination.csv")
    headers = table[0][2:]
    rows = table[1:]
    total = len(rows)
    status.create_numbar(100, total)
    for position, row in enumerate(rows):
        status.update_numbar(position, total)
        pair_values = dictio_of_results.setdefault(row[0], {}).setdefault(
            row[1], {})
        for offset, cell in enumerate(row[2:]):
            pair_values[headers[offset]] = cell
    status.end_numbar()
    return rows

Exemplo n.º 19

0

Exibir arquivo

def generate_directories_for_users():
    """Generate the dataset of every user found before the 0.35 cut-off.

    Rows of ``weighted_average.csv`` (first row skipped) are scanned in file
    order and scanning stops at the first row whose third column is 0.35 or
    more. ``generate_user_dataset`` is then called once per distinct user
    seen in the first two columns of the scanned rows.
    """
    print("[>] Creating dir")
    create_dir("Author/")
    print("[>] Reading user csv list")
    rows = read_csv_list("weighted_average.csv")[1:]

    selected = set()
    for row in rows:
        if float(row[2]) >= 0.35:
            break
        selected.update(row[:2])

    total = len(selected)
    for position, user in enumerate(selected):
        generate_user_dataset(user, position, total)

Exemplo n.º 20

0

Exibir arquivo

def user_removal_based_on_participation():
	"""Return the set of users to keep, based on their participation.

	``num_files/user_to_num.csv`` is read (first row skipped) and a user is
	kept when the post count is above the median post count, or the average
	characters per post is above the median of those averages.

	Returns:
		set: identifiers (first CSV column) of the users to keep.
	"""
	print("[-] Starting user participation detection.")
	lst = read_csv_list('num_files/user_to_num.csv')[1:]

	users = [row[0] for row in lst]
	# Number of posts per user
	posts = np.array([int(row[1]) for row in lst])
	# Characters per user post
	chars = [np.array([int(cell) for cell in row[2:]]) for row in lst]

	# Average characters per post of a user. Users without any value get
	# None instead of being dropped, so this list stays aligned with `users`
	# and `posts` (dropping them paired averages with the wrong users).
	means = [c.mean() if len(c) > 0 else None for c in chars]

	limi = np.quantile(posts, .50)
	# The median is still computed over the users that do have values.
	limk = np.quantile(np.array([m for m in means if m is not None]), .50)

	keep_users = set()
	for user, n_posts, mean_chars in zip(users, posts, means):
		if n_posts > limi or (mean_chars is not None and mean_chars > limk):
			keep_users.add(user)
	print('[+] Extracted all the user participations.')
	return keep_users

Exemplo n.º 21

0

Exibir arquivo

# Script: print the percentiles of the per-user figures stored in
# num_files/user_to_num.csv and save a distribution plot of the second column.
import numpy as np
from common_utils import read_csv_list

import matplotlib
# The backend is selected before pyplot is imported.
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import seaborn as sns
print("Loading file data.")
# The first row of the file is skipped.
lst = read_csv_list('num_files/user_to_num.csv')[1:]
print("Loaded file data.")
# x: second column of every row, as integers.
x = np.array([int(x[1]) for x in lst])
# y: one integer array per row holding the cells from the third one on.
y = [np.array([int(y) for y in x[2:]]) for x in lst]

print("Created axis")
# z: mean of each non-empty array in y (rows without values are left out).
z = np.array([i.mean() for i in y if len(i) > 0])

# Quantiles of x and z from 0 to 1 in steps of 0.01.
xg = [np.quantile(x, i) for i in np.arange(0,1.01, 0.01)]
zg = [np.quantile(z, i) for i in np.arange(0,1.01, 0.01)]

print("XG", xg)
print("ZG", zg)


sns.set(style="whitegrid")
#ax = sns.boxplot(x=x)
# Plot the distribution of x and save the resulting figure.
ax = sns.distplot(x);
#plt.savefig('your_figure.png')
ax.figure.savefig("image.png")

Exemplo n.º 22

0

Exibir arquivo

def gen_latex_coincidences(do, specific_users=None):
	"""Write analysis.tex with one section per selected pair of users.

	Args:
		do: identifiers whose pickled structures under ``<id>_files/`` are
			loaded and handed to ``get_coincidences_for_pair``.
		specific_users: optional collection of users. When empty or omitted
			the first five rows of ``multfs.csv`` are used; otherwise every
			row whose two users both belong to the collection is used.

	Every row of ``multfs.csv`` (first row skipped) is expected to have six
	columns, the first two being the users of the pair.
	"""
	directory = "multfs_users/"
	lst_users = read_csv_list("multfs.csv")[1:]
	create_dir(directory)
	dictios_of_users = []
	value_inds = []
	user_inds = []
	header = """\\documentclass[12pt]{article}

\\usepackage[utf8]{inputenc}
\\usepackage[T1]{fontenc}
\\usepackage[USenglish]{babel}

\\usepackage{xcolor} % Colors
\\usepackage{tabularx} % Other type of columns
\\usepackage{caption} 
\\usepackage{hyperref}
\\renewcommand{\\baselinestretch}{1.3}


\\usepackage{minted}

\\title{Found pairs of users}
\\author{-}
\\date{}
\\begin{document}

\\maketitle\n"""

	footer = """
\\end{document}"""

	# If no specific users are given, take the first 5. The emptiness test
	# also covers an empty list, which the former `is None` check sent to the
	# filtering branch and therefore produced no pairs at all.
	if not specific_users:
		pairs = lst_users[:5]
	else:
		pairs = []
		for tuple_list in lst_users:
			(u1, u2, _, _, _, _) = tuple_list
			# A pair qualifies when both users were requested; it is added
			# once (it used to be appended once per matching user).
			if u1 in specific_users and u2 in specific_users:
				pairs.append(tuple_list)

	for _id in do:
		dictio_of_users = unpickle_object(_id + "_files/clean_dictio_of_users.pkl")
		dictios_of_users.append(dictio_of_users)

		value_ind = unpickle_object(_id + "_files/clean_value_ind.pkl")
		value_inds.append(value_ind)
		user_ind = unpickle_object(_id + "_files/clean_user_ind.pkl")
		user_inds.append(user_ind)
		print("ID: %s" %(_id), len(dictio_of_users), len(user_ind))

	# The context manager closes the file even if a pair fails. UTF-8 matches
	# the inputenc option declared in the LaTeX header.
	with open('analysis.tex', 'w+', encoding='utf-8') as tex_file:
		tex_file.write(header)
		for index, (u1, u2, _, _, _, _) in enumerate(pairs):
			print("Going for %d" % (index), u1, u2)
			# get_username() yields three values; only the name is used here.
			uname1, _, _ = get_username(u1)
			uname2, _, _ = get_username(u2)
			tex_file.write("\\section{%s(%s)-%s(%s)} \n" %(u1, uname1, u2, uname2))
			coins = get_coincidences_for_pair(u1, u2, dictios_of_users, user_inds, value_inds)
			gen_latex_post_coincidences(coins, u1, u2, tex_file)
		tex_file.write(footer)

Exemplo n.º 23

0

Exibir arquivo

def get_dictio_from_file():
	"""Load word_index.csv as a mapping from first cell to integer second cell.

	The first row of the file is skipped.
	"""
	word_to_index = {}
	for row in read_csv_list("word_index.csv")[1:]:
		word_to_index[row[0]] = int(row[1])
	return word_to_index

Exemplo n.º 24

0

Exibir arquivo

    def gen_data(self):
        """Build, or reload from disk, the index structures for ``self.data``.

        The five pickle paths under ``self.dir`` are always registered in
        ``self.cleanup_list``. When ``self.backup`` is set and all five files
        exist, four of the structures are unpickled and the usage dictionary
        is returned as ``None``. Otherwise the CSV file ``self.data`` is read
        (first row skipped) and the structures are built in memory.

        Returns:
            tuple: ``(user_ind, value_ind, dictio_of_users, dictio_of_values,
            dictio_of_usage)`` where

            * ``user_ind`` maps the first cell of a row to its row index,
            * ``value_ind`` maps a value to its index,
            * ``dictio_of_users`` maps a row index to its value indices,
            * ``dictio_of_values`` maps a value index to the row indices
              that contain it,
            * ``dictio_of_usage`` maps a row index to the usage figures
              returned by ``self.separate`` (``None`` when reloaded).

            Indices are ``numpy.uint32`` values.
        """
        tic = time.time()
        # Create the path for storing the dictionaries
        user_ind_p = self.dir + 'user_ind.pkl'
        value_ind_p = self.dir + 'value_ind.pkl'
        dictio_of_users_p = self.dir + 'dictio_of_users.pkl'
        dictio_of_values_p = self.dir + 'dictio_of_values.pkl'
        dictio_of_usage_p = self.dir + 'dictio_of_usage.pkl'
        pickle_paths = (user_ind_p, value_ind_p, dictio_of_users_p,
                        dictio_of_values_p, dictio_of_usage_p)

        # Adding files to list for cleanup
        for path in pickle_paths:
            self.cleanup_list.append(path)

        if self.backup and all(os.path.exists(path) for path in pickle_paths):
            self.pprint("Data Structures already exist, unpickling.", end='\r')
            user_ind = unpickle_object(user_ind_p)
            value_ind = unpickle_object(value_ind_p)
            dictio_of_users = unpickle_object(dictio_of_users_p)
            dictio_of_values = unpickle_object(dictio_of_values_p)
            # TODO Remove comment
            #dictio_of_usage = unpickle_object(dictio_of_usage_p)
            dictio_of_usage = None
            self.pprint("[END] Data Structures already exist, unpickling.",
                        get_ram(), get_elapsed_time(tic))
            return user_ind, value_ind, dictio_of_users, dictio_of_values, dictio_of_usage

        lst = read_csv_list(self.data)[1:]

        tic = time.time()
        user_ind = {}
        value_ind = {}
        dictio_of_users = {}
        dictio_of_values = {}
        dictio_of_usage = {}
        total = len(lst)
        max_val = np.uint32(0)
        for position, row in enumerate(lst):
            if position % 1000 == 0:
                self.pprint("Data Structures Generation",
                            "[%d Users Processed]" % (position),
                            "[%0.3f Percentage]" % ((position / total) * 100),
                            get_ram(),
                            get_elapsed_time(tic),
                            end='\r')
            uind = np.uint32(position)
            user_ind[row[0]] = uind
            dictio_of_users[uind] = []
            dictio_of_usage[uind] = []
            for t in row[1:]:
                value, usage = self.separate(t)
                usage = np.uint32(usage)
                if value not in value_ind:
                    # First time this value is seen: give it the next index.
                    value_ind[value] = max_val
                    dictio_of_values[max_val] = []
                    max_val += 1
                vind = value_ind[value]
                dictio_of_values[vind].append(uind)
                dictio_of_users[uind].append(vind)
                dictio_of_usage[uind].append(usage)

        # Report the number of rows actually processed. Using the last loop
        # index here under-reported by one and failed on an empty file
        # (undefined index, division by zero).
        final_percentage = 100.0 if total else 0.0
        self.pprint("[END] Data Structures Generation",
                    "[%d Users Processed]" % (total),
                    "[%0.3f Percentage]" % (final_percentage), get_ram(),
                    get_elapsed_time(tic))

        lst = None  # Freeing space from list, no longer needed

        # NOTE: the structures are not written to disk here, so the reload
        # branch above only applies when the pickle files already exist.
        return user_ind, value_ind, dictio_of_users, dictio_of_values, dictio_of_usage

Exemplo n.º 25

0

Exibir arquivo

def generate_word_dictionary():
	"""Build the word dictionary from ind_users.csv and store it.

	The rows of the file (first row skipped) are passed to
	``word_from_features`` and the result is handed to ``store_dictio``.
	"""
	print("[-] Generating dictionary")
	store_dictio(word_from_features(read_csv_list("ind_users.csv")[1:]))

Exemplo n.º 26

0

Exibir arquivo

def clean_usernames():
	"""Compute TF-IDF terms per (username, site) and write them to tfidf.csv.

	Steps:
	1. ``all_posts_all_users.csv`` is loaded into ``{row[0]: {row[1]: int(row[2])}}``.
	2. Rows of ``similar_usernames_full.csv`` (first row skipped) are checked:
	   when any of the figures looked up for a row is below the threshold
	   (20), the row's first cell is removed from the dictionary.
	3. For every remaining (username, site) the post contents are fetched
	   from the database, scored with ``tf_idf`` and the 50 best entries are
	   stored flattened in a tuple.
	4. A provisional ``tfidf_prov.csv`` is written once the entry with index
	   100 has been processed; ``tfidf.csv`` is written at the end.

	The database connection is always closed, even when a step fails.
	"""
	lst = read_csv_list("similar_usernames_full.csv")[1:]
	lst2 = read_csv_list("all_posts_all_users.csv")
	lst = [x for x in lst if x[0] != '']
	lst2 = [x for x in lst2 if x[0] != '']
	total = 0
	count = 0
	dictio = {}
	results = []
	threshold = 20
	for entry in lst2:
		dictio.setdefault(entry[0], {})[entry[1]] = int(entry[2])

	not_both = [x for x in lst if x[0] not in dictio]
	print(len(dictio), len({x[0] for x in lst}), len(not_both), len(not_both)+ len(dictio))

	for row in lst:
		boolean = True
		for site in row[1:]:
			if row[0] in dictio and dictio[row[0]][site] < threshold:
				boolean = False
		if boolean:
			results += [row]
		else:
			del dictio[row[0]]
	for row in results:
		if (len (row) > 3):
			count += 1
		if (len (row) > 4):
			total += 1

	print ("At least two: %d" % (len(dictio)), "At least three: %d" % (count) , "At least four: %d" % (total))
	conn = psycopg2.connect(database="crimebb", user=db_username, password=db_password,  host="127.0.0.1", port="5432")
	print("Database Connected....")
	query = """SELECT "Post"."Content"
	from "Post" JOIN "Member" ON "Post"."Author" = "Member"."IdMember"
	WHERE ("Member"."Username" = %s) AND "Member"."Site" = %s;"""

	# try/finally guarantees the connection is released if a query, the
	# TF-IDF step or the CSV export raises.
	try:
		status.create_numbar(100, len(dictio))

		for indi, user in enumerate(dictio.keys()):
			status.update_numbar(indi, len(dictio))
			for site in dictio[user].keys():
				cur = conn.cursor()
				try:
					cur.execute(query, (user, int(site)))
					rows = [record[0] for record in cur.fetchall()]
				finally:
					# Each cursor is closed as soon as its rows are read.
					cur.close()
				tfidf = tf_idf(rows)
				tfidf = sorted(tfidf, key=lambda x: x[-1], reverse=True)
				# Flatten the 50 best entries into a single sequence.
				flattened = [item for scored in tfidf[:50] for item in scored]
				dictio[user][site] = tuple(flattened)
			if indi == 100:
				# Provisional dump of the first 100 users.
				k = list(dictio.keys())
				rows_processed = [(name, forum) + dictio[name][forum] for name in k[:100] for forum in dictio[name].keys()]
				gen_csv_from_tuples("tfidf_prov.csv", [""], rows_processed)

		status.end_numbar()
		rows_processed = [(name, forum) + dictio[name][forum] for name in dictio.keys() for forum in dictio[name].keys()]
		gen_csv_from_tuples("tfidf.csv", [""], rows_processed)
	finally:
		conn.close()

Exemplo n.º 27

0

Exibir arquivo

def get_ind_features():
	"""Load ind_users.csv as a mapping from first cell to the remaining cells.

	The first row of the file is skipped.
	"""
	features = {}
	for row in read_csv_list("ind_users.csv")[1:]:
		features[row[0]] = row[1:]
	return features

Exemplo n.º 28

0

Exibir arquivo

def gen_new_dataset():
    """Return every row of combination.csv, including the first one."""
    return read_csv_list("combination.csv")

Exemplo n.º 29

0

Exibir arquivo

 def generate_graph_pickle():
     """Read the rows of weighted_average.csv, skipping the first row."""
     # NOTE(review): the rows are loaded but not used or returned in the
     # visible code; the rest of this function appears to be missing. Confirm
     # against the original file.
     lst = read_csv_list("weighted_average.csv")[1:]

Exemplo n.º 30

0

Exibir arquivo

def get_gen_fetures():
	"""Load gen_users.csv keyed by its first two cells joined with '-'.

	The first row is skipped; each key maps to the cells from the third one
	on.
	"""
	features = {}
	for row in read_csv_list("gen_users.csv")[1:]:
		features[row[0] + "-" + row[1]] = row[2:]
	return features