Пример #1
0
def _distinct_author_ids(papers):
    """Return the set of distinct author ids credited on any of *papers*.

    The first column of each paper row is passed as ``paper_id`` to the
    authors query, and the first column of each returned author row is
    collected as the author id.
    """
    author_ids = set()
    for paper in papers:
        author_rows = SbbdDAO.select(SbbdQueries.getAuthors(paper_id=paper[0]))
        author_ids.update(row[0] for row in author_rows)
    return author_ids


def statistics(start_year, end_year):
    """Print paper and author counts for a period and for the years before it.

    Printed, in order:
      * papers and distinct authors found between year 1000 and
        ``start_year - 1``;
      * papers and distinct authors found between ``start_year`` and
        ``end_year``;
      * how many authors of the requested period do not appear among the
        earlier authors.

    NOTE(review): whether the year bounds are inclusive is decided by
    ``SbbdQueries.getPapers`` -- confirm there.

    Args:
        start_year: first year of the period being analysed.
        end_year: last year of the period being analysed.

    Returns:
        None. Results are written to standard output only.
    """
    # Lower bound used to mean "everything published before start_year".
    first_year = 1000
    last_year = start_year - 1
    # Single-argument print(...) behaves the same as the Python 2 print
    # statement and also parses under Python 3.
    print("Estatísticas para anos entre %s e %s:" % (start_year, end_year))

    # Papers and authors from before the requested period.
    first_papers = SbbdDAO.select(SbbdQueries.getPapers(first_year=first_year, last_year=last_year))
    first_publishers = _distinct_author_ids(first_papers)
    print("\tArtigos anteriores: %s" % len(first_papers))
    print("\tAutores anteriores: %s" % len(first_publishers))

    # Papers and authors inside the requested period.
    papers = SbbdDAO.select(SbbdQueries.getPapers(first_year=start_year, last_year=end_year))
    print("\tArtigos: %s" % len(papers))
    publishers = _distinct_author_ids(papers)
    print("\tTotal Autores: %s" % len(publishers))

    # Authors of the period that never published before it. Only the count
    # is reported, so a set difference replaces the list membership scan.
    new_publishers = publishers - first_publishers
    print("\tNovos Autores: %s" % len(new_publishers))
Пример #2
0
def _main_author_rows(first_year, last_year):
    """Return the author rows of everyone who published in a year range.

    Selects the papers of the range, gathers the author id (first column of
    each ``getAuthors`` row) of every paper, and resolves the distinct ids
    with one ``getAuthorsIn`` query.
    """
    papers = SbbdDAO.select(SbbdQueries.getPapers(first_year=first_year, last_year=last_year))
    author_ids = []
    for paper in papers:
        for author in SbbdDAO.select(SbbdQueries.getAuthors(paper_id=paper[0])):
            author_ids.append(author[0])
    # The ids are rendered as a textual "(id1,id2,...)" list because that is
    # what getAuthorsIn is handed here.
    # NOTE(review): an empty range yields "()" -- confirm the query accepts it.
    id_list = "(" + ",".join(str(author_id) for author_id in set(author_ids)) + ")"
    return SbbdDAO.select(SbbdQueries.getAuthorsIn(id_list))


def main():
    """Print, per number of in-community coauthors, how many outsiders joined.

    Steps:
      1. Load the coauthors map from disk.
      2. Take the authors of papers from 1099-2004 as the community and count,
         for every coauthor of a community member, how many community members
         list them (``k``).
      3. Drop coauthors that are themselves community members and build a
         histogram ``k -> number of outsiders``; bucket 0 starts at
         ``479888 - len(community)``.
      4. Take the authors of papers from 2004-2020 and build a second
         histogram over the remaining coauthors that appear among them.
      5. Print both histograms, the per-bucket ratio new/old, and the raw new
         counts.

    Columns 1 and 6 of an author row are both used as lookup keys.
    NOTE(review): presumably a name and an alternate name -- confirm against
    the ``getAuthorsIn`` schema.
    NOTE(review): 479888 looks like the total author population of the data
    set -- confirm.
    NOTE(review): year 2004 is passed to both period queries; check whether
    the overlap is intended.

    Returns:
        None. Everything is written to standard output.
    """
    coauthorsMap = readCoauthorsMap('/home/lucmir/Desktop/Faculdade/mestrado/periodo2/gerenciaDeDadosWeb/tpfinal/tpfinalgdw/dblpParser/bin/coauthorsMap.txt')

    # Community members and, per coauthor, the number of members linking to them.
    coauthor_map_count = {}
    publisher_set = set()
    for publisher in _main_author_rows(1099, 2004):
        publisher_set.add(publisher[1])
        publisher_set.add(publisher[6])
        for name in (publisher[1], publisher[6]):
            try:
                linked = list(coauthorsMap[name])
            except (KeyError, TypeError):
                # Best effort: names missing from the map (or with an
                # unusable entry) simply contribute no coauthors.
                continue
            for coauthor in linked:
                coauthor_map_count[coauthor] = coauthor_map_count.get(coauthor, 0) + 1

    # Histogram over outsiders only; members are removed from the count map
    # after the scan so the dict is not mutated while being iterated.
    to_delete = []
    k_count_map = {0: 479888 - len(publisher_set)}
    for key, count in coauthor_map_count.items():
        if key in publisher_set:
            to_delete.append(key)
        else:
            k_count_map[count] = k_count_map.get(count, 0) + 1
    for key in to_delete:
        coauthor_map_count.pop(key)
    print(k_count_map)

    # Authors of the later period.
    new_publisher_set = set()
    for publisher in _main_author_rows(2004, 2020):
        new_publisher_set.add(publisher[1])
        new_publisher_set.add(publisher[6])

    # Histogram of outsiders that show up in the later period.
    # NOTE(review): outsiders that do NOT show up are tallied under bucket 0;
    # confirm that this is the intended meaning of that bucket.
    new_k_count_map = {0: 0}
    for key, count in coauthor_map_count.items():
        if key in new_publisher_set:
            new_k_count_map[count] = new_k_count_map.get(count, 0) + 1
        else:
            new_k_count_map[0] += 1
    print(new_k_count_map)

    # Summary: ratio of new to old count per bucket.
    for i in new_k_count_map:
        print("%s %s" % (i, float(new_k_count_map[i]) / float(k_count_map[i])))

    print("\n")
    # Summary: raw new counts per bucket.
    for i in new_k_count_map:
        print("%s %s" % (i, float(new_k_count_map[i])))
Пример #3
0
def _main2_author_rows(first_year, last_year):
    """Return the author rows of everyone who published in a year range.

    Selects the papers of the range, gathers the author id (first column of
    each ``getAuthors`` row) of every paper, and resolves the distinct ids
    with one ``getAuthorsIn`` query.
    """
    papers = SbbdDAO.select(SbbdQueries.getPapers(first_year=first_year, last_year=last_year))
    author_ids = []
    for paper in papers:
        for author in SbbdDAO.select(SbbdQueries.getAuthors(paper_id=paper[0])):
            author_ids.append(author[0])
    # NOTE(review): an empty range yields "()" -- confirm the query accepts it.
    id_list = "(" + ",".join(str(author_id) for author_id in set(author_ids)) + ")"
    return SbbdDAO.select(SbbdQueries.getAuthorsIn(id_list))


def _main2_connectivity(coauthorsMap, friends):
    """Return the share of consecutive entries of *friends* that are linked.

    Only neighbouring pairs ``(friends[i], friends[i + 1])`` are checked with
    ``are_friends``; the ratio is rounded to one decimal. A list with fewer
    than two entries scores 0.0.
    """
    pair_count = len(friends) - 1
    if pair_count < 1:
        return 0.0
    linked = 0
    for friend_a, friend_b in zip(friends, friends[1:]):
        if are_friends(coauthorsMap, friend_a, friend_b):
            linked += 1
    return round(float(linked) / float(pair_count), 1)


def main2():
    """Print, per connectivity score, how many fringe users joined later.

    Steps:
      1. Load the coauthors map from disk.
      2. Take the authors of papers from 1099-2004 as the community and build
         the fringe: coauthor -> list of community author rows linking to it.
      3. Remove community members from the fringe and dump it to
         ``/tmp/outfile.txt``.
      4. Score every fringe user with the connectivity of its community
         friends and build a histogram ``score -> users``; bucket 0 starts at
         ``479888 - len(community)``.
      5. Take the authors of papers from 2004-2020 and build a second
         histogram over the fringe users that appear among them.
      6. Print both histograms, the per-bucket ratio new/old, and the raw new
         counts.

    Columns 1 and 6 of an author row are both used as lookup keys.
    NOTE(review): presumably a name and an alternate name -- confirm against
    the ``getAuthorsIn`` schema.
    NOTE(review): 479888 looks like the total author population of the data
    set -- confirm.

    Returns:
        None. Output goes to standard output and ``/tmp/outfile.txt``.
    """
    # The file is opened up front, as before, but through ``with`` so that it
    # is closed even when a query or lookup below raises.
    with open('/tmp/outfile.txt', 'w') as outfile:
        coauthorsMap = readCoauthorsMap('/home/lucmir/Desktop/Faculdade/mestrado/periodo2/gerenciaDeDadosWeb/tpfinal/tpfinalgdw/dblpParser/bin/coauthorsMap.txt')

        # Fringe user -> list of community author rows that list it as coauthor.
        fringe = {}
        publisher_set = set()
        for publisher in _main2_author_rows(1099, 2004):
            publisher_set.add(publisher[1])
            publisher_set.add(publisher[6])
            for name in (publisher[1], publisher[6]):
                try:
                    linked = list(coauthorsMap[name])
                except (KeyError, TypeError):
                    # Best effort: names missing from the map (or with an
                    # unusable entry) simply contribute no coauthors.
                    continue
                for coauthor in linked:
                    fringe.setdefault(coauthor, []).append(publisher)

        # Community members are not part of the fringe.
        for key in [user for user in fringe if user in publisher_set]:
            fringe.pop(key)
        outfile.write(str(fringe))

        # Connectivity score of every fringe user.
        connectivity_map = {}
        for user in fringe:
            connectivity_map[user] = _main2_connectivity(coauthorsMap, fringe[user])
        print(connectivity_map)

        # Histogram of scores. Score 0.0 hashes and compares equal to the
        # integer key 0, so it lands in the pre-seeded bucket.
        connectivity_count_map = {0: 479888 - len(publisher_set)}
        for key in connectivity_map:
            value = connectivity_map[key]
            connectivity_count_map[value] = connectivity_count_map.get(value, 0) + 1
        print(connectivity_count_map)

        # Authors of the later period.
        new_publisher_set = set()
        for publisher in _main2_author_rows(2004, 2020):
            new_publisher_set.add(publisher[1])
            new_publisher_set.add(publisher[6])

        # Histogram of fringe users that show up in the later period.
        # NOTE(review): users that do NOT show up are tallied under bucket 0;
        # confirm that this is the intended meaning of that bucket.
        new_connectivity_count_map = {0: 0}
        for key in connectivity_map:
            if key in new_publisher_set:
                value = connectivity_map[key]
                new_connectivity_count_map[value] = new_connectivity_count_map.get(value, 0) + 1
            else:
                new_connectivity_count_map[0] += 1
        print(new_connectivity_count_map)

        # Summary: ratio of new to old count per bucket.
        for i in new_connectivity_count_map:
            print("%s %s" % (i, float(new_connectivity_count_map[i]) / float(connectivity_count_map[i])))
        print("\n")
        # Summary: raw new counts per bucket.
        for i in new_connectivity_count_map:
            print("%s %s" % (i, float(new_connectivity_count_map[i])))