Пример #1
0
def createTree(topicFiles, distanceFiles):
    """Build a nested-dict tree from per-level topic files.

    ``topicFiles[0]`` supplies the leaves; every later entry supplies one
    level of internal nodes.  A node at level ``i`` is attached to the node
    at level ``i + 1`` whose column holds the smallest value in that node's
    row of the matrix loaded from ``distanceFiles[i]``.

    Args:
        topicFiles: Sequence of arguments for ``ioFile.load_object``, one per
            level.  Each must load to a sequence of topics, where a topic is
            an iterable of strings (joined with single spaces for the name).
        distanceFiles: Sequence of arguments for ``ioFile.load_object``;
            entry ``i`` is loaded for every level except the last and is
            converted with ``np.matrix``.  Presumably rows index the topics
            of level ``i`` and columns those of level ``i + 1`` -- confirm
            against the code that writes these files.

    Returns:
        ``{"name": '...', "children": nodes}`` where ``nodes`` are the nodes
        of the last level (an empty list when ``topicFiles`` is empty).
        Leaves look like ``{"name": ..., "size": 1}``; internal nodes look
        like ``{"name": ..., "children": [...]}``.
    """
    level = len(topicFiles)
    nodes = []
    # parent[k] is the index (in the next level) that node k attaches to.
    parent = []
    for i in range(level):
        topics = ioFile.load_object(topicFiles[i])
        if i == 0:
            # nodes at the bottom level of the tree
            nodes = [{"name": ' '.join(topic), "size": 1} for topic in topics]
        else:
            pre_nodes = nodes
            nodes = []
            for j, topic in enumerate(topics):
                children = [
                    pre_nodes[index] for index in np.where(parent == j)[0]
                ]
                nodes.append({"name": ' '.join(topic), "children": children})
        if i < level - 1:
            distances = np.matrix(ioFile.load_object(distanceFiles[i]))
            # ravel() (rather than squeeze()) keeps the result 1-D even when
            # the level has a single node; a 0-d array would break the
            # np.where(...)[0] lookup above.
            parent = np.asarray(distances.argmin(1)).ravel()

    root = {"name": '...', "children": nodes}

    return root
Пример #2
0
def get_classes(class_mode):
    """Load the stored class list for a classification scheme.

    Args:
        class_mode: Either ``'acm-class'`` or ``'arxiv-category'``.

    Returns:
        The object ``ioFile.load_object`` returns for the matching pickle in
        ``<root_path>/class_topic``.

    Raises:
        ValueError: If ``class_mode`` is not one of the supported values
            (previously this surfaced as an ``UnboundLocalError``).
    """
    if class_mode == 'acm-class':
        basename = 'acm_class.pkl'
    elif class_mode == 'arxiv-category':
        basename = 'arxiv_category.pkl'
    else:
        raise ValueError(
            "unsupported class_mode %r; expected 'acm-class' or "
            "'arxiv-category'" % (class_mode,))

    fname = path.join(root_path, 'class_topic', basename)
    class_list = ioFile.load_object(fname)

    return class_list
Пример #3
0
def get_classes(class_mode):
    """Return the class list stored for the given classification scheme.

    Args:
        class_mode: ``'acm-class'`` or ``'arxiv-category'``.

    Returns:
        The result of ``ioFile.load_object`` on the corresponding pickle
        located under ``<root_path>/class_topic``.

    Raises:
        ValueError: When ``class_mode`` is neither supported value.  Without
            this check the function failed later with an unbound ``fname``.
    """
    if class_mode == 'acm-class':
        fname = path.join(root_path, 'class_topic', 'acm_class.pkl')
    elif class_mode == 'arxiv-category':
        fname = path.join(root_path, 'class_topic', 'arxiv_category.pkl')
    else:
        raise ValueError(
            "unsupported class_mode %r; expected 'acm-class' or "
            "'arxiv-category'" % (class_mode,))

    return ioFile.load_object(fname)
Пример #4
0
def statistics_for_class(class_mode, class_name):
    """Build bar-chart data for one class over the years 1993-2015.

    Args:
        class_mode: ``'acm-class'`` or ``'arxiv-category'``; selects which
            class/topic pickle under ``<root_path>/class_topic`` is loaded.
        class_name: Key looked up inside each year's entry of that pickle.

    Returns:
        Whatever ``graph.createBarChat`` returns for the years in which
        ``class_name`` was present.

    Raises:
        ValueError: If ``class_mode`` is not a supported value (previously
            an ``UnboundLocalError``).
    """
    if class_mode == 'acm-class':
        pickle_name = 'class_topic/class_topic_acm-class.pkl'
    elif class_mode == 'arxiv-category':
        pickle_name = 'class_topic/class_topic_arxiv-category.pkl'
    else:
        raise ValueError(
            "unsupported class_mode %r; expected 'acm-class' or "
            "'arxiv-category'" % (class_mode,))
    # Loaded object is indexed as clf_topic[str(year)][class_name].
    clf_topic = ioFile.load_object(path.join(root_path, pickle_name))

    clf_topic_stat = []
    years = []
    for year in range(1993, 2016):
        try:
            assignments = clf_topic[str(year)][class_name]
        except KeyError:
            # Either the year or the class is absent; skip this year.
            print("No documents belonging to %s in %s " % (class_name, year))
            continue
        # Count how often each value occurs (presumably topic ids -- confirm).
        clf_topic_stat.append(Counter(assignments))
        years.append(year)

    topicFiles = fileSys.traverseTopicDirecotry(model_path, 1, years)

    topic_bar = graph.createBarChat(topicFiles, clf_topic_stat, years)

    return topic_bar
Пример #5
0
def statistics_for_class(class_mode, class_name):
    """Collect per-year statistics of one class and turn them into bar data.

    The years 1993 through 2015 are scanned; a year is kept only when the
    loaded mapping has an entry for both ``str(year)`` and ``class_name``.

    Args:
        class_mode: ``'acm-class'`` or ``'arxiv-category'``.
        class_name: Class to look up in each year's entry.

    Returns:
        The value returned by ``graph.createBarChat`` for the kept years.

    Raises:
        ValueError: For an unsupported ``class_mode``.  The original code
            fell through and failed on an unbound local instead.
    """
    pickle_by_mode = {
        'acm-class': 'class_topic/class_topic_acm-class.pkl',
        'arxiv-category': 'class_topic/class_topic_arxiv-category.pkl',
    }
    if class_mode not in pickle_by_mode:
        raise ValueError(
            "unsupported class_mode %r; expected 'acm-class' or "
            "'arxiv-category'" % (class_mode,))
    clf_topic = ioFile.load_object(
        path.join(root_path, pickle_by_mode[class_mode]))

    clf_topic_stat = []
    years = []
    for year in range(1993, 2016):
        try:
            per_class = clf_topic[str(year)][class_name]
        except KeyError:
            # Parenthesised single-argument print behaves the same on
            # Python 2 and Python 3.
            print("No documents belonging to %s in %s " % (class_name, year))
        else:
            clf_topic_stat.append(Counter(per_class))
            years.append(year)

    topicFiles = fileSys.traverseTopicDirecotry(model_path, 1, years)

    return graph.createBarChat(topicFiles, clf_topic_stat, years)
Пример #6
0
def createLink(filenames, topic_num, fun, clf_topic_stat=None):
    """Create graph links between the topic nodes of consecutive years.

    Args:
        filenames: Distance files, one per pair of consecutive years; entry
            ``i`` is loaded with ``ioFile.load_object`` and indexed as
            ``distances[source_topic]`` to get the distances to the topics
            of year ``i + 1``.
        topic_num: Per-year node counts (array or list).  It is no longer
            modified in place.
        fun: Linking strategy.
            ``0`` links every pair whose distance is below the module-level
            ``distance_constraint``; ``1`` links each source topic to the
            first target with the minimum distance; ``2`` works like ``0``
            but only between topics that are keys of ``clf_topic_stat[i]``
            and ``clf_topic_stat[i + 1]``.  Any other value yields no links.
        clf_topic_stat: Per-year mappings keyed by topic index; required
            only when ``fun == 2``.  Node positions follow the iteration
            order of these mappings.

    Returns:
        A list of ``{"source": int, "target": int, "value": 5}`` dicts whose
        indices refer to positions in the node list.
    """
    # Each year contributes one extra node (the "+ 1"); createNode in this
    # file emits an empty-name placeholder ahead of each year's topics.
    # Work on a fresh array so the caller's topic_num stays untouched.
    nodes_per_year = np.asarray(topic_num) + 1
    # start index of each year
    node_index = [0] + np.cumsum(nodes_per_year).tolist()[:-1]

    links = []
    for i, fname in enumerate(filenames):
        # indexes of first nodes in the graph for year i and i+1
        node_index_i = node_index[i] + 1
        node_index_j = node_index[i + 1] + 1
        # distances between year i and i+1
        distances = ioFile.load_object(fname)
        if fun < 2:
            for index_i in range(len(distances)):
                row = np.array(distances[index_i])
                if fun == 0:
                    for index_j in np.where(row < distance_constraint)[0]:
                        links.append({
                            "source": node_index_i + index_i,
                            "target": node_index_j + index_j,
                            "value": 5
                        })
                elif fun == 1:
                    index = np.where(row == row.min())[0][0]
                    links.append({
                        "source": node_index_i + index_i,
                        "target": node_index_j + index,
                        "value": 5
                    })
        elif fun == 2:
            # Position of every target topic among year i+1's nodes; a dict
            # lookup replaces the Python-2-only, O(n) keys().index(...).
            target_pos = {
                topic: pos
                for pos, topic in enumerate(clf_topic_stat[i + 1])
            }
            for source_pos, index_i in enumerate(clf_topic_stat[i]):
                row = np.array(distances[index_i])
                close = set(np.where(row < distance_constraint)[0])
                for index_j in close.intersection(target_pos):
                    links.append({
                        "source": node_index_i + source_pos,
                        "target": node_index_j + target_pos[index_j],
                        "value": 5
                    })

    return links
Пример #7
0
def createBarChat(topicFiles, clf_topic_stat, years):
    """Assemble one bar-chart record per topic file.

    Args:
        topicFiles: Sequence of arguments for ``ioFile.load_object``.
        clf_topic_stat: Sequence indexed in parallel with ``topicFiles``;
            each entry is handed to ``statOfClassification``.
        years: Sequence indexed in parallel with ``topicFiles``.

    Returns:
        A list of ``{"year": ..., "doc": ..., "topics": ...}`` dicts, where
        ``doc`` and ``topics`` are the two values returned by
        ``statOfClassification``.
    """
    bar_data = []
    for pos, topic_file in enumerate(topicFiles):
        topics = ioFile.load_object(topic_file)
        stat = clf_topic_stat[pos]
        year = years[pos]
        doc_num, topic_percent = statOfClassification(stat, topics)
        record = {"year": year, "doc": doc_num, "topics": topic_percent}
        bar_data.append(record)
    return bar_data
Пример #8
0
def topics_for_class(class_mode, class_name, start, end):
    """Build the topic graph of one class for the years ``start``..``end``.

    Args:
        class_mode: ``'acm-class'`` or ``'arxiv-category'``; selects which
            class/topic pickle under ``<root_path>/class_topic`` is loaded.
        class_name: Key looked up inside each year's entry of that pickle.
        start: First year (inclusive).
        end: Last year (inclusive).

    Returns:
        A ``(topic_graph, years)`` tuple: the result of
        ``graph.createGraph`` and the set of years for which ``class_name``
        had an entry.

    Raises:
        ValueError: If ``class_mode`` is not a supported value (previously
            an ``UnboundLocalError``).
    """
    if class_mode == 'acm-class':
        pickle_name = 'class_topic/class_topic_acm-class.pkl'
    elif class_mode == 'arxiv-category':
        pickle_name = 'class_topic/class_topic_arxiv-category.pkl'
    else:
        raise ValueError(
            "unsupported class_mode %r; expected 'acm-class' or "
            "'arxiv-category'" % (class_mode,))
    clf_topic = ioFile.load_object(path.join(root_path, pickle_name))

    clf_topic_stat = []
    topic_num = []
    # Kept as an ascending list so the files looked up below line up with
    # clf_topic_stat / topic_num; iterating a set gives no such guarantee.
    valid_years = []
    for year in range(start, end + 1):
        try:
            assignments = clf_topic[str(year)][class_name]
        except KeyError:
            continue
        clf_topic_stat.append(Counter(assignments))
        topic_num.append(len(set(assignments)))
        valid_years.append(year)

    topicFiles = fileSys.traverseTopicDirecotry(
        model_path, 1, list(valid_years))
    distance_dir = path.join(root_path, 'class_topic/distance')
    distanceFiles = fileSys.traverseDistanceDirectory(
        distance_dir, list(valid_years))

    topic_graph = graph.createGraph(topicFiles, distanceFiles, 2, clf_topic_stat, topic_num)

    # Callers have always received a set here; keep that type.
    return topic_graph, set(valid_years)
Пример #9
0
def topics_for_class(class_mode, class_name, start, end):
    """Create the topic graph for a class across an inclusive year range.

    Years in which the loaded mapping has no entry for ``str(year)`` or for
    ``class_name`` are left out.

    Args:
        class_mode: ``'acm-class'`` or ``'arxiv-category'``.
        class_name: Class whose per-year entries are read.
        start: First year of the range (inclusive).
        end: Last year of the range (inclusive).

    Returns:
        ``(topic_graph, years)`` -- the value returned by
        ``graph.createGraph`` and a set holding the years that were kept.

    Raises:
        ValueError: For an unsupported ``class_mode``.  The original code
            fell through and failed on an unbound local instead.
    """
    pickle_by_mode = {
        'acm-class': 'class_topic/class_topic_acm-class.pkl',
        'arxiv-category': 'class_topic/class_topic_arxiv-category.pkl',
    }
    if class_mode not in pickle_by_mode:
        raise ValueError(
            "unsupported class_mode %r; expected 'acm-class' or "
            "'arxiv-category'" % (class_mode,))
    clf_topic = ioFile.load_object(
        path.join(root_path, pickle_by_mode[class_mode]))

    clf_topic_stat = []
    topic_num = []
    kept_years = []  # ascending, parallel to clf_topic_stat and topic_num
    for year in range(start, end + 1):
        try:
            per_class = clf_topic[str(year)][class_name]
        except KeyError:
            continue
        clf_topic_stat.append(Counter(per_class))
        topic_num.append(len(set(per_class)))
        kept_years.append(year)

    # Pass ordered lists (not a set) so the files come back in the same
    # order as the statistics collected above.
    topicFiles = fileSys.traverseTopicDirecotry(model_path, 1, list(kept_years))
    distance_root = path.join(root_path, 'class_topic/distance')
    distanceFiles = fileSys.traverseDistanceDirectory(
        distance_root, list(kept_years))

    topic_graph = graph.createGraph(topicFiles, distanceFiles, 2, clf_topic_stat, topic_num)

    # The second element stays a set for backward compatibility.
    return topic_graph, set(kept_years)
Пример #10
0
def createTree(topicFiles, distanceFiles):
    """Assemble a hierarchy of topics as nested dicts.

    The first topic file provides the leaves and each following file one
    more level above them.  Between level ``i`` and level ``i + 1`` the
    matrix loaded from ``distanceFiles[i]`` decides the parent: every row is
    assigned to the column with the smallest value.

    Args:
        topicFiles: Per-level arguments for ``ioFile.load_object``; each
            loads to a sequence of topics (iterables of strings).
        distanceFiles: Per-level arguments for ``ioFile.load_object``; entry
            ``i`` is read for all levels but the last.  Presumably rows are
            level ``i`` topics and columns level ``i + 1`` topics -- confirm
            against the producer of these files.

    Returns:
        ``{"name": '...', "children": top_level_nodes}``.  Leaves carry
        ``"size": 1``; internal nodes carry a ``"children"`` list.
    """
    level = len(topicFiles)
    nodes = []
    parent = []
    for i in range(level):
        topics = ioFile.load_object(topicFiles[i])
        if i == 0:
            # nodes at the bottom level of the tree
            nodes = [{"name": ' '.join(topic), "size": 1} for topic in topics]
        else:
            pre_nodes, nodes = nodes, []
            for j, topic in enumerate(topics):
                child_indexes = np.where(parent == j)[0]
                nodes.append({
                    "name": ' '.join(topic),
                    "children": [pre_nodes[index] for index in child_indexes],
                })
        if i < level - 1:
            distances = np.matrix(ioFile.load_object(distanceFiles[i]))
            # Flatten with ravel(): squeeze() would return a 0-d array for a
            # single-row matrix, and np.where on 0-d input is rejected by
            # current NumPy.
            parent = np.asarray(distances.argmin(1)).ravel()

    return {"name": '...', "children": nodes}
Пример #11
0
def createBarChat(topicFiles, clf_topic_stat, years):
    """Produce the per-year records consumed by the bar chart.

    For every index ``k`` of ``topicFiles`` the topics are loaded with
    ``ioFile.load_object`` and combined with ``clf_topic_stat[k]`` by
    ``statOfClassification``.

    Args:
        topicFiles: Sequence of topic-file arguments.
        clf_topic_stat: Per-file statistics, same order as ``topicFiles``.
        years: Per-file year labels, same order as ``topicFiles``.

    Returns:
        List of dicts with the keys ``"year"``, ``"doc"`` and ``"topics"``.
    """
    bar_data = []
    for k in range(len(topicFiles)):
        loaded_topics = ioFile.load_object(topicFiles[k])
        stat_k = clf_topic_stat[k]
        year_k = years[k]
        doc_num, topic_percent = statOfClassification(stat_k, loaded_topics)
        bar_data.append(dict(year=year_k, doc=doc_num, topics=topic_percent))

    return bar_data
Пример #12
0
def createNode(filenames, clf_topic_stat=None):
    """Create the graph nodes for a sequence of topic files.

    For every file an empty-name placeholder node is emitted first, followed
    by one node per topic.  ``createLink`` in this file accounts for that
    placeholder with its ``+ 1`` offsets.

    Args:
        filenames: Arguments for ``ioFile.load_object``; each loads to an
            indexable sequence of topics (iterables of strings).
        clf_topic_stat: Optional per-file mappings.  When given, only the
            topics whose index is a key of ``clf_topic_stat[i]`` get a node,
            in the mapping's iteration order.

    Returns:
        ``(nodes, topic_num)`` where ``nodes`` is a list of ``{"name": str}``
        dicts and ``topic_num`` is a NumPy array holding ``len(topics)`` of
        every file.  Note that this is the full topic count even when
        ``clf_topic_stat`` restricts the emitted nodes.
    """
    nodes = []
    topic_num = []
    for i, fname in enumerate(filenames):
        topics = ioFile.load_object(fname)
        topic_num.append(len(topics))
        nodes.append({"name": ''})
        # Identity test: "== None" would be evaluated element-wise (and then
        # fail in the if) should an array-like ever be passed.
        if clf_topic_stat is None:
            nodes.extend({"name": ' '.join(topic)} for topic in topics)
        else:
            nodes.extend(
                {"name": ' '.join(topics[index])}
                for index in clf_topic_stat[i].keys()
            )

    return nodes, np.array(topic_num)
Пример #13
0
def createNode(filenames, clf_topic_stat=None):
    """Build the node list of the topic graph.

    Each file contributes an empty-name placeholder node followed by its
    topic nodes; a topic's name is its words joined by single spaces.

    Args:
        filenames: Arguments for ``ioFile.load_object``, one per file.
        clf_topic_stat: Optional sequence of mappings parallel to
            ``filenames``.  If supplied, nodes are created only for the topic
            indices that are keys of the corresponding mapping.

    Returns:
        A tuple ``(nodes, topic_num)``: the list of ``{"name": str}`` dicts
        and a NumPy array with the number of topics loaded from each file.
    """
    nodes = []
    topic_num = []
    restrict = clf_topic_stat is not None  # "is", not "==", for None checks
    for i, fname in enumerate(filenames):
        topics = ioFile.load_object(fname)
        topic_num.append(len(topics))
        nodes.append({"name": ''})
        if restrict:
            selected = [topics[index] for index in clf_topic_stat[i].keys()]
        else:
            selected = topics
        for topic in selected:
            nodes.append({"name": ' '.join(topic)})

    return nodes, np.array(topic_num)
Пример #14
0
def createLink(filenames, topic_num, fun, clf_topic_stat=None):
    """Link topic nodes of year ``i`` to topic nodes of year ``i + 1``.

    Args:
        filenames: One distance file per pair of consecutive years, loaded
            with ``ioFile.load_object``; ``distances[k]`` gives the distances
            from source topic ``k`` to every topic of the following year.
        topic_num: Per-year node counts, as an array or a list.  The argument
            is left unmodified.
        fun: ``0`` -- link all pairs closer than the module-level
            ``distance_constraint``; ``1`` -- link each source topic to the
            first minimum-distance target; ``2`` -- like ``0`` but limited to
            topics that are keys of ``clf_topic_stat`` for both years.  Other
            values produce no links.
        clf_topic_stat: Per-year mappings keyed by topic index, needed for
            ``fun == 2``; node positions follow their iteration order.

    Returns:
        List of ``{"source": int, "target": int, "value": 5}`` dicts.
    """
    # Every year occupies topic_num + 1 nodes because createNode (in this
    # file) puts a placeholder node in front of each year's topics.  Adding
    # to a new array avoids changing the array owned by the caller.
    nodes_per_year = np.asarray(topic_num) + 1
    # start index of each year
    node_index = [0] + np.cumsum(nodes_per_year).tolist()[:-1]

    links = []
    for i, fname in enumerate(filenames):
        # indexes of first nodes in the graph for year i and i+1
        node_index_i = node_index[i] + 1
        node_index_j = node_index[i + 1] + 1
        # distances between year i and i+1
        distances = ioFile.load_object(fname)
        if fun < 2:
            for index_i in range(len(distances)):
                row = np.array(distances[index_i])
                if fun == 0:
                    targets = np.where(row < distance_constraint)[0]
                elif fun == 1:
                    # first occurrence of the minimum only
                    targets = np.where(row == row.min())[0][:1]
                else:
                    targets = ()
                for index_j in targets:
                    links.append({"source": node_index_i + index_i,
                                  "target": node_index_j + index_j,
                                  "value": 5})
        elif fun == 2:
            # Map topic index -> position among the next year's nodes once,
            # instead of the Python-2-only keys().index() scan per link.
            target_pos = {
                topic: pos
                for pos, topic in enumerate(clf_topic_stat[i + 1])
            }
            for source_pos, index_i in enumerate(clf_topic_stat[i]):
                row = np.array(distances[index_i])
                close = set(np.where(row < distance_constraint)[0])
                for index_j in close.intersection(target_pos):
                    links.append({"source": node_index_i + source_pos,
                                  "target": node_index_j + target_pos[index_j],
                                  "value": 5})

    return links