/
__init__.py
71 lines (61 loc) · 2.42 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import data
import graph
import graph_representation
import freq_representation
import evaluation
def classification_demo():
"""Function intended to illustrate classification in the experimental framework.
Intended as a basis for new experiments for those not intimately
familiar with the code.
"""
print 'Evaluation type: Classification'
print 'Graph type: Co-occurrence w/2-word window context'
print 'Centrality: Weighted degree'
print
print '> Reading data..'
corpus_path = '../data/tasa/TASA900_preprocessed'
docs, labels = data.read_files(corpus_path)
print '> Creating representations..'
dicts = []
for i, doc in enumerate(docs):
print ' ',str(i)+'/'+str(len(docs))
g = graph_representation.construct_cooccurrence_network(doc)
d = graph_representation.graph_to_dict(g, graph.GraphMetrics.WEIGHTED_DEGREE)
dicts.append(d)
vectors = graph_representation.dicts_to_vectors(dicts)
print '> Evaluating..'
score = evaluation.evaluate_classification(vectors, labels)
print ' score:', score
print
def retrieval_demo():
"""Function intended to illustrate retrieval in the experimental framework.
Intended as a basis for new experiments for those not intimately
familiar with the code.
"""
print 'Evaluation type: Retrieval'
print 'Graph type: Dependency'
print 'Centrality: PageRank'
print
print '> Reading data..'
desc_path = '../data/air/problem_descriptions_dependencies'
sol_path = '../data/air/solutions_preprocessed'
problems, _ = data.read_files(desc_path)
solutions, _ = data.read_files(sol_path)
print '> Creating solution representations..'
metric = freq_representation.FrequencyMetrics.TF_IDF
sol_vectors = freq_representation.text_to_vector(solutions, metric)
print '> Creating problem description representations..'
dicts = []
for i, doc in enumerate(problems):
print ' ',str(i)+'/'+str(len(problems))
g = graph_representation.construct_dependency_network(doc)
d = graph_representation.graph_to_dict(g, graph.GraphMetrics.PAGERANK)
dicts.append(d)
desc_vectors = graph_representation.dicts_to_vectors(dicts)
print '> Evaluating..'
score = evaluation.evaluate_retrieval(desc_vectors, sol_vectors)
print ' score:', score
print
# Script entry point: run both demo experiments back to back.
if __name__=='__main__':
    classification_demo()
    retrieval_demo()