-
Notifications
You must be signed in to change notification settings - Fork 1
/
parseQueries.py
54 lines (44 loc) · 2.03 KB
/
parseQueries.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
##############################################################
# Name: ParseQ
# Purpose: This module is designed to parse the given example queries
# Author: Damien Peltier & Corentin Seitre
# Created: 12/15 - 01/16
##############################################################
from parseCollection import replacePunct
import vectorial
import itertools
import argparse
def parseQueries(path="../CACM/query.text"):
    """Read a query file and split it into per-query lists of fields.

    The file content is first cut on the ".I " record marker; every
    resulting chunk is then cut wherever a newline is immediately followed
    by a dot (the start of a field such as ".W" or ".N").

    Args:
        path: Location of the query file. Defaults to the CACM query file
            used by the rest of this module.

    Returns:
        A list with one inner list of strings per chunk. Within an inner
        list the first string is the text that followed ".I " (``process``
        uses it as the query identifier) and each further string begins
        with its field letter. Whatever precedes the first ".I " marker
        (an empty string when the file starts with the marker) is kept as
        its own leading entry, so indices into the result are unchanged.
    """
    # Only the query file is needed here; the file is closed before the
    # content is split.
    with open(path, "r") as cacm:
        collection = cacm.read()
    return [item.split("\n.") for item in collection.split(".I ")]
def parseResults(path="../CACM/qrels.text"):
    """Group the second field of every line by the line's first field.

    Each usable line of the file contributes its second whitespace-separated
    field to the list stored under its first field; any further fields on
    the line are ignored.

    Args:
        path: Location of the relevance file. Defaults to the CACM qrels
            file used by the rest of this module.

    Returns:
        A plain dict mapping each first field (string) to the list of
        second fields (strings) seen for it, in file order.
    """
    parsed_resp = {}
    with open(path, "r") as qrels:
        for raw_line in qrels:
            # split() with no argument collapses runs of whitespace and
            # drops the trailing newline, so repeated spaces cannot yield
            # empty fields.
            fields = raw_line.split()
            if len(fields) < 2:
                # Blank, whitespace-only or single-field line: nothing to
                # record, and indexing fields[1] would fail.
                continue
            parsed_resp.setdefault(fields[0], []).append(fields[1])
    return parsed_resp
def process(collection, reverseType):
    """Run every parsed query through the vectorial search.

    For each record returned by ``parseQueries`` the sections starting with
    "W" are passed (without that leading letter) to ``replacePunct`` and the
    results are concatenated into one list of terms for the query.

    Args:
        collection: Forwarded unchanged as the first argument of
            ``vectorial.main``.
        reverseType: Forwarded unchanged as the second argument of
            ``vectorial.main``.

    Yields:
        Tuples ``(query_id, terms, result)`` where ``query_id`` is the
        right-stripped first field of the record, ``terms`` is the list
        built from its "W" sections and ``result`` is whatever
        ``vectorial.main`` returns for the space-joined terms.
    """
    parts = {}
    for item in parseQueries():
        query_id = item[0].rstrip()
        if not query_id:
            # Text located before the first ".I " marker is not a query;
            # do not send an empty query to the search.
            continue
        # startswith() is safe on an empty section, unlike section[0].
        # NOTE(review): replacePunct is assumed to return an iterable of
        # terms, since its results are chained together - confirm.
        term_groups = (replacePunct(section[1:])
                       for section in item[1:]
                       if section.startswith("W"))
        parts[query_id] = list(itertools.chain.from_iterable(term_groups))
    # items() exists on both Python 2 and Python 3 (iteritems() does not).
    for index, qu in parts.items():
        yield (index, qu, vectorial.main(collection, reverseType, " ".join(qu)))
if __name__ == "__main__":
    # Command-line entry point: declares the collection and index-type
    # options and parses them into ``args``.
    cli = argparse.ArgumentParser(
        description="Query CACM or WIKI and get vectorial results")
    cli.add_argument(
        "-c", "--collection",
        default="CACM",
        help="The collection we want to query from")
    cli.add_argument(
        "-i", "--inverse",
        default="standard",
        help="The type of inverse freq index to use (standard, "
             "tfidf, tfidfnorm")
    args = cli.parse_args()
    # Running the queries is currently disabled; only the arguments are parsed.
    #print process(args.collection, args.inverse)