def parse(self):
    """Parse the results file at ``self.filename``.

    Each line must consist of exactly six whitespace-separated columns::

        query_id  <ignored>  doc_id  rank  score  run_id

    Returns:
        A ``(run_id, results)`` tuple. ``run_id`` is the single run ID
        shared by every line (``None`` when the file contains no lines);
        ``results`` is a ``Results`` object filled through
        ``add_result(query_id, Result(doc_id, score, rank))``.

    Raises:
        ResultsParseError: if a line does not have six columns, if the
            run ID differs between lines, or if a (query_id, doc_id)
            pair occurs more than once.
        ValueError: if ``rank`` is not an integer literal or ``score`` is
            not a float literal (propagated unchanged from ``int()`` /
            ``float()``).
    """
    global_run_id = None
    seen_pairs = set()
    results = Results()

    with open(self.filename) as f:
        for line in f:
            line_components = line.strip().split()
            # A blank line splits into zero components, so it is rejected
            # here as well.
            if len(line_components) != 6:
                raise ResultsParseError(
                    'lines in results file should have exactly 6 columns')

            query_id, _, doc_id, rank, score, run_id = line_components
            rank = int(rank)
            score = float(score)

            # Every line must carry the same run ID as the first one.
            if global_run_id is None:
                global_run_id = run_id
            elif global_run_id != run_id:
                raise ResultsParseError(
                    'Mismatching runIDs in results file')

            # Key on the pair itself rather than on the concatenated
            # string: concatenation makes distinct pairs such as
            # ('q1', '2d') and ('q12', 'd') look like duplicates.
            key = (query_id, doc_id)
            if key in seen_pairs:
                raise ResultsParseError(
                    'Duplicate query_id, doc_id in results file')
            seen_pairs.add(key)

            results.add_result(query_id, Result(doc_id, score, rank))

    return global_run_id, results
def parse(self):
    """Parse and validate the results file at ``self.filename``.

    Each line must consist of exactly six whitespace-separated columns::

        query_id  <ignored>  doc_id  rank  score  run_id

    Returns:
        A ``(run_id, results)`` tuple. ``run_id`` is the single run ID
        shared by every line (``None`` when the file contains no lines);
        ``results`` is a ``Results`` object filled through
        ``add_result(query_id, Result(doc_id, score, rank))``.

    Raises:
        ResultsParser.ResultsParseError: if a line does not have six
            columns, if ``rank`` / ``score`` cannot be converted to
            ``int`` / ``float``, if ``doc_id`` is not contained in
            ``self.docno_list``, if the run ID differs between lines, or
            if a (query_id, doc_id) pair occurs more than once.
    """
    global_run_id = None
    seen_pairs = set()
    results = Results()

    with open(self.filename) as f:
        for line in f:
            line_components = line.strip().split()
            # A blank line splits into zero components, so it is rejected
            # here as well.
            if len(line_components) != 6:
                raise ResultsParser.ResultsParseError(
                    'lines in %s do not have exactly 6 columns'
                    % self.filename)

            query_id, _, doc_id, rank, score, run_id = line_components

            # Report malformed numbers as a parse error instead of letting
            # the raw ValueError escape.
            try:
                rank = int(rank)
                score = float(score)
            except ValueError:
                raise ResultsParser.ResultsParseError(
                    'Error parsing rank or score in %s' % self.filename)

            # NOTE(review): if docno_list is a plain list this membership
            # test is linear per line; confirm its type with the owner.
            if doc_id not in self.docno_list:
                raise ResultsParser.ResultsParseError(
                    'Docno %s does not exist for query %s in %s'
                    % (doc_id, query_id, self.filename))

            # Every line must carry the same run ID as the first one.
            if global_run_id is None:
                global_run_id = run_id
            elif global_run_id != run_id:
                raise ResultsParser.ResultsParseError(
                    'Mismatching runIDs in %s' % self.filename)

            # Key on the pair itself rather than on the concatenated
            # string: concatenation makes distinct pairs such as
            # ('q1', '2d') and ('q12', 'd') look like duplicates.
            key = (query_id, doc_id)
            if key in seen_pairs:
                raise ResultsParser.ResultsParseError(
                    'Duplicate query_id, doc_id in %s' % self.filename)
            seen_pairs.add(key)

            results.add_result(query_id, Result(doc_id, score, rank))

    return global_run_id, results