def setUp(self):
    '''
    Build ``self.tasks``, one entry per language code.

    For each of the pairs ('en', 'E') and ('ja', 'J'):
      1. ``create_query_subset`` is called with the language's query file
         and its INDX directory, and returns a query file path;
      2. ``create_tmp_intent_file`` is called with that query file path
         and returns an intent file path;
      3. ``Task.read`` is called with the query file, the iUnit file, the
         INDX directory, the HTML directory and the intent file, and its
         result is stored under the second element of the pair
         ('E' or 'J').
    '''
    self.tasks = {}
    for lang_dir, lang_code in (('en', 'E'), ('ja', 'J')):
        # The training-file templates take both parts of the pair.
        pair = (lang_dir, lang_code)
        index_dir = './data/MC2-training-documents/1C2-%s.INDX/' % lang_code
        html_dir = './data/MC2-training-documents/1C2-%s.HTML/' % lang_code

        queryfilepath = create_query_subset(
            './data/MC2-training/%s/1C2-%s-queries.tsv' % pair,
            index_dir)
        intentfilepath = create_tmp_intent_file(queryfilepath)

        iunit_path = './data/MC2-training/%s/1C2-%s-iunits.tsv' % pair
        self.tasks[lang_code] = Task.read(
            queryfilepath, iunit_path, index_dir, html_dir, intentfilepath)
def test_task_read(self):
    """Task.read (with intents)

    Calls ``Task.read`` with ``self.queryfilepath``, the 1C2-E iUnit file,
    the INDX and HTML directories and ``self.intentfilepath``, then checks
    the number of tasks and the query and intents of the first task.
    """
    loaded = Task.read(
        self.queryfilepath,
        "./data/MC2-training/en/1C2-E-iunits.tsv",
        "./data/MC2-training-documents/1C2-E.INDX/",
        "./data/MC2-training-documents/1C2-E.HTML/",
        self.intentfilepath,
    )
    self.assertEqual(len(loaded), 5)

    # Every remaining check looks at the first task only.
    first = loaded[0]
    self.assertEqual(first.query.qid, "1C2-E-0001")
    self.assertEqual(len(first.intents), 5)

    head_intent = first.intents[0]
    self.assertEqual(head_intent.qid, "1C2-E-0001")
    self.assertEqual(head_intent.iid, "1C2-E-0001-INTENT0001")
def test_task_read(self):
    '''Task.read

    Calls ``Task.read`` with ``self.queryfilepath``, the 1C2-E iUnit file
    and the INDX and HTML directories (no intent file), then checks the
    number of tasks and the query, iUnits and indices of the first task.
    '''
    loaded = Task.read(self.queryfilepath,
                       './data/MC2-training/en/1C2-E-iunits.tsv',
                       './data/MC2-training-documents/1C2-E.INDX/',
                       './data/MC2-training-documents/1C2-E.HTML/')
    self.assertEqual(len(loaded), 5)

    # Every remaining check looks at the first task only.
    first = loaded[0]
    self.assertEqual(first.query.qid, '1C2-E-0001')

    iunits = first.iunits
    self.assertEqual(len(iunits), 19)
    self.assertEqual(iunits[0].qid, '1C2-E-0001')
    self.assertEqual(iunits[0].uid, '1C2-E-0001-0001')

    indices = first.indices
    self.assertEqual(len(indices), 213)
    self.assertEqual(indices[0].qid, '1C2-E-0001')
    self.assertEqual(indices[0].rank, 1)
Index.qid: Query ID Index.filepath: filepath of an HTML file Index.rank: rank in a search engine result page Index.title: webpage title Index.url: webpage url Index.body: summary of the webpage """ def rank(self, task): """ Output ranked pairs of an iUnits and a score e.g. Random ranking method return [(i, 0) for i in task.iunits] """ return [(i, 0) for i in task.iunits] if __name__ == "__main__": from mobileclick import Task tasks = Task.read( "data/MC2-training/en/1C2-E-queries.tsv", "data/MC2-training/en/1C2-E-iunits.tsv", "data/MC2-training-documents/1C2-E.INDX", "data/MC2-training-documents/1C2-E.HTML", ) method = YourRankingMethod() run = method.generate_run("YourRun", "This is your run", tasks) run.save("./")
summary.add(iunit6) # added to the first layer summary.add(iunit7, intent1.iid) # added to the second layer summary.add(intent3) # added to the first layer summary.add(iunit8, intent3.iid) # added to the second layer The resultant summary is First layer: iunit1, iunit2, intent1, intent2, iunit6, intent3 Second layer: intent1: iunit3, iunit4, iunit7 intent2: iunit3, iunit5 intent3: iunit8 e.g. Random summarization method return Summary(task.query.qid, task.iunits) ''' return Summary(task.query.qid, task.iunits) if __name__ == '__main__': from mobileclick import Task tasks = Task.read( "data/MC2-test/en/MC2-E-queries.tsv", "data/MC2-test/en/MC2-E-iunits.tsv", "data/MC2-test-documents/MC2-E.INDX", "data/MC2-test-documents/MC2-E.HTML", "data/MC2-test/en/MC2-E-intents.tsv") method = YourSummarizationMethod() run = method.generate_run("YourRun", "This is your run", tasks) run.save('./')