def pjk_writer(entries, output_file, bench_freq):
    """Drain batches of entries from a queue into a Pajek file.

    Batches are pulled with ``entries.get()`` until the ``'STOP'`` sentinel
    arrives. For every entry in a batch, one edge is added per
    ``(entry["id"], citation)`` pair. After the sentinel is seen the
    accumulated graph is written to ``output_file``.

    Args:
        entries: Queue-like object exposing ``get()`` and ``task_done()``
            (presumably a joinable queue -- confirm against the caller).
            Each item is an iterable of dicts with ``"id"`` and
            ``"citations"`` keys, or the string ``'STOP'``.
        output_file: Destination handed to ``open_file`` in ``"w"`` mode.
        bench_freq: Accepted for interface compatibility; not referenced
            anywhere in this body.
    """
    graph = PajekFactory()
    bench = Benchmark()

    # Two-argument iter(): keep calling entries.get() until it yields 'STOP'.
    for batch in iter(entries.get, 'STOP'):
        for record in batch:
            for cited in record["citations"]:
                graph.add_edge(record["id"], cited)
            bench.increment()
        # Acknowledge the batch just consumed.
        entries.task_done()

    bench.print_freq()
    with open_file(output_file, "w") as out:
        graph.write(out)

    # Acknowledge the 'STOP' sentinel itself, which was also fetched via get().
    entries.task_done()
help="Directory to store temporary files in", default=None) parser.add_argument( '--subject', '-s', help= "For WoS, subject must include this. Can be a comma seperated list.") parser.add_argument( '--wos-only', help="For WoS, exclude any citations or ids that contain a dot (.)", action="store_true") parser.add_argument('infile', nargs='+') arguments = parser.parse_args() b = Benchmark() pjk = PajekFactory(temp_dir=arguments.temp_dir) subjects = None if arguments.subject: subjects = set(arguments.subject.split(",")) for filename in arguments.infile: with open_file(filename) as f: for line in f: entry = ujson.loads(line) b.increment() if arguments.wos_only and '.' in entry["id"]: continue if subjects:
def setUp(self):
    """Build the fixture objects: two in-memory text streams and a PajekFactory."""
    # Right-hand side is evaluated left to right, then assigned left to right.
    self.output, self._node_stream = io.StringIO(), io.StringIO()
    self.pjk = PajekFactory()