Exemplos de Concept em Python, exemplos de MEDRank.umls.concept.Concept em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: test_ranked_converter.py Projeto: nhsland/MEDRank

 def buildConcepts(self):
     cc = ConcreteConcept('hareitosis', 'i', ['hareitosis'], ['B02.34'],
                          [''])
     cc2 = ConcreteConcept('inflammed hair cancer', 'a',
                           ['hairitis', 'hairiatic cancer'],
                           ['A12.23.45', 'B01.23'], [''])
     self.storage = {'c12345': cc, 'c98765': cc2}
     Concept.init_storage(self.storage)
     return (Concept('c12345'), Concept('c98765'))

Exemplo n.º 2

0

Exibir arquivo

Arquivo: test_converter.py Projeto: YZWD/MEDRank

 def buildConcepts(self):
     cc=ConcreteConcept('hareitosis', 'i', ['hareitosis'], ['B02.34'], 
                        [''])
     cc2=ConcreteConcept('inflammed hair cancer', 'a',
                         ['hairitis', 'hairiatic cancer'],
                         ['A12.23.45', 'B01.23'], [''])
     self.storage={'c12345': cc, 'c98765': cc2}
     Concept.init_storage(self.storage)
     return (Concept('c12345'), Concept('c98765'))

Exemplo n.º 3

0

Exibir arquivo

Arquivo: test_converter.py Projeto: nhsland/MEDRank

 def testWrongMappingType(self):
     c1, c2 = self.buildConcepts()
     cc3 = ConcreteConcept('hairosis', 'g/p', ['hairotic hairisis'],
                           ['C1.2.3'], [''])
     # Add it in a roundabout way to the original storage
     self.storage['c2468'] = cc3
     c3 = Concept('c2468')
     self.assertEqual([], self.ruleless_converter.convert(c3).utterance)

Exemplo n.º 4

0

Exibir arquivo

Arquivo: workflow.py Projeto: YZWD/MEDRank

 def __init__(self, reader, graph_builder, ranker, eval_parameters, 
              ranking_cutoff,
              mesh_tree_filename, distance_matrix_filename,
              distance_function,
              umls_converter_data_filename, umls_concept_data_filename,
              output_file):
     logging.debug("Setting up a Workflow instance.")
     logging.debug("My reader is: %r", reader)
     self._reader=reader
     logging.debug("My graph builder is: %r", graph_builder)
     self._graph_builder=graph_builder
     self._ranker=MappedRanker(ranker)
     logging.debug("My ranker is: %r", self._ranker)
     self._ranking_cutoff=ranking_cutoff
     logging.debug("My ranking cutoff is: %r", self._ranking_cutoff)
     logging.debug("Creating a Tree instance from %s", mesh_tree_filename)
     self._mesh_tree=Tree(mesh_tree_filename)
     logging.debug("Creating SAVCC distance matrix with %r and distance "
                   "function %r", 
                   distance_matrix_filename, distance_function)
     self._matrix=SavccNormalizedMatrix(
                   open(distance_matrix_filename, "rb"), distance_function)
     logging.debug("Filling in the rest of the evaluation parameters.")
     self._eval_parameters=eval_parameters
     self._eval_parameters.mesh_tree=self._mesh_tree
     self._eval_parameters.savcc_matrix=self._matrix
     logging.debug("My evaluation parameters are: %r", 
                   self._eval_parameters)
     if umls_converter_data_filename is None:
         converter_data=None
     else:
         converter_data=pickle.load(open(umls_converter_data_filename, 
                                         "rb"))
     self._umls_converter=RankedConverter(Converter(self._mesh_tree, 
                                                    converter_data))
     logging.debug("My converter is: %r", self._umls_converter)
     logging.debug("Initializing Concept storage from %s", 
                   umls_concept_data_filename)
     if umls_concept_data_filename is None:
         Concept.init_storage()
     else:
         Concept.init_storage(StringDBDict(umls_concept_data_filename))
     self._output_file=output_file
     logging.debug("My output file is: %r", self._output_file)
     return

Exemplo n.º 5

0

Exibir arquivo

 def __init__(self, reader, graph_builder, ranker, eval_parameters,
              ranking_cutoff, mesh_tree_filename, distance_matrix_filename,
              distance_function, umls_converter_data_filename,
              umls_concept_data_filename, output_file):
     logging.debug("Setting up a Workflow instance.")
     logging.debug("My reader is: %r", reader)
     self._reader = reader
     logging.debug("My graph builder is: %r", graph_builder)
     self._graph_builder = graph_builder
     self._ranker = MappedRanker(ranker)
     logging.debug("My ranker is: %r", self._ranker)
     self._ranking_cutoff = ranking_cutoff
     logging.debug("My ranking cutoff is: %r", self._ranking_cutoff)
     logging.debug("Creating a Tree instance from %s", mesh_tree_filename)
     self._mesh_tree = Tree(mesh_tree_filename)
     logging.debug(
         "Creating SAVCC distance matrix with %r and distance "
         "function %r", distance_matrix_filename, distance_function)
     self._matrix = SavccNormalizedMatrix(
         open(distance_matrix_filename, "rb"), distance_function)
     logging.debug("Filling in the rest of the evaluation parameters.")
     self._eval_parameters = eval_parameters
     self._eval_parameters.mesh_tree = self._mesh_tree
     self._eval_parameters.savcc_matrix = self._matrix
     logging.debug("My evaluation parameters are: %r",
                   self._eval_parameters)
     if umls_converter_data_filename is None:
         converter_data = None
     else:
         converter_data = pickle.load(
             open(umls_converter_data_filename, "rb"))
     self._umls_converter = RankedConverter(
         Converter(self._mesh_tree, converter_data))
     logging.debug("My converter is: %r", self._umls_converter)
     logging.debug("Initializing Concept storage from %s",
                   umls_concept_data_filename)
     if umls_concept_data_filename is None:
         Concept.init_storage()
     else:
         Concept.init_storage(StringDBDict(umls_concept_data_filename))
     self._output_file = output_file
     logging.debug("My output file is: %r", self._output_file)
     return

Exemplo n.º 6

0

Exibir arquivo

 def convert(self, a_ranked_result_set):
     """Convert a ranked result set into a RankedConversionResult.
     In other words, convert a ranked term list to its MeSH equivalents."""
     result = RankedConversionResult()
     self._my_converter.start_conversion()
     for incoming_term, incoming_score in a_ranked_result_set:
         converted = self._my_converter.convert(
             Concept(incoming_term.node_id))
         if converted.utterance != []:
             result.add_term_score(converted, incoming_score)
         converted = self._my_converter.end_conversion()
         if converted.utterance != []:
             result.add_term_score(converted,
                                   incoming_score + self._checktag_boost)
     logging.log(ULTRADEBUG, "RankedConverter results: %r", result)
     return result

Exemplo n.º 7

0

Exibir arquivo

Arquivo: test_converter.py Projeto: nhsland/MEDRank

 def testTermNotInTreeRaisesException(self):
     c1, c2 = self.buildConcepts()
     cc3 = ConcreteConcept(
         'hairosis',
         'i',
         [
             'hairotic hairisis',
             'hair-raising hair',  # <--This'll be a descriptor
             'hurry'
         ],
         ['Q12345678', 'D123456', 'Q987564'],
         [''])
     # Add it in a roundabout way to the original storage
     self.storage['c2468'] = cc3
     c3 = Concept('c2468')
     self.ruleless_converter._skip_unknown = False
     self.assertRaises(TermNotInTree, self.ruleless_converter.convert, c3)

Exemplo n.º 8

0

Exibir arquivo

Arquivo: test_converter.py Projeto: nhsland/MEDRank

 def testDeepestOnePreferred(self):
     c1, c2 = self.buildConcepts()
     cc3 = ConcreteConcept(
         'hair-raising hair',
         'i',
         [
             'hairitis',  # <-- a descriptor
             'hairiatic cancer',  # <-- a descriptor
             'hareitosis'
         ],  # <-- a descriptor
         ['D12345678', 'D123456', 'D987564'],
         [''])
     # Add it in a roundabout way to the original storage
     self.storage['c2468'] = cc3
     c3 = Concept('c2468')
     # Should choose the deepest one (hairiatic cancer, according to
     # self.my_tree)
     self.assertEqual([Term('hairiatic cancer')],
                      self.ruleless_converter.convert(c3).utterance)

Exemplo n.º 9

0

Exibir arquivo

Arquivo: test_converter.py Projeto: nhsland/MEDRank

 def testDescriptorNamedLikeConceptPreferred(self):
     c1, c2 = self.buildConcepts()
     cc3 = ConcreteConcept(
         'hair-raising hair',
         'i',
         [
             'hairotic hairisis',  # <-- a descriptor
             'hair-raising hair',  # <-- a descriptor
             'hurry'
         ],  # <-- not a descriptor
         ['D12345678', 'D123456', 'Q987564'],
         [''])
     # Add it in a roundabout way to the original storage
     self.storage['c2468'] = cc3
     c3 = Concept('c2468')
     # Make sure the tree knows about this concept
     self.my_tree._tree['hair-raising hair'] = TreeNode(
         'hair-raising hair', 'MH', 'Q', set(['B12.34']))
     self.assertEqual([Term('hair-raising hair')],
                      self.ruleless_converter.convert(c3).utterance)

Exemplo n.º 10

0

Exibir arquivo

Arquivo: multiprocessing_workflow.py Projeto: YZWD/MEDRank

def multi_processor(reader,
                    workflow_class,
                    graph_builder_constructor, graph_builder_params,
                    ranker_constructor, ranker_params,
                    eval_parameters, 
                    ranking_cutoff,
                    mesh_tree_filename, distance_matrix_filename,
                    distance_function,
                    umls_converter_data_filename,
                    umls_concept_data_filename,
                    extra_data_name,
                    extra_data_contents,
                    output_file,
                    num_processes=None,
                    queue_size=None,
                    output_callback=output,
                    output_headers_callback=output_headers,
                    output_item_callback=output_one_item,
                    performance_tuning=True):
    """
    Perform the evaluation.
    Multiprocessing notes: It's the responsibility of the caller to make sure that
    extra_data_contents, if any, are multiprocessing-safe. For example, by using
    a SyncManager and Namespace and passing the proxy. See umls/concept for an example.
    """
    
    if num_processes is None:
        num_processes=cpu_count()

    if performance_tuning:
        # Since reading the file involves an awful lot of object creation 
        # and destruction we'll tweak the gc adjustments to sweep less frequently
        # IOW - we have a LOT of short-lived objects. No sense garbage-collecting
        # the latter generations very often.    
        # (this is about 10x, 5x, and 5x the usual)
        original_threshold=gc.get_threshold()
        gc.set_threshold(10 * original_threshold[0], 
                         5 * original_threshold[1],
                         5 * original_threshold[1]) 
        original_check_interval=sys.getcheckinterval()
        # Similarly, we'll try to minimize overhead from thread switches
        # 5x usual value
        sys.setcheckinterval(5*original_check_interval)
    logging.debug("Initializing Concept storage from %s", 
                  umls_concept_data_filename)
                  
    if umls_concept_data_filename is None:
        Concept.init_storage()
    else:
        Concept.init_storage(StringDBDict(umls_concept_data_filename))
    Pmid.init_storage()

    proctitle.setproctitle("MEDRank-main")
    
    processes=[]
    logging.info("Creating %d worker processes.", num_processes)
    #task_queue=[JoinableQueue(queue_size) for x in xrange(num_processes)]
    task_queues=[Queue(queue_size) for x in xrange(num_processes)]
    this_output_queue=Queue(2*queue_size)

    # Create an output processor
    output_processor=Process(target=output_callback, 
                             args=(output_file, 
                                   this_output_queue,
                                   output_headers_callback,
                                   output_item_callback))
    output_processor.start()
    
    for i in xrange(num_processes):
        this_process=Process(target=processor, args=(workflow_class,
                                                graph_builder_constructor, 
                                                graph_builder_params,
                                                ranker_constructor, 
                                                ranker_params,
                                                eval_parameters, 
                                                ranking_cutoff,
                                                mesh_tree_filename,
                                                distance_matrix_filename,
                                                distance_function,
                                                umls_converter_data_filename,
                                                extra_data_name,
                                                extra_data_contents,
                                                task_queues[i],
                                                this_output_queue,
                                                "MEDRank-Worker-%d" % i),
                             name="MEDRank-Worker-%d" % i)
        logging.log(ULTRADEBUG, "Created process: %r", this_process)
        this_process.start()
        processes.append((this_process, this_output_queue, task_queues[i]))
    
    all_results={}
    count=0

    # Use a single dispatch queue for automagical load balancing
    # CHANGED - Now uses multiple queues to avoid starving due to waiting on semlocks
    for each_article in reader:
        count+=1
        #queues_and_sizes=[(task_queues[x].qsize(), x) 
        #                  for x in xrange(num_processes)]
        #queues_and_sizes.sort()
        #target_process=queues_and_sizes[0][1]
        # logging.info("Dispatching article %d: %r", count, each_article)
        target_process=(count-1) % num_processes
        #Lowest-loaded process first.
        logging.info("Dispatching article %d: %s to %s", 
                     count,
                     each_article.set_id,
                     processes[target_process][0].name)
        task_queues[target_process].put(each_article)
        #task_queue[target_process].put(each_article)
        #task_queue.put(each_article)
        #logging.info("The task queue is approximately %d items long.", 
        #             task_queue.qsize())

    logging.log(ULTRADEBUG, "Waiting for processing to end.")
    all_results={}

    alive_processes=[x for x in processes if x[0].is_alive()]
    remaining_processes=len(alive_processes)

    logging.info("There are %d processes (out of %d) still alive.", 
                 remaining_processes,
                 num_processes)
    for i in xrange(remaining_processes):
        alive_processes[i][2].put('STOP')
        alive_processes[i][2].close()
    logging.debug("Sent STOP requests. Notifying queue that no further "
                  "requests will come.")

    logging.info("All information sent to the processors.")

    # Back to normal
    if performance_tuning:
        gc.set_threshold(original_threshold[0],
                         original_threshold[1],
                         original_threshold[2])
        sys.setcheckinterval(original_check_interval)

    # Note end of output

    while len(processes)>0:
        a_process=processes.pop()
        # We join the process to wait for the end of the reading 
        a_process[0].join()
        # logging.log(ULTRADEBUG, "Fetching results from finished process.")
        # all_results.update(a_process[1].get()) # Add results to result pool
        # logging.log(ULTRADEBUG, "Received results.")
    logging.info("Finishing writing out results.")
    this_output_queue.put("STOP")
    output_processor.join()
    logging.info("Results written. Finishing multiprocessing.")
    return

Exemplo n.º 11

0

Exibir arquivo

def multi_processor(reader,
                    workflow_class,
                    graph_builder_constructor, graph_builder_params,
                    ranker_constructor, ranker_params,
                    eval_parameters, 
                    ranking_cutoff,
                    mesh_tree_filename, distance_matrix_filename,
                    distance_function,
                    umls_converter_data_filename,
                    umls_concept_data_filename,
                    extra_data_name,
                    extra_data_contents,
                    output_file,
                    num_threads=None,
                    queue_size=None,
                    output_callback=output,
                    output_headers_callback=output_headers,
                    output_item_callback=output_one_item,
                    performance_tuning=True):
    """
    Perform the evaluation.
    Multithreading notes: It's the responsibility of the caller to make sure
    that extra_data_contents, if any, are thread-safe. 
    """
    if num_threads is None:
        num_threads=1

    logging.debug("Initializing Concept storage from %s", 
                  umls_concept_data_filename)
                  
    # Since there's no direct way of setting the concept cache's title, 
    # we set it here, wait for it to be inherited, and then get the 'real' 
    # process title for this one. 
    if umls_concept_data_filename is None:
        Concept.init_storage()
    else:
        Concept.init_storage(StringDBDict(umls_concept_data_filename))
    Pmid.init_storage()

    threads=[]
    logging.info("Creating %d worker threads.", num_threads)
    #task_queue=[JoinableQueue(queue_size) for x in xrange(num_processes)]
    task_queues=[Queue(queue_size) for x in xrange(num_threads)]
    this_output_queue=Queue(2*queue_size)

    # Create an output processor
    output_processor=Thread(target=output_callback, 
                             args=(output_file, 
                                   this_output_queue,
                                   output_headers_callback,
                                   output_item_callback))
    output_processor.start()
    
    for i in xrange(num_threads):
        this_thread=Thread(target=processor, args=(workflow_class,
                                                graph_builder_constructor, 
                                                graph_builder_params,
                                                ranker_constructor, 
                                                ranker_params,
                                                eval_parameters, 
                                                ranking_cutoff,
                                                mesh_tree_filename,
                                                distance_matrix_filename,
                                                distance_function,
                                                umls_converter_data_filename,
                                                extra_data_name,
                                                extra_data_contents,
                                                task_queues[i],
                                                this_output_queue),
                             name="MEDRank-Worker-%d" % i)
        logging.log(ULTRADEBUG, "Created thread: %r", this_thread)
        this_thread.start()
        threads.append((this_thread, this_output_queue, task_queues[i]))
    
    all_results={}
    count=0

    # Use a single dispatch queue for automagical load balancing
    # CHANGED - Now uses multiple queues to avoid starving due to waiting on semlocks
    for each_article in reader:
        count+=1
        # logging.info("Dispatching article %d: %r", count, each_article)
        target_thread=(count-1) % num_threads
        logging.info("Dispatching article %d: %s to %s", 
                     count,
                     each_article.set_id,
                     threads[target_thread][0].name)
        task_queues[target_thread].put(each_article)
        #task_queue[target_process].put(each_article)
        #task_queue.put(each_article)
        #logging.info("The task queue is approximately %d items long.", 
        #             task_queue.qsize())

    logging.log(ULTRADEBUG, "Waiting for processing to end.")
    all_results={}

    alive_threads=[x for x in threads if x[0].is_alive()]
    remaining_threads=len(alive_threads)

    logging.info("There are %d threads (out of %d) still alive.", 
                 remaining_threads,
                 num_threads)
    for i in xrange(remaining_threads):
        alive_threads[i][2].put('STOP')
        #alive_threads[i][2].close()
    logging.debug("Sent STOP requests. Notifying queue that no further "
                  "requests will come.")

    logging.info("All information sent to the threads.")

    # Note end of output

    while len(threads)>0:
        a_thread=threads.pop()
        # We join the process to wait for the end of the reading 
        a_thread[0].join()
        # logging.log(ULTRADEBUG, "Fetching results from finished process.")
        # all_results.update(a_process[1].get()) # Add results to result pool
        # logging.log(ULTRADEBUG, "Received results.")
    logging.info("Finishing writing out results.")
    this_output_queue.put("STOP")
    output_processor.join()
    logging.info("Results written. Finishing multithreading.")
    Pmid.close_storage()
    return

Exemplo n.º 12

0

Exibir arquivo

 def __init__(self, original_line, converter):
     RelationLine.__init__(self, original_line)
     self._mesh1 = converter.convert(Concept(self.CUI1))
     self._mesh2 = converter.convert(Concept(self.CUI2))
     dummy = converter.end_conversion()  # Discard extra terms generated by

Exemplo n.º 13

0

Exibir arquivo

 def __init__(self, original_line, converter):
     EntityLine.__init__(self, original_line)
     self._mesh = converter.convert(Concept(self.CUI))
     dummy = converter.end_conversion()  # Discard extra terms generated by

Exemplo n.º 14

0

Exibir arquivo

Arquivo: multithreaded_workflow.py Projeto: jherskovic/MEDRank

def multi_processor(
    reader,
    workflow_class,
    graph_builder_constructor,
    graph_builder_params,
    ranker_constructor,
    ranker_params,
    eval_parameters,
    ranking_cutoff,
    mesh_tree_filename,
    distance_matrix_filename,
    distance_function,
    umls_converter_data_filename,
    umls_concept_data_filename,
    extra_data_name,
    extra_data_contents,
    output_file,
    num_threads=None,
    queue_size=None,
    output_callback=output,
    output_headers_callback=output_headers,
    output_item_callback=output_one_item,
    performance_tuning=True,
):
    """
    Perform the evaluation.
    Multithreading notes: It's the responsibility of the caller to make sure
    that extra_data_contents, if any, are thread-safe. 
    """
    if num_threads is None:
        num_threads = 1

    logging.debug("Initializing Concept storage from %s", umls_concept_data_filename)

    # Since there's no direct way of setting the concept cache's title,
    # we set it here, wait for it to be inherited, and then get the 'real'
    # process title for this one.
    if umls_concept_data_filename is None:
        Concept.init_storage()
    else:
        Concept.init_storage(StringDBDict(umls_concept_data_filename))
    Pmid.init_storage()

    threads = []
    logging.info("Creating %d worker threads.", num_threads)
    # task_queue=[JoinableQueue(queue_size) for x in xrange(num_processes)]
    task_queues = [Queue(queue_size) for x in xrange(num_threads)]
    this_output_queue = Queue(2 * queue_size)

    # Create an output processor
    output_processor = Thread(
        target=output_callback, args=(output_file, this_output_queue, output_headers_callback, output_item_callback)
    )
    output_processor.start()

    for i in xrange(num_threads):
        this_thread = Thread(
            target=processor,
            args=(
                workflow_class,
                graph_builder_constructor,
                graph_builder_params,
                ranker_constructor,
                ranker_params,
                eval_parameters,
                ranking_cutoff,
                mesh_tree_filename,
                distance_matrix_filename,
                distance_function,
                umls_converter_data_filename,
                extra_data_name,
                extra_data_contents,
                task_queues[i],
                this_output_queue,
            ),
            name="MEDRank-Worker-%d" % i,
        )
        logging.log(ULTRADEBUG, "Created thread: %r", this_thread)
        this_thread.start()
        threads.append((this_thread, this_output_queue, task_queues[i]))

    all_results = {}
    count = 0

    # Use a single dispatch queue for automagical load balancing
    # CHANGED - Now uses multiple queues to avoid starving due to waiting on semlocks
    for each_article in reader:
        count += 1
        # logging.info("Dispatching article %d: %r", count, each_article)
        target_thread = (count - 1) % num_threads
        logging.info("Dispatching article %d: %s to %s", count, each_article.set_id, threads[target_thread][0].name)
        task_queues[target_thread].put(each_article)
        # task_queue[target_process].put(each_article)
        # task_queue.put(each_article)
        # logging.info("The task queue is approximately %d items long.",
        #             task_queue.qsize())

    logging.log(ULTRADEBUG, "Waiting for processing to end.")
    all_results = {}

    alive_threads = [x for x in threads if x[0].is_alive()]
    remaining_threads = len(alive_threads)

    logging.info("There are %d threads (out of %d) still alive.", remaining_threads, num_threads)
    for i in xrange(remaining_threads):
        alive_threads[i][2].put("STOP")
        # alive_threads[i][2].close()
    logging.debug("Sent STOP requests. Notifying queue that no further " "requests will come.")

    logging.info("All information sent to the threads.")

    # Note end of output

    while len(threads) > 0:
        a_thread = threads.pop()
        # We join the process to wait for the end of the reading
        a_thread[0].join()
        # logging.log(ULTRADEBUG, "Fetching results from finished process.")
        # all_results.update(a_process[1].get()) # Add results to result pool
        # logging.log(ULTRADEBUG, "Received results.")
    logging.info("Finishing writing out results.")
    this_output_queue.put("STOP")
    output_processor.join()
    logging.info("Results written. Finishing multithreading.")
    Pmid.close_storage()
    return

Exemplo n.º 15

0

Exibir arquivo

def multi_processor(reader,
                    workflow_class,
                    graph_builder_constructor,
                    graph_builder_params,
                    ranker_constructor,
                    ranker_params,
                    eval_parameters,
                    ranking_cutoff,
                    mesh_tree_filename,
                    distance_matrix_filename,
                    distance_function,
                    umls_converter_data_filename,
                    umls_concept_data_filename,
                    extra_data_name,
                    extra_data_contents,
                    output_file,
                    num_processes=None,
                    queue_size=None,
                    output_callback=output,
                    output_headers_callback=output_headers,
                    output_item_callback=output_one_item,
                    performance_tuning=True):
    """
    Perform the evaluation.
    Multiprocessing notes: It's the responsibility of the caller to make sure that
    extra_data_contents, if any, are multiprocessing-safe. For example, by using
    a SyncManager and Namespace and passing the proxy. See umls/concept for an example.
    """

    if num_processes is None:
        num_processes = cpu_count()

    if performance_tuning:
        # Since reading the file involves an awful lot of object creation
        # and destruction we'll tweak the gc adjustments to sweep less frequently
        # IOW - we have a LOT of short-lived objects. No sense garbage-collecting
        # the latter generations very often.
        # (this is about 10x, 5x, and 5x the usual)
        original_threshold = gc.get_threshold()
        gc.set_threshold(10 * original_threshold[0], 5 * original_threshold[1],
                         5 * original_threshold[1])
        original_check_interval = sys.getcheckinterval()
        # Similarly, we'll try to minimize overhead from thread switches
        # 5x usual value
        sys.setcheckinterval(5 * original_check_interval)
    logging.debug("Initializing Concept storage from %s",
                  umls_concept_data_filename)

    if umls_concept_data_filename is None:
        Concept.init_storage()
    else:
        Concept.init_storage(StringDBDict(umls_concept_data_filename))
    Pmid.init_storage()

    proctitle.setproctitle("MEDRank-main")

    processes = []
    logging.info("Creating %d worker processes.", num_processes)
    #task_queue=[JoinableQueue(queue_size) for x in xrange(num_processes)]
    task_queues = [Queue(queue_size) for x in xrange(num_processes)]
    this_output_queue = Queue(2 * queue_size)

    # Create an output processor
    output_processor = Process(target=output_callback,
                               args=(output_file, this_output_queue,
                                     output_headers_callback,
                                     output_item_callback))
    output_processor.start()

    for i in xrange(num_processes):
        this_process = Process(
            target=processor,
            args=(workflow_class, graph_builder_constructor,
                  graph_builder_params, ranker_constructor, ranker_params,
                  eval_parameters, ranking_cutoff, mesh_tree_filename,
                  distance_matrix_filename, distance_function,
                  umls_converter_data_filename, extra_data_name,
                  extra_data_contents, task_queues[i], this_output_queue,
                  "MEDRank-Worker-%d" % i),
            name="MEDRank-Worker-%d" % i)
        logging.log(ULTRADEBUG, "Created process: %r", this_process)
        this_process.start()
        processes.append((this_process, this_output_queue, task_queues[i]))

    all_results = {}
    count = 0

    # Use a single dispatch queue for automagical load balancing
    # CHANGED - Now uses multiple queues to avoid starving due to waiting on semlocks
    for each_article in reader:
        count += 1
        #queues_and_sizes=[(task_queues[x].qsize(), x)
        #                  for x in xrange(num_processes)]
        #queues_and_sizes.sort()
        #target_process=queues_and_sizes[0][1]
        # logging.info("Dispatching article %d: %r", count, each_article)
        target_process = (count - 1) % num_processes
        #Lowest-loaded process first.
        logging.info("Dispatching article %d: %s to %s", count,
                     each_article.set_id, processes[target_process][0].name)
        task_queues[target_process].put(each_article)
        #task_queue[target_process].put(each_article)
        #task_queue.put(each_article)
        #logging.info("The task queue is approximately %d items long.",
        #             task_queue.qsize())

    logging.log(ULTRADEBUG, "Waiting for processing to end.")
    all_results = {}

    alive_processes = [x for x in processes if x[0].is_alive()]
    remaining_processes = len(alive_processes)

    logging.info("There are %d processes (out of %d) still alive.",
                 remaining_processes, num_processes)
    for i in xrange(remaining_processes):
        alive_processes[i][2].put('STOP')
        alive_processes[i][2].close()
    logging.debug("Sent STOP requests. Notifying queue that no further "
                  "requests will come.")

    logging.info("All information sent to the processors.")

    # Back to normal
    if performance_tuning:
        gc.set_threshold(original_threshold[0], original_threshold[1],
                         original_threshold[2])
        sys.setcheckinterval(original_check_interval)

    # Note end of output

    while len(processes) > 0:
        a_process = processes.pop()
        # We join the process to wait for the end of the reading
        a_process[0].join()
        # logging.log(ULTRADEBUG, "Fetching results from finished process.")
        # all_results.update(a_process[1].get()) # Add results to result pool
        # logging.log(ULTRADEBUG, "Received results.")
    logging.info("Finishing writing out results.")
    this_output_queue.put("STOP")
    output_processor.join()
    logging.info("Results written. Finishing multiprocessing.")
    return