def main():
    """Run the workflow over the bundled '5th' test data set.

    Configures INFO-level logging, loads the pickled chunkmap, builds the
    TF-IDF counts from the SemRep output, rewinds the reader, and then runs
    ``myWorkflow`` with an output file opened at ``OUTPUT_FILE``.

    Every file opened here is closed explicitly, even when a step raises,
    so the output is flushed to disk deterministically.
    """
    my_format_string = ('%(asctime)s %(levelname)s %(module)s.'
                        '%(funcName)s: %(message)s')
    logging.basicConfig(level=logging.INFO, format=my_format_string)

    # NOTE(review): pickle.load can execute arbitrary code; this is only
    # acceptable because the chunkmap is a file shipped with the test data.
    # try/finally instead of `with`: BZ2File is only a context manager on
    # Python 2.7+.
    chunkmap_file = bz2.BZ2File('test_data/5th.chunkmap.bz2')
    try:
        chunkmap = chunkmap_factory(pickle.load(chunkmap_file))
    finally:
        chunkmap_file.close()

    semrep_file = bz2.BZ2File('test_data/5th.semrep.out.bz2')
    try:
        semrep_reader = SemrepOutput(semrep_file,
                                     DEFAULT_LINES_TO_IGNORE,
                                     chunkmap)
        tfidf = TF_IDF(file_mode="c")
        tfidf.build_tf_from_file(semrep_reader)
        # The TF pass consumed the reader; rewind it before the workflow
        # reads it again.
        semrep_reader.rewind()
        semrep_grapher = SemrepCooccurrenceGraphBuilder(
            node_weight_threshold=0.001,
            link_weight_threshold=0.003,
            tf_idf_provider=tfidf)
        eval_params = EvaluationParameters()
        eval_params.alpha = 0.65

        output_file = open(OUTPUT_FILE, 'w')
        try:
            work = myWorkflow(semrep_reader,
                              semrep_grapher,
                              TextRanker(),
                              eval_params,
                              PAGERANK_CUTOFF,
                              MESH_TREE_FILE,
                              SAVCC_MATRIX_FILE,
                              # 1/e**x for 0 <= x < 5, and 0.0 elsewhere.
                              lambda x: 1.0 / math.exp(x)
                              if x >= 0 and x < 5 else 0.0,
                              UMLS_CONVERTER_DATA,
                              UMLS_CONCEPT_DATA,
                              output_file)
            work.run()
        finally:
            output_file.close()
    finally:
        semrep_file.close()
def main():
    """Run the workflow over the bundled '5th' test data set.

    Steps, in order: set up INFO-level logging; unpickle the chunkmap;
    wrap the SemRep output in a ``SemrepOutput`` reader; build the TF-IDF
    term counts from it; rewind the reader; construct the co-occurrence
    graph builder and evaluation parameters; run ``myWorkflow`` with a
    file opened for writing at ``OUTPUT_FILE``.

    All three files opened here are closed in ``finally`` clauses so they
    are released (and the output flushed) even if a step fails.
    """
    my_format_string = ('%(asctime)s %(levelname)s %(module)s.'
                        '%(funcName)s: %(message)s')
    logging.basicConfig(level=logging.INFO, format=my_format_string)

    # NOTE(review): unpickling runs arbitrary code from the file; keep this
    # restricted to the trusted, locally shipped test data.
    # Explicit close() rather than `with`, because BZ2File only supports the
    # context-manager protocol from Python 2.7 onwards.
    chunkmap_file = bz2.BZ2File('test_data/5th.chunkmap.bz2')
    try:
        chunkmap = chunkmap_factory(pickle.load(chunkmap_file))
    finally:
        chunkmap_file.close()

    semrep_file = bz2.BZ2File('test_data/5th.semrep.out.bz2')
    output_file = None
    try:
        semrep_reader = SemrepOutput(semrep_file,
                                     DEFAULT_LINES_TO_IGNORE,
                                     chunkmap)
        tfidf = TF_IDF(file_mode="c")
        tfidf.build_tf_from_file(semrep_reader)
        # Building the term frequencies read through the reader, so reset
        # it before handing it to the workflow.
        semrep_reader.rewind()
        semrep_grapher = SemrepCooccurrenceGraphBuilder(
            node_weight_threshold=0.001,
            link_weight_threshold=0.003,
            tf_idf_provider=tfidf)
        eval_params = EvaluationParameters()
        eval_params.alpha = 0.65

        output_file = open(OUTPUT_FILE, 'w')
        work = myWorkflow(semrep_reader,
                          semrep_grapher,
                          TextRanker(),
                          eval_params,
                          PAGERANK_CUTOFF,
                          MESH_TREE_FILE,
                          SAVCC_MATRIX_FILE,
                          # Returns 1/e**x on [0, 5) and 0.0 outside it.
                          lambda x: 1.0/math.exp(x) if x>=0 and x<5 else 0.0,
                          UMLS_CONVERTER_DATA,
                          UMLS_CONCEPT_DATA,
                          output_file)
        work.run()
    finally:
        if output_file is not None:
            output_file.close()
        semrep_file.close()
def __init__(self, fileobject, lines_to_ignore, chunkmap, converter):
    """Initialise the reader and remember the converter.

    ``fileobject``, ``lines_to_ignore`` and ``chunkmap`` are forwarded to
    ``SemrepOutput.__init__``; ``converter`` is stored on the instance.
    ``line_type`` is then pointed at this instance's ``line_factory``.
    """
    base_options = {
        'lines_to_ignore': lines_to_ignore,
        'chunkmap': chunkmap,
    }
    SemrepOutput.__init__(self, fileobject, **base_options)
    self._my_converter = converter
    # Assigned after the base initialiser has run, as in the original.
    self.line_type = self.line_factory
def setUp(self):
    """Create ``self.sro``: a SemrepOutput over an in-memory fake file."""
    # Test setup borrowed from semrep.py
    from MEDRank.file.semrep import (SemrepOutput)
    from MEDRank.file.metamap import (MetamapOutput)
    from MEDRank.file.chunkmap import chunkmap_factory
    import StringIO

    # This fake file is NOT the same as the one in semrep.py: it has one
    # relationship that should not be part of the graph, and one that
    # should.
    entity_affecting = ("SE|0000000000||ti|2|entity|Affecting|"
                        "ftcn|C0392760|involved||||1000|319|326\n")
    entity_involvement = ("SE|0000000000||ti|2|entity|Involvement"
                          "with|ftcn|C1314939|involved||"
                          "||1000|319|326\n")
    relation_steroid = ("SE|0000000000||ti|2|relation|||Steroid"
                        "hormone|horm,strd|horm|C0301818|"
                        "||||||||901|115|130||INTERACTS_WITH"
                        "||379|385|||steroid hormone"
                        "receptor|gngm,aapp,rcpt|gngm|C0597519"
                        "||None|steroid hormone receptors|their"
                        " respective steroid hormone"
                        " receptors|||||890|390|431\n")
    relation_affection = ("SE|0000000000||ti|2|relation|||Affection"
                          "|horm,strd|horm|C0392760|"
                          "||||||||901|115|130||INTERACTS_WITH"
                          "||379|385|||Involvement"
                          "with|gngm,aapp,rcpt|gngm|C1314939"
                          "||None|steroid hormone receptors|their"
                          " respective steroid hormone"
                          " receptors|||||890|390|431\n")
    # Matches the entry in the ignore list handed to SemrepOutput below.
    ignored_line = "USELESS LINE!\n"
    text_line = ("SE|0000000000||ti|3|text|Coactivator and"
                 " corepressor proteins have recently been"
                 " identified that interact with steroid"
                 " hormone receptors and modulate"
                 " transcriptional activation\n")

    fake_semrep_text = "".join([
        entity_affecting,
        entity_involvement,
        relation_steroid,
        relation_affection,
        ignored_line,
        text_line,
    ])
    fake_semrep_file = StringIO.StringIO(fake_semrep_text)
    chunkmap = chunkmap_factory({'123.txt': [0]})
    self.sro = SemrepOutput(fake_semrep_file, ["USELESS LINE!"], chunkmap)
def __init__(self, fileobject, lines_to_ignore, chunkmap, converter):
    """Set up the underlying SemrepOutput and keep a converter.

    The base class initialiser receives ``fileobject`` positionally and
    ``lines_to_ignore`` / ``chunkmap`` by keyword. ``converter`` is kept in
    ``self._my_converter``, and ``self.line_type`` is set to
    ``self.line_factory``.
    """
    inherited_kwargs = dict(lines_to_ignore=lines_to_ignore,
                            chunkmap=chunkmap)
    SemrepOutput.__init__(self, fileobject, **inherited_kwargs)
    self._my_converter = converter
    # Done last, once the base class has finished initialising.
    self.line_type = self.line_factory