示例#1
0
 def __init__(self, corpusdir, **kwargs):
     """Set up the corpus from the database located at ``corpusdir``.

     The miRBase graph is loaded, all per-corpus containers are created
     empty, ``load_database()`` is run, and finally the parent initialiser
     is called with ``corpusdir`` and the ``self.pmids`` set.

     :param corpusdir: stored as ``self.db_path`` and also passed on to the
         parent initialiser.
     :param kwargs: forwarded unchanged to the parent initialiser.
     """
     # Must be known before load_database() runs.
     self.db_path = corpusdir

     # miRBase graph, loaded eagerly at construction time.
     self.mirbase = MirbaseDB(config.mirbase_path)
     self.mirbase.load_graph()

     # Raw records; each attribute gets its own fresh dict.
     self.mirnas, self.tfs, self.pairs = {}, {}, {}

     # Normalized views of the same data.
     self.normalized_pairs = set()
     self.normalized_tfs = set()  # normalized to maybe UniProt
     self.normalized_mirnas = set()  # normalized to miRBase

     self.pmids = set()

     # NOTE(review): presumably load_database() fills self.pmids, which
     # would explain why the parent initialiser is invoked last -- confirm.
     self.load_database()
     super(TransmirCorpus, self).__init__(corpusdir, self.pmids, **kwargs)
示例#2
0
# Seed stop words for miRNA handling; the set is extended from the configured
# stoplist file further down.
mirna_stopwords = {
    "mediated",
    "expressing",
    "deficient",
    "transfected",
    "dependent",
    "family",
    "specific",
    "null",
    "independent",
    "dependant",
    "overexpressing",
    "binding",
    "targets",
    "induced",
}
# NOTE(review): "mirna", "mirnas", "mir" and "hsa-mir" appear to have been
# listed here at some point and are currently left out.

# NOTE(review): presumably matched against the word following a mention --
# confirm against the code that reads this set.
mirna_nextstopwords = {"inhibitor"}

# Merge in every stoplist entry (whitespace-stripped, lower-cased) that is
# longer than one character; duplicates are absorbed by the set.
with open(config.stoplist, 'r') as stopfile:
    cleaned = (raw.strip().lower() for raw in stopfile)
    mirna_stopwords.update(word for word in cleaned if len(word) > 1)

# "let" must never be treated as a stop word, whether or not the stoplist
# contained it (presumably because of let-* miRNA names -- confirm).
mirna_stopwords.discard("let")

# Module-wide miRBase graph, loaded once at import time.
mirna_graph = MirbaseDB(config.mirbase_path)
mirna_graph.load_graph()


class MirnaEntity(Entity):
    """Entity whose ``type`` is fixed to ``"mirna"``."""

    def __init__(self, tokens, sid, *args, **kwargs):
        """Create a miRNA entity.

        :param tokens: passed to ``Entity.__init__``.
        :param sid: stored as ``self.sid``.
        :param args: accepted but not used by this initialiser.
        :param kwargs: forwarded to ``Entity.__init__``; the optional
            ``"subtype"`` and ``"nextword"`` entries are also kept on the
            instance (``None`` when absent).
        """
        super(MirnaEntity, self).__init__(tokens, **kwargs)

        # Identity of this entity.
        self.sid = sid
        self.type = "mirna"

        # Optional extras taken from the keyword arguments.
        self.nextword = kwargs.get("nextword")
        self.subtype = kwargs.get("subtype")

        # Placeholders; nothing in this initialiser fills them in.
        # NOTE(review): mirna_name starts as 0 while mirna_acc starts as
        # None -- confirm whether callers depend on the 0 sentinel.
        self.go_ids = []
        self.mirna_name = 0
        self.mirna_acc = None