def __init__(self, corpusdir, **kwargs):
    """Prepare the miRBase graph and empty containers, load the database,
    then initialize the parent corpus.

    :param corpusdir: kept as ``self.db_path`` and also passed on to the
        parent constructor.
    :param kwargs: forwarded untouched to the parent constructor.
    """
    # miRBase handle; its graph is loaded before anything else happens.
    self.mirbase = MirbaseDB(config.mirbase_path)
    self.mirbase.load_graph()

    # Raw containers, all empty at this point.
    self.mirnas, self.tfs, self.pairs = {}, {}, {}
    self.pmids = set()

    # Normalized containers: miRNAs are normalized to miRBase, TFs to
    # maybe UniProt.
    self.normalized_mirnas, self.normalized_tfs = set(), set()
    self.normalized_pairs = set()

    self.db_path = corpusdir

    # load_database() runs before the parent constructor because the
    # parent receives self.pmids (presumably filled by load_database --
    # confirm against its implementation).
    self.load_database()
    super(TransmirCorpus, self).__init__(corpusdir, self.pmids, **kwargs)
# Built-in stopwords for miRNA mentions. The set is extended below with the
# entries of the configured stoplist file.
# Not currently included: "mirna", "mirnas", "mir", "hsa-mir"
mirna_stopwords = {
    "mediated",
    "expressing",
    "deficient",
    "transfected",
    "dependent",
    "family",
    "specific",
    "null",
    "independent",
    "dependant",
    "overexpressing",
    "binding",
    "targets",
    "induced",
}

# Separate stopword set; presumably checked against the word following a
# mention (see MirnaEntity.nextword) -- confirm against the callers.
mirna_nextstopwords = {"inhibitor"}

# Add every stoplist entry (stripped and lower-cased) that is longer than
# one character.
with open(config.stoplist, 'r') as stopfile:
    normalized_entries = (entry.strip().lower() for entry in stopfile)
    mirna_stopwords.update(
        word for word in normalized_entries if len(word) > 1
    )

# "let" must never be a stopword, even if the stoplist file contains it.
# NOTE(review): presumably because of let-7 style miRNA names -- confirm.
mirna_stopwords.discard("let")

# Module-level miRBase graph, loaded once at import time.
mirna_graph = MirbaseDB(config.mirbase_path)
mirna_graph.load_graph()


class MirnaEntity(Entity):
    """Entity whose ``type`` is fixed to "mirna"."""

    def __init__(self, tokens, sid, *args, **kwargs):
        """Create a miRNA entity.

        :param tokens: passed to the ``Entity`` constructor.
        :param sid: stored as ``self.sid``.
        :param args: accepted but neither used nor forwarded.
        :param kwargs: forwarded to the ``Entity`` constructor; the
            "subtype" and "nextword" keys are also read here (``None``
            when absent).
        """
        super(MirnaEntity, self).__init__(tokens, **kwargs)
        self.sid = sid
        self.type = "mirna"

        # Optional values taken from the keyword arguments.
        self.subtype = kwargs.get("subtype")
        self.nextword = kwargs.get("nextword")

        # Normalization fields start out unset.
        self.mirna_acc = None
        # NOTE(review): the name starts as 0 rather than None -- confirm
        # that downstream code expects this sentinel.
        self.mirna_name = 0
        self.go_ids = []