def __init__(self, source, dest, species, interactions, subject, min_spp, min_ints):
    """Tag every article under ``source`` and append the results to ``dest``.

    Each directory entry in ``source`` is wrapped in a ``ProcessedArticle``
    and passed to ``ArticleInteractions``. When the result has a truthy
    ``tagged_sentences``, every line of its ``flat_output`` is written to
    ``dest`` prefixed with ``subject`` and a tab.

    Args:
        source: Directory whose entries are processed one by one.
        dest: Output file path; opened in append mode as UTF-8.
        species: Argument for ``hash_linnaeus_file``; the result is handed
            to ``ArticleInteractions`` (presumably a species tag file --
            confirm against ``hash_linnaeus_file``).
        interactions: Same as ``species``, for the interaction terms.
        subject: Label prepended to every output line.
        min_spp: Forwarded unchanged to ``ArticleInteractions``
            (presumably a minimum species count -- TODO confirm).
        min_ints: Forwarded unchanged to ``ArticleInteractions``
            (presumably a minimum interaction count -- TODO confirm).
    """
    species_dict = hash_linnaeus_file(species)
    interacts_dict = hash_linnaeus_file(interactions)
    sys.stderr.write("writing output from %s to %s...\n" % (source, dest))
    # The context manager closes (and flushes) the output file even if
    # processing one of the articles raises part-way through the directory.
    with codecs.open(dest, "a", "utf8") as f:
        for article in os.listdir(source):
            p = ProcessedArticle(os.path.join(source, article))
            out = ArticleInteractions(p, species_dict, interacts_dict, min_spp, min_ints)
            if out.tagged_sentences:
                sys.stderr.write("...writing %s..." % p.file_name)
                f.writelines([subject + "\t" + line for line in out.flat_output])
    sys.stderr.write("\nDone.\n")
def processDirectory(self, data_path, pure_path, subjects, inter_tag_path):
    """Run ``self.process`` once per subject and collect the results.

    For every entry of ``subjects`` the file ``<subject>_interactions.tsv``
    inside ``inter_tag_path`` is loaded with ``hash_linnaeus_file``; the
    loaded value is passed to ``self.process`` together with ``data_path``,
    ``pure_path`` and the subject itself. A progress line is written to
    stderr before each subject is handled.

    Returns:
        A dict mapping each subject to whatever ``self.process`` returned
        for it.
    """
    results = {}
    for name in subjects:
        sys.stderr.write("Locating %s interaction terms...\n" % name)
        tag_file = os.path.join(inter_tag_path, name + "_interactions.tsv")
        terms = hash_linnaeus_file(tag_file)
        results[name] = self.process(terms, data_path, pure_path, name)
    return results