def __init__(self, filename='awesome.model', type=libml.ALL):
    """Initialise an empty model bound to the file *filename*.

    When *filename* has a directory component, that directory is created
    with ``helper.mkpath`` before the path is resolved.

    Args:
        filename: Location of the model file; stored on the instance after
            being passed through ``os.path.realpath``.
        type: Value stored as ``self.type`` (defaults to ``libml.ALL``).
    """
    parent_dir = os.path.dirname(filename)
    if parent_dir != "":
        # Only a path with a directory part needs its parent created
        helper.mkpath(parent_dir)

    self.filename = os.path.realpath(filename)
    self.type = type

    # Start with no vocabulary and every sentence/word feature switched on
    self.vocab = {}
    self.enabled_features = Model.sentence_features | Model.word_features
def __init__(self):
    """Open the on-disk URL cache, starting empty if it cannot be read.

    The cache file is ``$CLINER_DIR/caches/url.cache``; the ``caches``
    directory is created through ``helper.mkpath`` first.  The result of
    ``os.getenv('CLINER_DIR')`` is handed straight to ``os.path.join``, so
    the environment variable has to be set.
    """
    # Make sure the directory that holds the cache file is present
    caches_root = os.path.join(os.getenv('CLINER_DIR'), 'caches')
    helper.mkpath(caches_root)

    self.filename = os.path.join(caches_root, 'url.cache')

    # An IOError from the loader means "no usable cache yet": begin empty
    try:
        stored = load_pickled_obj(self.filename)
    except IOError:
        stored = {}
    self.cache = stored

    self.new = {}
def main():
    """Parse the command line and run ``predict`` over the matched files.

    Default locations for input, output and model are all relative to the
    ``CLINER_DIR`` environment variable, whose value is passed unchecked to
    ``os.path.join``.  The ``-crf`` option is accepted but not read again in
    this function.
    """
    def in_cliner_dir(relative):
        # Resolve a default location underneath $CLINER_DIR
        return os.path.join(os.getenv('CLINER_DIR'), relative)

    cli = argparse.ArgumentParser()
    cli.add_argument(
        "-i",
        dest="input",
        help="The input files to predict",
        default=in_cliner_dir('data/test_data/*'),
    )
    cli.add_argument(
        "-o",
        dest="output",
        help="The directory to write the output",
        default=in_cliner_dir('data/test_predictions'),
    )
    cli.add_argument(
        "-m",
        dest="model",
        help="The model to use for prediction",
        default=in_cliner_dir('models/run.model'),
    )
    cli.add_argument(
        "-f",
        dest="format",
        help="Data format ( " + ' | '.join(Note.supportedFormats()) + " )",
        default='i2b2',
    )
    cli.add_argument(
        "-crf",
        dest="with_crf",
        help="Specify where to find crfsuite",
        default=None,
    )
    options = cli.parse_args()

    # Expand the input glob and make sure the destination directory exists
    matched_files = glob.glob(options.input)
    helper.mkpath(options.output)

    # Predict
    predict(matched_files, options.model, options.output,
            format=options.format)
def main():
    """Load a model, predict every input file and write ``.con`` files.

    Command-line options:
        -i        Glob of input files (default: ``../data/test_data/*``
                  relative to this script).
        -o        Output directory (default: ``../data/test_predictions``).
        -m        Model file to load (default: ``../models/awesome.model``).
        --no-svm, --no-lin, --no-crf
                  Clear the corresponding bit in ``model.type`` so no output
                  is written for that model type.

    For every input file and every bit yielded by
    ``libml.bits(model.type)``, the labels are written to
    ``<output>/<svm|lin|crf>/<input base name>.con``.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-i",
        dest="input",
        help="The input files to predict",
        default=os.path.join(script_dir, '../data/test_data/*'),
    )
    parser.add_argument(
        "-o",
        dest="output",
        help="The directory to write the output",
        default=os.path.join(script_dir, '../data/test_predictions'),
    )
    parser.add_argument(
        "-m",
        dest="model",
        help="The model to use for prediction",
        default=os.path.join(script_dir, '../models/awesome.model'),
    )
    parser.add_argument(
        "--no-svm",
        dest="no_svm",
        action="store_true",
        help="Disable SVM model generation",
    )
    parser.add_argument(
        "--no-lin",
        dest="no_lin",
        action="store_true",
        help="Disable LIN model generation",
    )
    parser.add_argument(
        "--no-crf",
        dest="no_crf",
        action="store_true",
        help="Disable CRF model generation",
    )
    args = parser.parse_args()

    # Locate the test files
    files = glob.glob(args.input)

    # Load a model and make a prediction for each file
    helper.mkpath(args.output)
    model = Model.load(args.model)

    # Clear the bits of the model types disabled on the command line
    if args.no_svm:
        model.type &= ~libml.SVM
    if args.no_lin:
        model.type &= ~libml.LIN
    if args.no_crf:
        model.type &= ~libml.CRF

    for txt in files:
        data = read_txt(txt)
        labels = model.predict(data)

        # Replace the input's extension, whatever its length, with ".con".
        # Slicing off a fixed three characters would mangle names such as
        # "a.text" or names without an extension.
        con = os.path.splitext(os.path.basename(txt))[0] + '.con'

        for t in libml.bits(model.type):
            if t == libml.SVM:
                subdir = "svm"
            elif t == libml.LIN:
                subdir = "lin"
            elif t == libml.CRF:
                subdir = "crf"
            else:
                # No output location is defined for this bit; skip it rather
                # than write its labels to another type's path.
                continue

            out_dir = os.path.join(args.output, subdir)
            helper.mkpath(out_dir)
            write_con(os.path.join(out_dir, con), data, labels[t])
def main():
    """Validate the command line, then run ``predict`` on the input files.

    Command-line options:
        -i   Glob of text files to predict (required).
        -o   Output directory (required; created if missing).
        -m   Path to an existing model file (required).
        -f   Data format, one of ``Note.supportedFormats()`` (required).
        -crf Where to find crfsuite (parsed, not read again here).
        -discontiguous_spans   Enable the third/clustering pass.
        -umls_disambiguation   Disambiguate CUI IDs for semeval entities.

    Every validation failure is reported on stderr and ends the process
    with a non-zero status before the output directory is created.  If the
    input glob matches nothing, a note is written to stderr and the process
    exits with status 0.
    """
    def fail(message):
        # Report an argument error on stderr and stop with status 1
        sys.stderr.write(message + '\n')
        sys.exit(1)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-i",
        dest="input",
        help="The input files to predict",
    )
    parser.add_argument(
        "-o",
        dest="output",
        help="The directory to write the output",
    )
    parser.add_argument(
        "-m",
        dest="model",
        help="The model to use for prediction",
    )
    parser.add_argument(
        "-f",
        dest="format",
        help="Data format ( " + ' | '.join(Note.supportedFormats()) + " )",
    )
    parser.add_argument(
        "-crf",
        dest="with_crf",
        help="Specify where to find crfsuite",
        default=None,
    )
    parser.add_argument(
        "-discontiguous_spans",
        dest="third",
        help="A flag indicating whether to have third/clustering pass",
        action="store_true",
    )
    parser.add_argument(
        "-umls_disambiguation",
        dest="disambiguate",
        help="A flag indicating whether to disambiguate CUI ID for identified entities in semeval",
        action="store_true",
    )
    args = parser.parse_args()

    # Error check: validate every argument before touching the filesystem
    if not args.input:
        fail('\n\tError: Must provide text files\n')
    if not args.output:
        fail('\n\tError: Must provide output directory\n')
    if not args.model:
        fail('\n\tError: Must provide path to model\n')
    if not os.path.exists(args.model):
        fail('\n\tError: Model does not exist: %s\n' % args.model)
    if not args.format:
        # A missing format is an error like the others: stderr, status 1
        fail('\n\tERROR: must provide "format" argument\n')

    third = args.third
    if third and args.format == "i2b2":
        # sys.exit with a string prints it to stderr and exits with status 1
        sys.exit("i2b2 formatting does not support disjoint spans")

    # Parse arguments
    files = glob.glob(args.input)
    helper.mkpath(args.output)

    # Tell user if not predicting
    if not files:
        sys.stderr.write("\n\tNote: You did not supply any input files\n" + '\n')
        sys.exit()

    # Predict
    predict(files, args.model, args.output,
            format=args.format,
            third=third,
            disambiguate=args.disambiguate)
def main():
    """Load a model, predict every input file and write ``.con`` files.

    Command-line options:
        -i        Glob of input files (default: ``../data/test_data/*``
                  relative to this script).
        -o        Output directory (default: ``../data/test_predictions``).
        -m        Model file to load (default: ``../models/awesome.model``).
        --no-svm, --no-lin, --no-crf
                  Clear the corresponding bit in ``model.type`` so no output
                  is written for that model type.

    For every input file and every bit yielded by
    ``libml.bits(model.type)``, the labels are written to
    ``<output>/<svm|lin|crf>/<input base name>.con``.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-i",
        dest="input",
        help="The input files to predict",
        default=os.path.join(script_dir, "../data/test_data/*"),
    )
    parser.add_argument(
        "-o",
        dest="output",
        help="The directory to write the output",
        default=os.path.join(script_dir, "../data/test_predictions"),
    )
    parser.add_argument(
        "-m",
        dest="model",
        help="The model to use for prediction",
        default=os.path.join(script_dir, "../models/awesome.model"),
    )
    parser.add_argument("--no-svm", dest="no_svm", action="store_true", help="Disable SVM model generation")
    parser.add_argument("--no-lin", dest="no_lin", action="store_true", help="Disable LIN model generation")
    parser.add_argument("--no-crf", dest="no_crf", action="store_true", help="Disable CRF model generation")
    args = parser.parse_args()

    # Locate the test files
    files = glob.glob(args.input)

    # Load a model and make a prediction for each file
    helper.mkpath(args.output)
    model = Model.load(args.model)

    # Clear the bits of the model types disabled on the command line
    if args.no_svm:
        model.type &= ~libml.SVM
    if args.no_lin:
        model.type &= ~libml.LIN
    if args.no_crf:
        model.type &= ~libml.CRF

    for txt in files:
        data = read_txt(txt)
        labels = model.predict(data)

        # Replace the input's extension, whatever its length, with ".con".
        # Slicing off a fixed three characters would mangle names such as
        # "a.text" or names without an extension.
        con = os.path.splitext(os.path.basename(txt))[0] + ".con"

        for t in libml.bits(model.type):
            if t == libml.SVM:
                subdir = "svm"
            elif t == libml.LIN:
                subdir = "lin"
            elif t == libml.CRF:
                subdir = "crf"
            else:
                # No output location is defined for this bit; skip it rather
                # than write its labels to another type's path.
                continue

            out_dir = os.path.join(args.output, subdir)
            helper.mkpath(out_dir)
            write_con(os.path.join(out_dir, con), data, labels[t])