Пример #1
0
	def __init__(self, filename='awesome.model', type=libml.ALL):
		"""Initialise a model that will be stored at ``filename``.

		Args:
			filename: Path of the model file. When it has a directory
				component, that directory is handed to ``helper.mkpath``;
				the path itself is stored resolved via ``os.path.realpath``.
			type: Stored unchanged on ``self.type``; defaults to ``libml.ALL``.
		"""
		# Only call mkpath when the path actually names a parent directory.
		parent_dir = os.path.dirname(filename)
		if parent_dir != "":
			helper.mkpath(parent_dir)

		self.filename = os.path.realpath(filename)
		self.vocab = {}
		self.type = type

		# Start with the ``|`` combination of sentence- and word-level features.
		feature_mask = Model.sentence_features | Model.word_features
		self.enabled_features = feature_mask
Пример #2
0
    def __init__(self):
        """Load the on-disk URL cache, falling back to an empty one.

        The cache file is ``url.cache`` inside the ``caches`` directory under
        the path given by the ``CLINER_DIR`` environment variable.
        ``os.getenv`` returns ``None`` when that variable is unset, so it must
        be set for the path join below to succeed.
        """
        # Make sure the cache directory is there before reading from it.
        caches_root = os.path.join(os.getenv('CLINER_DIR'), 'caches')
        helper.mkpath(caches_root)
        self.filename = os.path.join(caches_root, 'url.cache')

        # An IOError while loading means we start with an empty cache.
        try:
            loaded = load_pickled_obj(self.filename)
        except IOError:
            loaded = {}
        self.cache = loaded

        # Second dict, initially empty, kept separately from the loaded cache.
        self.new = {}
Пример #3
0
    def __init__(self):
        """Read ``url.cache`` from the cache directory, or start empty.

        The cache directory is ``caches`` under the path named by the
        ``CLINER_DIR`` environment variable, which must be set
        (``os.getenv`` yields ``None`` otherwise and the join fails).
        """
        # Ensure the directory holding the cache file exists.
        directory = os.path.join(os.getenv('CLINER_DIR'), 'caches')
        helper.mkpath(directory)

        cache_file = os.path.join(directory, 'url.cache')
        self.filename = cache_file

        # Fall back to an empty dict when the file cannot be read (IOError).
        try:
            self.cache = load_pickled_obj(cache_file)
        except IOError:
            self.cache = dict()

        # Separate, initially empty dict alongside the loaded cache.
        self.new = dict()
Пример #4
0
def main():
    """Command-line entry point: run prediction on a set of input files.

    Options (all optional):
        -i    Glob pattern of the input files
              (default: $CLINER_DIR/data/test_data/*).
        -o    Directory to write the output
              (default: $CLINER_DIR/data/test_predictions).
        -m    Model to use for prediction
              (default: $CLINER_DIR/models/run.model).
        -f    Data format, one of Note.supportedFormats() (default: 'i2b2').
        -crf  Where to find crfsuite; parsed but not read by this function.

    The CLINER_DIR environment variable is consulted only for path options
    that were not supplied. If one of those is needed while CLINER_DIR is
    unset, the parser reports a usage error instead of failing inside
    ``os.path.join`` with a ``None`` argument.
    """
    parser = argparse.ArgumentParser()

    # Path options default to None; the CLINER_DIR-relative defaults are
    # filled in after parsing so an unset variable only matters when needed.
    parser.add_argument("-i",
        dest = "input",
        help = "The input files to predict",
        default = None
    )

    parser.add_argument("-o",
        dest = "output",
        help = "The directory to write the output",
        default = None
    )

    parser.add_argument("-m",
        dest = "model",
        help = "The model to use for prediction",
        default = None
    )

    parser.add_argument("-f",
        dest = "format",
        help = "Data format ( " + ' | '.join(Note.supportedFormats()) + " )",
        default = 'i2b2'
    )

    parser.add_argument("-crf",
        dest = "with_crf",
        help = "Specify where to find crfsuite",
        default = None
    )

    args = parser.parse_args()

    # Resolve the defaults for any path option the user left out.
    relative_defaults = (
        ('input', 'data/test_data/*'),
        ('output', 'data/test_predictions'),
        ('model', 'models/run.model'),
    )
    cliner_dir = os.getenv('CLINER_DIR')
    for dest, relative in relative_defaults:
        if getattr(args, dest) is not None:
            continue
        if cliner_dir is None:
            parser.error("CLINER_DIR is not set; export it or pass "
                         "-i, -o and -m explicitly")
        setattr(args, dest, os.path.join(cliner_dir, relative))

    # Expand the input pattern and make sure the output directory exists.
    files = glob.glob(args.input)
    helper.mkpath(args.output)

    # Local name avoids shadowing the builtin ``format``; the keyword below
    # is predict's own parameter name and stays as-is.
    data_format = args.format

    # Predict
    predict(files, args.model, args.output, format=data_format)
Пример #5
0
def main():
	"""Command-line entry point: predict each input file and write .con output.

	Options:
		-i        Glob pattern of input files
		          (default: ../data/test_data/* relative to this script).
		-o        Output directory
		          (default: ../data/test_predictions relative to this script).
		-m        Model to load
		          (default: ../models/awesome.model relative to this script).
		--no-svm, --no-lin, --no-crf
		          Clear the matching libml flag from the loaded model's
		          ``type`` before predicting.

	For every input file and every remaining model type, the labels for that
	type are written to ``<output>/<svm|lin|crf>/<input stem>.con``.
	"""
	script_dir = os.path.dirname(os.path.realpath(__file__))

	parser = argparse.ArgumentParser()
	parser.add_argument("-i",
		dest = "input",
		help = "The input files to predict",
		default = os.path.join(script_dir, '../data/test_data/*')
	)

	parser.add_argument("-o",
		dest = "output",
		help = "The directory to write the output",
		default = os.path.join(script_dir, '../data/test_predictions')
	)

	parser.add_argument("-m",
		dest = "model",
		help = "The model to use for prediction",
		default = os.path.join(script_dir, '../models/awesome.model')
	)

	parser.add_argument("--no-svm",
		dest = "no_svm",
		action = "store_true",
		help = "Disable SVM model generation",
	)

	parser.add_argument("--no-lin",
		dest = "no_lin",
		action = "store_true",
		help = "Disable LIN model generation",
	)

	parser.add_argument("--no-crf",
		dest = "no_crf",
		action = "store_true",
		help = "Disable CRF model generation",
	)

	args = parser.parse_args()

	# Locate the test files
	files = glob.glob(args.input)

	helper.mkpath(args.output)

	# Load the model and drop any classifier the user disabled
	model = Model.load(args.model)
	if args.no_svm:
		model.type &= ~libml.SVM
	if args.no_lin:
		model.type &= ~libml.LIN
	if args.no_crf:
		model.type &= ~libml.CRF

	# Output subdirectory for each known model type. A lookup (instead of
	# three independent ifs) means an unrecognised type can never reuse the
	# path computed for a previous type.
	subdir_by_type = {
		libml.SVM: "svm",
		libml.LIN: "lin",
		libml.CRF: "crf",
	}
	prepared = set()  # subdirectories already passed to helper.mkpath

	for txt in files:
		data = read_txt(txt)
		labels = model.predict(data)

		# Swap the extension for ".con" whatever its length; slicing off a
		# fixed three characters mangled names that do not end in ".txt".
		con = os.path.splitext(os.path.basename(txt))[0] + '.con'

		for t in libml.bits(model.type):
			subdir = subdir_by_type.get(t)
			if subdir is None:
				# No output location is defined for this type
				continue

			out_dir = os.path.join(args.output, subdir)
			if subdir not in prepared:
				helper.mkpath(out_dir)
				prepared.add(subdir)

			write_con(os.path.join(out_dir, con), data, labels[t])
Пример #6
0
def main():
    """Command-line entry point: validate the arguments and run prediction.

    Options:
        -i    Glob pattern of the input text files (required).
        -o    Directory to write the output (required).
        -m    Path to the model; must exist (required).
        -f    Data format, one of Note.supportedFormats() (required).
        -crf  Where to find crfsuite; parsed but not read by this function.
        -discontiguous_spans
              Enable the third/clustering pass; rejected together with the
              "i2b2" format.
        -umls_disambiguation
              Passed to predict as ``disambiguate``.

    Every argument error is written to stderr and exits with a non-zero
    status. All validation happens before the output directory is created.
    When the input pattern matches no files, a note is printed and the
    program exits with status 0 without predicting.
    """

    def fail(message):
        # Same output as ``print >> sys.stderr, message``, then exit status 1.
        sys.stderr.write(message + '\n')
        sys.exit(1)

    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-i",
        dest="input",
        help="The input files to predict",
    )

    parser.add_argument(
        "-o",
        dest="output",
        help="The directory to write the output",
    )

    parser.add_argument(
        "-m",
        dest="model",
        help="The model to use for prediction",
    )

    parser.add_argument(
        "-f",
        dest="format",
        help="Data format ( " + ' | '.join(Note.supportedFormats()) + " )",
    )

    parser.add_argument("-crf",
                        dest="with_crf",
                        help="Specify where to find crfsuite",
                        default=None)

    parser.add_argument(
        "-discontiguous_spans",
        dest="third",
        help="A flag indicating whether to have third/clustering pass",
        action="store_true")

    parser.add_argument(
        "-umls_disambiguation",
        dest="disambiguate",
        help=
        "A flag indicating whether to disambiguate CUI ID for identified entities in semeval",
        action="store_true")

    args = parser.parse_args()

    # Error check: ensure that the required arguments are specified
    if not args.input:
        fail('\n\tError: Must provide text files\n')
    if not args.output:
        fail('\n\tError: Must provide output directory\n')
    if not args.model:
        fail('\n\tError: Must provide path to model\n')
    if not os.path.exists(args.model):
        fail('\n\tError: Model does not exist: %s\n' % args.model)
    if not args.format:
        # Reported like the other argument errors: stderr, non-zero status.
        fail('\n\tERROR: must provide "format" argument\n')

    data_format = args.format
    third = args.third

    if third and data_format == "i2b2":
        sys.exit("i2b2 formatting does not support disjoint spans")

    # Arguments are valid: expand the inputs and prepare the output directory
    files = glob.glob(args.input)
    helper.mkpath(args.output)

    # Tell user if not predicting
    if not files:
        sys.stderr.write("\n\tNote: You did not supply any input files\n" + '\n')
        sys.exit()

    # Predict
    predict(files,
            args.model,
            args.output,
            format=data_format,
            third=third,
            disambiguate=args.disambiguate)
Пример #7
0
def main():
    """Command-line entry point: predict each input file and write .con output.

    Options:
        -i        Glob pattern of input files
                  (default: ../data/test_data/* relative to this script).
        -o        Output directory
                  (default: ../data/test_predictions relative to this script).
        -m        Model to load
                  (default: ../models/awesome.model relative to this script).
        --no-svm, --no-lin, --no-crf
                  Clear the matching libml flag from the loaded model's
                  ``type`` before predicting.

    For every input file and every remaining model type, the labels for that
    type are written to ``<output>/<svm|lin|crf>/<input stem>.con``.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-i",
        dest="input",
        help="The input files to predict",
        default=os.path.join(script_dir, "../data/test_data/*"),
    )

    parser.add_argument(
        "-o",
        dest="output",
        help="The directory to write the output",
        default=os.path.join(script_dir, "../data/test_predictions"),
    )

    parser.add_argument(
        "-m",
        dest="model",
        help="The model to use for prediction",
        default=os.path.join(script_dir, "../models/awesome.model"),
    )

    parser.add_argument("--no-svm", dest="no_svm", action="store_true", help="Disable SVM model generation")

    parser.add_argument("--no-lin", dest="no_lin", action="store_true", help="Disable LIN model generation")

    parser.add_argument("--no-crf", dest="no_crf", action="store_true", help="Disable CRF model generation")

    args = parser.parse_args()

    # Locate the test files
    files = glob.glob(args.input)

    helper.mkpath(args.output)

    # Load the model and drop any classifier the user disabled
    model = Model.load(args.model)
    if args.no_svm:
        model.type &= ~libml.SVM
    if args.no_lin:
        model.type &= ~libml.LIN
    if args.no_crf:
        model.type &= ~libml.CRF

    # Output subdirectory for each known model type. A lookup (instead of
    # three independent ifs) means an unrecognised type can never reuse the
    # path computed for a previous type.
    subdir_by_type = {
        libml.SVM: "svm",
        libml.LIN: "lin",
        libml.CRF: "crf",
    }
    prepared = set()  # subdirectories already passed to helper.mkpath

    for txt in files:
        data = read_txt(txt)
        labels = model.predict(data)

        # Swap the extension for ".con" whatever its length; slicing off a
        # fixed three characters mangled names that do not end in ".txt".
        con = os.path.splitext(os.path.basename(txt))[0] + ".con"

        for t in libml.bits(model.type):
            subdir = subdir_by_type.get(t)
            if subdir is None:
                # No output location is defined for this type
                continue

            out_dir = os.path.join(args.output, subdir)
            if subdir not in prepared:
                helper.mkpath(out_dir)
                prepared.add(subdir)

            write_con(os.path.join(out_dir, con), data, labels[t])