def setUpClass(cls):
    """Prepare IEPY and the Django test databases once for the test class.

    Calls ``iepy.setup()``, creates a ``DiscoverRunner`` and uses it to set
    up the test environment and the databases. The runner is stored on the
    class as ``dj_runner`` and the value returned by ``setup_databases()``
    as ``old_config``.

    If the class declares a ``ManagerClass`` attribute, it is called with no
    arguments and the result is stored as ``cls.manager``.
    """
    # ORM environment and database setup
    iepy.setup()
    runner = cls.dj_runner = DiscoverRunner()
    runner.setup_test_environment()
    cls.old_config = runner.setup_databases()

    # Creating Manager instance (if requested)
    try:
        manager_factory = cls.ManagerClass
    except AttributeError:
        # No manager requested by this test class.
        return
    cls.manager = manager_factory()
def upgrade(self):
    """Upgrade the IEPY instance at ``self.folder_path`` to this IEPY version.

    Steps, in order:

    1. Exit with status 1 if ``self.folder_path`` does not exist.
    2. Call ``iepy.setup(self.folder_path, _safe_mode=True)``; if it raises
       ``ValueError`` the error is printed and the process exits with
       status 1.
    3. Store the path returned by ``iepy.setup`` in ``self.folder_path`` and
       its absolute form in ``self.abs_folder_path``.
    4. Read ``settings.IEPY_VERSION`` into ``self.old_version``. If it equals
       ``iepy.__version__`` a message is printed and the method returns.
    5. Otherwise set ``self.creating = False``, store the result of
       ``self.download_old_iepy_version()`` in ``self.old_version_path`` and
       call ``self._run_steps()``.

    Raises:
        SystemExit: with code 1 when the folder is missing or when
            ``iepy.setup`` raises ``ValueError``.
    """
    if not os.path.exists(self.folder_path):
        print("Error: instance folder does not exist")
        sys.exit(1)

    try:
        actual_path = iepy.setup(self.folder_path, _safe_mode=True)
    except ValueError as err:
        print(err)
        sys.exit(1)

    # These assignments must only run when setup succeeded. They used to live
    # in a ``finally`` clause, where ``actual_path`` is unbound on the error
    # path, so an UnboundLocalError replaced the intended SystemExit.
    self.folder_path = actual_path
    self.abs_folder_path = os.path.abspath(self.folder_path)

    # NOTE(review): imported locally, presumably because the Django settings
    # are only usable once iepy.setup() has run -- confirm.
    from django.conf import settings
    self.old_version = settings.IEPY_VERSION
    if self.old_version == iepy.__version__:
        print("Iepy instance '{}' is already up to date.".format(
            self.folder_path))
        return

    print("Upgrading iepy instance '{}' from {} to {}".format(
        self.folder_path, self.old_version, iepy.__version__))
    self.creating = False
    self.old_version_path = self.download_old_iepy_version()
    self._run_steps()
def upgrade(self):
    """Bring an existing IEPY instance up to the installed IEPY version.

    The instance folder (``self.folder_path``) must exist, otherwise an
    error is printed and the process exits with status 1. IEPY is then set
    up on that folder via ``iepy.setup(..., _safe_mode=True)``; a
    ``ValueError`` from that call is printed and also ends the process with
    status 1.

    On success ``self.folder_path`` is replaced by the path ``iepy.setup``
    returned and ``self.abs_folder_path`` by its absolute form.
    ``self.old_version`` is read from ``settings.IEPY_VERSION``. When it
    already matches ``iepy.__version__`` the method only reports that and
    returns. Otherwise it sets ``self.creating = False``, stores the result
    of ``self.download_old_iepy_version()`` in ``self.old_version_path`` and
    calls ``self._run_steps()``.

    Raises:
        SystemExit: with code 1 if the folder does not exist or
            ``iepy.setup`` raises ``ValueError``.
    """
    if not os.path.exists(self.folder_path):
        print("Error: instance folder does not exist")
        sys.exit(1)

    try:
        actual_path = iepy.setup(self.folder_path, _safe_mode=True)
    except ValueError as err:
        print(err)
        sys.exit(1)
    else:
        # Success path only. A ``finally`` clause here would also run while
        # SystemExit propagates, when ``actual_path`` was never bound, and
        # would turn the clean exit into an UnboundLocalError.
        self.folder_path = actual_path
        self.abs_folder_path = os.path.abspath(self.folder_path)

    # NOTE(review): local import, presumably because settings are configured
    # by the iepy.setup() call above -- confirm.
    from django.conf import settings
    self.old_version = settings.IEPY_VERSION
    if self.old_version == iepy.__version__:
        print("Iepy instance '{}' is already up to date.".format(self.folder_path))
        return

    print("Upgrading iepy instance '{}' from {} to {}".format(
        self.folder_path, self.old_version, iepy.__version__))
    self.creating = False
    self.old_version_path = self.download_old_iepy_version()
    self._run_steps()
Birthdate corpus preprocessing script Usage: preprocess.py preprocess.py -h | --help | --version Options: -h --help Show this screen --version Version number """ import logging from docopt import docopt import iepy iepy.setup(__file__) from iepy.data.db import DocumentManager from iepy.preprocess.stanford_preprocess import StanfordPreprocess from iepy.preprocess.pipeline import PreProcessPipeline from iepy.preprocess.segmenter import SyntacticSegmenterRunner if __name__ == '__main__': logger = logging.getLogger(u'preprocess') logger.setLevel(logging.INFO) logging.basicConfig(level=logging.INFO, format='%(message)s') opts = docopt(__doc__, version=iepy.__version__) docs = DocumentManager() pipeline = PreProcessPipeline([ StanfordPreprocess(), SyntacticSegmenterRunner(increment=True)
""" IEPY database loader from csv file Usage: csv_to_iepy.py <filename> csv_to_iepy.py -h | --help The <filename> argument can be a .csv file or a .csv.gz file containing the corpus in two columns: 'freebase_mid' and 'description'. Options: -h --help Show this screen --version Version number """ import logging from docopt import docopt import iepy iepy.setup(__file__) from iepy.utils import csv_to_iepy if __name__ == "__main__": logging.basicConfig(level=logging.INFO, format='%(message)s') opts = docopt(__doc__, version=iepy.__version__) filepath = opts["<filename>"] csv_to_iepy(filepath)
""" IEPY DB Abstraction level. The goal of this module is to provide some thin abstraction between the chosen database engine and ORM and the IEPY core and tools. """ from collections import defaultdict, namedtuple from functools import lru_cache from random import shuffle import logging import iepy iepy.setup() from iepy.data.models import (IEDocument, IEDocumentMetadata, TextSegment, Relation, Entity, EntityKind, EntityOccurrence, EvidenceLabel, EvidenceCandidate) from iepy.preprocess import segmenter from iepy.preprocess.pipeline import PreProcessSteps IEPYDBConnector = namedtuple('IEPYDBConnector', 'segments documents') # Number of entities that will be cached on get_entity function. ENTITY_CACHE_SIZE = 20 # reasonable compromise logger = logging.getLogger(__name__) class DocumentManager(object):
def migrate_db(self):
    """Run the Django ``migrate`` command for this instance.

    Changes the working directory to ``self.abs_folder_path``, sets IEPY up
    on that same path and then hands ``["", "migrate"]`` to
    ``django_command_line``.
    """
    instance_root = self.abs_folder_path

    # Setup IEPY with the new instance
    os.chdir(instance_root)
    iepy.setup(instance_root)

    # NOTE(review): the leading "" presumably fills the argv[0] (program
    # name) slot expected by the command runner -- confirm.
    migrate_argv = ["", "migrate"]
    django_command_line(migrate_argv)
""" IEPY DB Abstraction level. The goal of this module is to provide some thin abstraction between the chosen database engine and ORM and the IEPY core and tools. """ from collections import defaultdict, namedtuple from functools import lru_cache from random import shuffle import logging import iepy iepy.setup() from iepy.data.models import ( IEDocument, IEDocumentMetadata, TextSegment, Relation, Entity, EntityKind, EntityOccurrence, EvidenceLabel, EvidenceCandidate ) from iepy.preprocess import segmenter from iepy.preprocess.pipeline import PreProcessSteps IEPYDBConnector = namedtuple('IEPYDBConnector', 'segments documents') # Number of entities that will be cached on get_entity function. ENTITY_CACHE_SIZE = 20 # reasonable compromise
def execute_from_command_line(argv=None):
    """Create a new IEPY instance folder, or download third-party data.

    Command-line options are parsed with docopt from this module's
    docstring. When ``--download-third-party-data`` is given, only
    ``download_third_party_data()`` is run and the function returns.

    Otherwise a new instance is created at ``<folder_path>``:

    * the folder and a ``bin`` sub-folder are created and the runner
      scripts are copied from ``THIS_FOLDER`` into ``bin``;
    * a stub ``rules.py`` and an ``extractor_config.json`` (from
      ``defaults.extractor_config``) are written;
    * the user is prompted for a database name (default: the folder name)
      and ``<folder name>_settings.py`` is written from
      ``get_settings_string``;
    * the working directory is changed to the instance, IEPY is set up on
      it, and the Django ``migrate`` and ``createsuperuser`` commands run.

    Args:
        argv: Argument list handed to docopt; ``None`` is passed through
            unchanged.

    Raises:
        SystemExit: with code 1 if ``<folder_path>`` already exists.
    """
    opts = docopt(__doc__, argv=argv, version=iepy.__version__)
    folder_path = opts["<folder_path>"]

    if opts["--download-third-party-data"]:
        download_third_party_data()
        return

    abs_folder_path = os.path.abspath(folder_path)
    if os.path.exists(folder_path):
        print("Error: folder already exists")
        sys.exit(1)

    script_names = [
        "csv_to_iepy.py",
        "preprocess.py",
        "iepy_runner.py",
        "iepy_rules_runner.py",
        "manage.py",
    ]

    # Create folders
    bin_folder = os.path.join(folder_path, "bin")
    os.mkdir(folder_path)
    os.mkdir(bin_folder)

    for script_name in script_names:
        shutil.copyfile(
            os.path.join(THIS_FOLDER, script_name),
            os.path.join(bin_folder, script_name),
        )

    # Create empty rules file
    rules_filepath = os.path.join(folder_path, "rules.py")
    with open(rules_filepath, "w") as filehandler:
        filehandler.write("# Write here your rules\n")
        filehandler.write("# RELATION = 'your relation here'\n")

    # Create extractor config
    extractor_config_filepath = os.path.join(folder_path, "extractor_config.json")
    with open(extractor_config_filepath, "w") as filehandler:
        json.dump(defaults.extractor_config, filehandler, indent=4)

    # Create the settings file
    print("Initializing database")
    # Take the name from the normalised absolute path: splitting the raw
    # argument on os.sep produced an empty name for "inst/" (trailing
    # separator) and ignored the alternate separator on Windows.
    folder_name = os.path.basename(abs_folder_path)
    database_name = input("Database name [{}]: ".format(folder_name))
    if not database_name:
        database_name = folder_name
    database_path = os.path.join(abs_folder_path, database_name)
    new_settings_filepath = "{}_settings.py".format(folder_name)
    settings_filepath = os.path.join(folder_path, new_settings_filepath)
    settings_data = get_settings_string(database_path)
    with open(settings_filepath, "w") as filehandler:
        filehandler.write(settings_data)

    # Setup IEPY with the new instance
    os.chdir(abs_folder_path)
    iepy.setup(abs_folder_path)
    django_command_line(["", "migrate"])

    # Setup the database user
    print("\nCreating database user")
    django_command_line(["", "createsuperuser"])

    print("\n IEPY instance ready to use at '{}'".format(abs_folder_path))
as is. Should be used with --trained-extractor --tune-for=<tune-for> Predictions tuning. Options are high-prec or high-recall [default: high-prec] --extractor-config=<config.json> Sets the extractor config --version Version number -h --help Show this screen """ import os import json import logging from docopt import docopt from sys import exit import iepy INSTANCE_PATH = iepy.setup(__file__) from iepy.extraction.active_learning_core import ActiveLearningCore, HIPREC, HIREC from iepy.data.db import CandidateEvidenceManager from iepy.data.models import Relation from iepy.extraction.terminal import TerminalAdministration from iepy.data import output def print_all_relations(): print("All available relations:") for relation in Relation.objects.all(): print(" {}".format(relation)) def load_labeled_evidences(relation, evidences):
def execute_from_command_line(argv=None):
    """Create a new IEPY instance folder, or download third-party data.

    Options are parsed with docopt from this module's docstring; the
    reported version is ``iepy.__version__``. With
    ``--download-third-party-data`` the function only calls
    ``download_third_party_data()`` and returns.

    Otherwise it builds a fresh instance at ``<folder_path>``: creates the
    folder and its ``bin`` sub-folder, copies the runner scripts from
    ``THIS_FOLDER``, writes a stub ``rules.py`` and an
    ``extractor_config.json``, asks for a database name (defaulting to the
    folder name), writes ``<folder name>_settings.py``, changes into the
    instance folder, sets IEPY up on it and runs the Django ``migrate`` and
    ``createsuperuser`` commands.

    Args:
        argv: Argument list handed to docopt; ``None`` is passed through
            unchanged.

    Raises:
        SystemExit: with code 1 if ``<folder_path>`` already exists.
    """
    # Report the real package version instead of a hard-coded float.
    opts = docopt(__doc__, argv=argv, version=iepy.__version__)
    folder_path = opts["<folder_path>"]

    if opts["--download-third-party-data"]:
        download_third_party_data()
        return

    abs_folder_path = os.path.abspath(folder_path)
    if os.path.exists(folder_path):
        print("Error: folder already exists")
        sys.exit(1)

    files_to_copy = [
        os.path.join(THIS_FOLDER, script_name)
        for script_name in (
            "csv_to_iepy.py",
            "preprocess.py",
            "iepy_runner.py",
            "iepy_rules_runner.py",
            "manage.py",
        )
    ]

    # Create folders
    bin_folder = os.path.join(folder_path, "bin")
    os.mkdir(folder_path)
    os.mkdir(bin_folder)

    for filepath in files_to_copy:
        destination = os.path.join(bin_folder, os.path.basename(filepath))
        shutil.copyfile(filepath, destination)

    # Create empty rules file
    rules_filepath = os.path.join(folder_path, "rules.py")
    with open(rules_filepath, "w") as filehandler:
        filehandler.write("# Write here your rules\n")
        filehandler.write("# RELATION = 'your relation here'\n")

    # Create extractor config
    extractor_config_filepath = os.path.join(folder_path, "extractor_config.json")
    with open(extractor_config_filepath, "w") as filehandler:
        json.dump(defaults.extractor_config, filehandler, indent=4)

    # Create the settings file
    print("Initializing database")
    # basename of the normalised absolute path: the previous
    # rsplit(os.sep, 1) yielded "" for a path ending in a separator, which
    # produced "_settings.py" and an empty default database name.
    folder_name = os.path.basename(abs_folder_path)
    database_name = input("Database name [{}]: ".format(folder_name))
    if not database_name:
        database_name = folder_name
    database_path = os.path.join(abs_folder_path, database_name)
    new_settings_filepath = "{}_settings.py".format(folder_name)
    settings_filepath = os.path.join(folder_path, new_settings_filepath)
    settings_data = get_settings_string(database_path)
    with open(settings_filepath, "w") as filehandler:
        filehandler.write(settings_data)

    # Setup IEPY with the new instance
    os.chdir(abs_folder_path)
    iepy.setup(abs_folder_path)
    django_command_line(["", "migrate"])

    # Setup the database user
    print("\nCreating database user")
    django_command_line(["", "createsuperuser"])

    print("\n IEPY instance ready to use at '{}'".format(abs_folder_path))
--classifier=<classifier_path> Load an already trained classifier --no-questions Won't generate questions to answer and will try to predict as is. Should be used with --classifier -h --help Show this screen --tune-for=<tune-for> Predictions tuning. Options are high-prec or high-recall [default: high-prec] --extractor-config=<config.json> Sets the extractor config --version Version number """ import os import json import logging from docopt import docopt from sys import exit import iepy INSTANCE_PATH = iepy.setup(__file__) from iepy.extraction.active_learning_core import ActiveLearningCore, HIPREC, HIREC from iepy.data.db import CandidateEvidenceManager from iepy.data.models import Relation from iepy.extraction.terminal import TerminalAdministration from iepy.data import output def print_all_relations(): print("All available relations:") for relation in Relation.objects.all(): print(" {}".format(relation)) def load_labeled_evidences(relation, evidences):
# iepy setup in order to be able to run individual test modules import iepy iepy.setup(_safe_mode=True)