def get_en_dictionary() -> List[str]:
    """Return the English (en_US) spellchecker word list.

    The words are read from a pickled cache located at
    ``<cwd>/data/spellchecker/hunspell-en_US-2018/en_US.pkl``. If that cache
    does not exist it is built from ``en_US.dic`` in the same directory: the
    first line of the source file is skipped, everything from the first ``/``
    onwards is dropped from each remaining line, and the resulting list is
    pickled so later calls can load it directly.

    Returns:
        The list of dictionary entries.

    Raises:
        ValueError: If neither the pickled cache nor the source ``.dic`` file
            can be found.
    """
    base_dir = os.path.join(os.getcwd(), 'data', 'spellchecker',
                            'hunspell-en_US-2018')
    path = os.path.join(base_dir, 'en_US.pkl')

    # file exists, load it and return it
    if check_if_file_exists(path):
        # NOTE(review): pickle.load can execute code embedded in the file;
        # this assumes the cache was produced locally by this function.
        with open(path, 'rb') as f:
            dictionary = pickle.load(f)
        if dictionary:
            logger.info(
                f'Dictionary successfully unpickeld. Loaded {len(dictionary)} words'
            )
        return dictionary

    logger.info(
        f'Pre pickeled spellchecker dictionary does not exist at {path}.')

    # load source dict and process it
    src_dict_path = os.path.join(base_dir, 'en_US.dic')
    if not check_if_file_exists(src_dict_path):
        # The f-prefix was missing here, so the placeholder was logged
        # literally instead of the actual path.
        logger.error(
            f'Could not find source spellchecker file at path {src_dict_path}. Please download it from the website.'
        )
        raise ValueError(
            f'Source spellchecker file at path {src_dict_path} was not found. Please download it from the website.'
        )

    # process file
    with open(src_dict_path, encoding='utf8') as input_file:
        # the first line is not a dictionary entry and is skipped
        next(input_file, None)
        dictionary = [
            line.split('/')[0].replace('\n', '') for line in input_file
        ]

    # save dict
    with open(path, 'wb') as f:
        pickle.dump(dictionary, f)
    logger.info(
        f'File successfully loaded and created. It is located at {path}')
    return dictionary
def _try_load(self, name, fields):
    """Try to restore previously cached samples and aspects for ``name``.

    The cache directory ``<cwd>/data/data/cache`` is created if necessary.
    Two files are expected inside it: ``<name>2.pkl`` (the samples) and
    ``<name>_2aspects.pkl`` (the aspects).

    Returns:
        ``([], None)`` when either cache file is missing. Otherwise a tuple of
        the unpickled samples and the result of ``self._construct_fields``
        applied to ``fields``; ``self.aspects`` is set to the unpickled
        aspects as a side effect.
    """
    cache_dir = os.path.join(os.getcwd(), 'data', 'data', 'cache')
    create_dir_if_necessary(cache_dir)

    samples_file = os.path.join(cache_dir, name + "2.pkl")
    aspects_file = os.path.join(cache_dir, name + "_2aspects.pkl")

    # both files are required; the second is only checked if the first exists
    cache_complete = (check_if_file_exists(samples_file)
                      and check_if_file_exists(aspects_file))
    if not cache_complete:
        return [], None

    with open(samples_file, 'rb') as samples_handle:
        cached_examples = pickle.load(samples_handle)

    with open(aspects_file, 'rb') as aspects_handle:
        self.aspects = pickle.load(aspects_handle)

    # get all fields
    constructed_fields = self._construct_fields(fields)
    return cached_examples, constructed_fields
def get_de_dictionary() -> List[str]:
    """Return the German spellchecker word list.

    The words are read from a pickled cache located at
    ``<cwd>/data/spellchecker/de/de_DE.pkl``. If that cache does not exist it
    is built from ``german.dic`` in the same directory: every line is decoded
    as ISO-8859-1, stripped of ``\\n`` and ``\\r`` characters, and the
    resulting list is pickled so later calls can load it directly.

    Returns:
        The list of dictionary entries.

    Raises:
        ValueError: If neither the pickled cache nor the source ``.dic`` file
            can be found.
    """
    base_dir = os.path.join(os.getcwd(), 'data', 'spellchecker', 'de')
    path = os.path.join(base_dir, 'de_DE.pkl')

    # file exists, load it and return it
    if check_if_file_exists(path):
        # NOTE(review): pickle.load can execute code embedded in the file;
        # this assumes the cache was produced locally by this function.
        with open(path, 'rb') as f:
            dictionary = pickle.load(f)
        if dictionary:
            logger.info(
                f'Dictionary successfully unpickeld. Loaded {len(dictionary)} words'
            )
        return dictionary

    logger.info(
        f'Pre pickeled spellchecker dictionary does not exist at {path}.')

    # load source dict and process it
    src_dict_path = os.path.join(base_dir, 'german.dic')
    if not check_if_file_exists(src_dict_path):
        # The f-prefix was missing here, so the placeholder was logged
        # literally instead of the actual path.
        logger.error(
            f'Could not find source spellchecker file at path {src_dict_path}. Please download it from the website. (https://sourceforge.net/projects/germandict/files/)'
        )
        raise ValueError(
            f'Source spellchecker file at path {src_dict_path} was not found. Please download it from the website. (https://sourceforge.net/projects/germandict/files/)'
        )

    # process file; it is read as bytes and decoded explicitly per line
    with open(src_dict_path, 'rb') as input_file:
        dictionary = [
            raw_line.decode("iso-8859-1", errors='strict')
            .replace('\n', '')
            .replace('\r', '')
            for raw_line in input_file
        ]

    # save dict
    with open(path, 'wb') as f:
        pickle.dump(dictionary, f)
    logger.info(
        f'File successfully loaded and created. It is located at {path}')
    return dictionary
def restore_bow(self, path):
    """ Restores a specific bag of words by restoring its vocabulary.

    Keyword arguments:
    path -- location of the saved bag of words

    Returns:
    True if the bag of words was restored, False if no file exists at path
    """
    if not utils.check_if_file_exists(path):
        # print(...) with a single argument behaves the same on Python 2 and 3
        print("Could not find bow. Path: {0}".format(path))
        return False

    self.bow = self.bow.load(path)
    self.bowTrained = True
    print("BOW successfully restored.")
    return True
def get_model_dict():
    """ Loads and returns the model dictionary.

    Returns:
    the unpickled model dictionary, or an empty dict if the dictionary file
    does not exist
    """
    # load classifier dictionary
    path = utils.get_data_path() + "model_dictionary"

    # initialize if file doesn't exist
    if not utils.check_if_file_exists(path):
        return {}

    # pickle data is binary; text mode fails on Python 3 and can corrupt
    # the stream on platforms that translate line endings
    with open(path, "rb") as f:
        return pickle.load(f)
def get_organic_dictionary() -> List[str]:
    """Read the organic-specific word list.

    The words are read from ``<cwd>/data/spellchecker/organic-words.txt``,
    one entry per line, with newline characters removed.

    Returns:
        The list of entries, or an empty list (after logging an error) when
        the file does not exist.
    """
    # load organic specific entities
    path = os.path.join(os.getcwd(), 'data', 'spellchecker',
                        'organic-words.txt')

    if check_if_file_exists(path):
        with open(path, encoding='utf8') as words_file:
            return [entry.replace('\n', '') for entry in words_file]

    logger.error(
        f'Could not find source spellchecker file at path {path}. Please download it from the website.'
    )
    return []
def restore_custom_marker(self):
    """ Restores a custom marker from a file.

    Does nothing if no saved marker file exists. Otherwise sets MARKER_SIZE,
    CUSTOM_MARKER_IMAGE and CUSTOM_MARKER from the saved marker file and the
    saved marker image.
    """
    path = utils.get_data_path() + "segmentationMarker"
    if not utils.check_if_file_exists(path):
        return

    # pickle data is binary; text mode fails on Python 3 and can corrupt
    # the stream on platforms that translate line endings
    with open(path, "rb") as f:
        markerFile = pickle.load(f)
    self.MARKER_SIZE = markerFile["markerDimension"]

    # restore kps: keypoints are re-detected on the saved marker image
    # (loaded with flag 0) and paired with the stored "marker" entry
    self.CUSTOM_MARKER_IMAGE = cv.imread(utils.get_data_path() + "segmentationMarkerImage.jpg", 0)
    sift = cv.SIFT()
    kp = sift.detect(self.CUSTOM_MARKER_IMAGE, None)
    self.CUSTOM_MARKER = (kp, markerFile["marker"])
    # print(...) with a single argument behaves the same on Python 2 and 3
    print("restored custom marker")
def get_organic_words_replacement() -> Dict:
    """Build the replacement table for hyphenated organic-specific words.

    Each line of ``<cwd>/data/spellchecker/organic-space-replace.txt`` yields
    one mapping: the key is the line with every ``-`` removed, the value is
    the line with every ``-`` replaced by a space.

    Returns:
        The replacement mapping, or an empty dict (after logging an error)
        when the file does not exist.
    """
    # load organic specific entities
    path = os.path.join(os.getcwd(), 'data', 'spellchecker',
                        'organic-space-replace.txt')
    if not check_if_file_exists(path):
        logger.error(
            f'Could not find source spellchecker file at path {path}. Please download it from the website.'
        )
        # Return an empty dict (not a list) so the result always matches the
        # annotated return type and supports dict operations.
        return {}

    with open(path, encoding='utf8') as input_file:
        entries = (line.replace('\n', '') for line in input_file)
        return {
            entry.replace('-', ''): entry.replace('-', ' ')
            for entry in entries
        }
def __init__(self,
             task,
             experiment_name,
             experiment_description,
             default_hp,
             overwrite_hp,
             data_loaders,
             dataset_infos,
             runs=5,
             load_model_path=None,
             produce_baseline=False):
    """Set up a transfer learning experiment.

    The arguments are stored on the instance; ``data_loaders`` is kept as
    ``self.dsls``. ``data_loaders`` must not be None and must contain as many
    entries as ``dataset_infos["data_root"]``, and ``runs`` must be positive
    (all enforced with assertions). If ``load_model_path`` is given, a
    message is printed and a second one follows when the path cannot be found.
    """
    # make sure preferences are set
    assert data_loaders is not None
    assert len(data_loaders) == len(dataset_infos["data_root"])
    assert runs > 0

    # experiment description
    self.task = task
    self.experiment_name = experiment_name
    self.experiment_description = experiment_description

    # hyper parameters; self.hp starts out unset
    self.default_hp = default_hp
    self.overwrite_hp = overwrite_hp
    self.hp = None

    # data and execution settings
    self.use_cuda = torch.cuda.is_available()
    self.dsls = data_loaders
    self.dataset_infos = dataset_infos
    self.runs = runs
    self.data_frame = pd.DataFrame()

    # model restoring / baseline options
    self.load_model_path = load_model_path
    self.skip_source_training = False  # skip training if source model loaded
    self.produce_baseline = produce_baseline

    print(
        f'Transfer Learning Experiment {self.experiment_name} initialized. Source: {dataset_infos["data_root"][0]} -> Target {dataset_infos["data_root"][1]}'
    )

    if self.load_model_path is None:
        return

    print(f'Try to restore model at ' + self.load_model_path)
    model_path_found = utils.check_if_file_exists(self.load_model_path)
    if not model_path_found:
        print(
            f'Could not find model path. Please make sure the directory exists.'
        )
def plot_dataset_stats(self, samples, labels, title, fileName):
    """Save a bar plot of ``samples`` per aspect label to the stats folder.

    The image is written to ``self.img_stats_folder / fileName``; the output
    format is taken from the part of ``fileName`` after its last dot.
    Nothing is done when the target file already exists. Any error raised
    while plotting is logged through ``self.logger`` and not propagated.

    Args:
        samples: Values plotted on the y axis (column ``Samples``).
        labels: Values plotted on the x axis (column ``Aspect``).
        title: Plot title; also used in the error log message.
        fileName: Name of the image file to create.
    """
    path = os.path.join(self.img_stats_folder, fileName)

    # don't generate if already exists
    if check_if_file_exists(path):
        return

    fig = None
    try:
        df = pd.DataFrame({'Samples': samples, 'Aspect': labels})

        fig = plt.figure(figsize=(20, 10))
        ax = sns.barplot(data=df, color='b', x='Aspect', y='Samples')
        plt.title(title, fontsize=20)
        plt.xticks(rotation=45, ha="right")

        ax.get_yaxis().get_major_formatter().set_scientific(False)
        plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)
        # thousands separator, no decimals, on the y axis tick labels
        ax.yaxis.set_major_formatter(
            mpl.ticker.StrMethodFormatter('{x:,.0f}'))
        plt.savefig(path, format=fileName.split('.')[-1])
    except Exception:
        self.logger.exception('Could not plot ' + title)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this each call would leave one more figure in memory
        if fig is not None:
            plt.close(fig)
def load_spellchecker_cache(self, language):
    """Restore the spellchecker replacement cache for ``language``.

    Unpickles ``<cwd>/data/spellchecker/<language>_cache.pkl`` into
    ``self.spellCheckerReplaced``. Does nothing when that file is missing.
    """
    cache_file = os.path.join(os.getcwd(), 'data', 'spellchecker',
                              language + '_cache.pkl')
    if not check_if_file_exists(cache_file):
        return

    with open(cache_file, 'rb') as cache_handle:
        self.spellCheckerReplaced = pickle.load(cache_handle)
def load_model(self, modelUuid, manualPath=None): """ Load model from either model dictionary or manually. Keyword arguments: modelUuid -- uuid of the model. If None the method will try to load the model using the manualPath. manualPath -- if model shall be loaded manually this is the root path of the model directory Returns: model """ # manual mode. Load model that is not part of the model dictionary. if modelUuid is None: classifier = None try: with open(manualPath + "model", "rb") as f: classifier = pickle.load(f) except: logging.exception("Could not load model manually") return None return classifier else: # Load model from model dictionary modelParams = self.get_model_param(modelUuid) modelSavePath = modelParams[4] modelTypeId = modelParams[0] testdata = TestData(modelParams[3], 1, True) if not self.does_model_exist(modelUuid): raise AttributeError( "Model with uuid {0} was not found in model dictionary.". format(modelUuid)) if modelTypeId == "SIFT": from classification.local_features.sift import SIFTClassifier from classification.model import ModelType model = SIFTClassifier(testdata, Settings.E_MODEL_TYPE) model = model.load(modelSavePath) elif modelTypeId == "SURF": from classification.local_features.surf import SURFClassifier from classification.model import ModelType model = SURFClassifier(testdata, Settings.E_MODEL_TYPE) return model.load(modelSavePath) elif modelTypeId == "HIST": from classification.global_features.histogram import HistogramClassifier from classification.model import ModelType model = HistogramClassifier(testdata, Settings.E_MODEL_TYPE) return model.load(modelSavePath) if modelTypeId.startswith("mCL"): from classification.late_fusion import MajorityClassifier model = MajorityClassifier(testdata) try: with open(modelSavePath + "model", "r") as f: model = pickle.load(f) except: logging.exception("Could not load majority classifier.") return None return model # NNs or CNNs if modelTypeId.startswith("NN") or modelTypeId.startswith("CNN"): 
from classification.deep.neural_net import * # load testdata because we need the output shape modelWrapper = NeuralNetClassifier(testdata, modelParams[3]) # search for best weights if not utils.check_if_file_exists(modelSavePath + "model"): print "[!] Model file {0} was not found.".format( modelSavePath + "model") continue_ = utils.radio_question( "[?]", "It might be possible to restore the model using the weights file. Continue?", None, ["Yes", "No"], [True, False]) if not continue_: delete = utils.radio_question("[?]", "Delete model?", None, ["Yes", "No"], [True, False]) if delete: remove_model(modelUuid) raise Exception("Model file does not exist.") # try to restore best weights if more recent bestWeights = None if modelParams[7] == "nn_weights" and utils.check_if_file_exists( modelSavePath + "best_weights"): bestWeights = modelSavePath + "best_weights" modelWrapper.load_model(modelSavePath + "model", bestWeights) # restore params modelWrapper.modelSaver.bestLoss = modelParams[5] modelWrapper.modelSaver.modelDescription = modelParams[2] modelWrapper.modelSaver.modelUuid = modelUuid return modelWrapper if modelTypeId is None or modelTypeId == "None": print "There was a problem loading this model {0}. The save file might be corrupted. Model Dictionary {1}".format( modelTypeId, modelParams) if utils.radio_question("[?]", "Repair model with new model type ID?", None, ["Yes", "No"], [True, False]): modelTypeId = utils.value_question("[?]", "Model ID", "s") update_model_dict(modelUuid, 0, modelTypeId) print "Model Id changed. Restart application and try again." raw_input("Press any key to continue.") import sys sys.exit() raise Exception("Could not repair model.") else: raise Exception( "Model {0} is not supported yet.".format(modelTypeId))