def test_path_resolution():
    '''Test that paths are correctly resolved'''
    import os
    import pathlib

    from config import Path

    path = Path()
    expected = pathlib.Path(os.environ['HOME']) / 'bar'
    assert path.validate('~/foo/../bar') == expected
class Writer(object):
    """Write HTML fragments generated by docutils parser."""

    def __init__(self, config):
        #self.config = config
        self.path = Path(config)
        self.parser = Parser(config)

    def run(self):
        for source_abspath in self.parser.get_all_files():
            print source_abspath
            fragment = self.parser.get_fragment(source_abspath)
            fragment_abspath = self.path.get_fragment_abspath(source_abspath)
            self.write_fragment(fragment, fragment_abspath)
        print "Done."

    def write_fragment(self, fragment, fragment_abspath):
        with self.open_fragment_file(fragment_abspath) as fout:
            fout.write(fragment.encode('utf-8') + '\n')

    def open_fragment_file(self, fragment_abspath):
        self.make_fragment_dir(fragment_abspath)
        return open(fragment_abspath, "w")

    def make_fragment_dir(self, fragment_abspath):
        fragment_dir = os.path.dirname(fragment_abspath)
        if not os.path.isdir(fragment_dir):
            os.makedirs(fragment_dir)
def create_dataset_folder_structure():
    """
    Creates the folder structure for the new dataset_operations.
    """
    path = Path(f'{DATASETS}/{FEATURES_DATASET}')
    if not os.path.exists(path):
        print(
            f'\nWARNING: The path does not exist. Creating new directory...\n{path}\n'
        )
        os.mkdir(path)
    try:
        for path in new_sensor_paths:
            if not os.path.exists(path):
                print(
                    f'\nWARNING: The path does not exist. Creating new directory...\n{path}\n'
                )
                os.mkdir(path)
            else:
                print("\nPath already exists!")
    except OSError:
        return False
    else:
        return True
def __init__(self, config, graph):
    self.config = config
    self.graph = graph
    self.path = Path(config)
    self.parser = Parser(config)
    self.writer = Writer(config)
    self.loader = Loader(config, graph)
def calculate_weigths_labels(dataset, dataloader, num_classes):
    # Create an instance from the data loader
    z = np.zeros((num_classes, ))
    # Initialize tqdm
    tqdm_batch = tqdm(dataloader)
    print('Calculating classes weights')
    for sample in tqdm_batch:
        y = sample['label']
        y = y.detach().cpu().numpy()
        mask = (y >= 0) & (y < num_classes)
        labels = y[mask].astype(np.uint8)
        count_l = np.bincount(labels, minlength=num_classes)
        z += count_l
    tqdm_batch.close()
    total_frequency = np.sum(z)
    class_weights = []
    for frequency in z:
        class_weight = 1 / (np.log(1.02 + (frequency / total_frequency)))
        class_weights.append(class_weight)
    ret = np.array(class_weights)
    classes_weights_path = os.path.join(Path.db_root_dir(dataset),
                                        dataset + '_classes_weights.npy')
    np.save(classes_weights_path, ret)
    return ret
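# Usage sketch (illustrative, not taken from the original sources): the weights
# file written by calculate_weigths_labels() above can be loaded and passed to
# a weighted cross-entropy loss. The helper name and the ignore_index default
# are assumptions for this example; only Path.db_root_dir() and the file naming
# come from the code above.
import os

import numpy as np
import torch


def build_weighted_criterion(dataset, num_classes, ignore_index=255):
    # Load the per-class weights saved by calculate_weigths_labels()
    weights_file = os.path.join(Path.db_root_dir(dataset),
                                dataset + '_classes_weights.npy')
    weight = torch.from_numpy(np.load(weights_file).astype(np.float32))
    assert weight.numel() == num_classes
    return torch.nn.CrossEntropyLoss(weight=weight, ignore_index=ignore_index)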
def collect_memory_snapshot(self, pid="sys"):
    """
    Collecting memory snapshot into csv files
    :pid: process or package, by default getting snapshot from system
    """
    meminfo = self._get_sys_meminfo() if pid == "sys" else self._get_ps_meminfo(pid)
    headers_row = [i for i in meminfo.keys()]
    values_row = [i for i in meminfo.values()]
    path = Path.sys() if pid == "sys" else Path.pid(pid)
    if os.path.isfile(path):
        CSV.append_row(path, values_row)
    else:
        CSV.append_row(path, headers_row)
        CSV.append_row(path, values_row)
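# Illustrative polling loop (assumed, not part of the original tool): one way
# collect_memory_snapshot() above could be driven to build a time series.
# "profiler" stands in for whatever object exposes that method; the interval
# and iteration count are arbitrary.
import time


def poll_memory(profiler, pid="sys", interval_sec=5, iterations=10):
    for _ in range(iterations):
        profiler.collect_memory_snapshot(pid)
        time.sleep(interval_sec)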
def _add_process(pid):
    with open(Path.template_process()) as file:
        soup = BeautifulSoup(file, features="html.parser")
    tag_graph = soup.find(id="graph")
    tag_graph.string = Graph.gen_pid_graph(pid)
    tag_meminfo = soup.find(id="meminfo")
    tag_meminfo.string = "<object height=100% width=100% type='text/plain' data=\"./{pid}.txt\"></object>".format(
        pid=pid)
    return soup
def __init__(self, config, graph):
    #self.changelog = ChangeLog(config)
    self.path = Path(config)
    changelog_dir = self.path.get_working_etc()
    self.changelog = ChangeLog(changelog_dir)
    self.changelog.initialize(config)
    self.parser = Parser(config)
    self.graph = graph
def process_repository(session, status, repository, query_iter):
    query_iter = list(query_iter)
    zip_path = None
    tarzip = None
    if not repository.path.exists():
        if not repository.zip_path.exists():
            repository.processed |= consts.R_UNAVAILABLE_FILES
            session.add(repository)
            status.count += len(query_iter)
            return "Failed. Repository not found: {}".format(repository)
        tarzip = tarfile.open(str(repository.zip_path))
        zip_path = Path(repository.hash_dir2)

    shell = InteractiveShell.instance()
    group = groupby(
        query_iter,
        lambda x: (x[1])
    )
    for notebook, new_iter in group:
        cells = list(query_iter)
        vprint(1, "Processing notebook: {}. Found {} cells".format(notebook, len(cells)))
        name = notebook.name
        vprint(2, "Loading notebook file")
        if tarzip:
            notebook = nbf.read(
                tarzip.extractfile(tarzip.getmember(str(zip_path / name))),
                nbf.NO_CONVERT
            )
        else:
            with open(str(repository.path / name)) as ofile:
                notebook = nbf.read(ofile, nbf.NO_CONVERT)
        notebook = nbf.convert(notebook, 4)
        metadata = notebook["metadata"]
        language_info = metadata.get("language_info", {})
        language_name = language_info.get("name", "unknown")

        for cell, _, _ in new_iter:
            vprint(2, "Loading cell {}".format(cell.index))
            index = int(cell.index)
            notebook_cell = notebook["cells"][index]
            source = notebook_cell.get("source", "")
            if language_name == "python" and notebook_cell.get("cell_type") == "code":
                try:
                    source = shell.input_transformer_manager.transform_cell(source)
                except (IndentationError, SyntaxError):
                    pass
            cell.source = source
            if cell.processed & consts.C_MARKED_FOR_EXTRACTION:
                cell.processed -= consts.C_MARKED_FOR_EXTRACTION
            session.add(cell)
    session.commit()
    return "ok"
def sort_dataset_by_age():
    """
    Sorts the Dataset created by create_dataset() into a new Age sorted Dataset.
    """
    data = read_csv(Path(f'{data_files_path}/subject_data'))
    limits = get_limits(ageGroups)
    sortedCount = 0
    # For every age bin
    for target_folder, limit in limits.items():
        # Get the indexes of all files to be copied to the target folder
        index_list = list(data[(data['Age'] >= limit[0])
                               & (data['Age'] <= limit[1])].index)
        subjectCount = 0
        # For every file to be copied
        for i in index_list:
            filename = data.iloc[i]['Filename']
            temp = sortedCount
            # Get the source and destination file paths
            for src, dest in zip(new_sensor_paths, sensor_dirs[target_folder]):
                # if the file exists in the source directory
                if os.path.exists(Path(f'{src}/{filename[:-4]}.csv')):
                    # copy it to the destination directory
                    copyfile(Path(f'{src}/{filename[:-4]}.csv'),
                             Path(f'{dest}/{filename[:-4]}.csv'))
                    if temp == sortedCount:
                        sortedCount += 1
                        subjectCount += 1
                # print(f'src = {src}\ndest = {dest}\n\n')
        print(f'\n# of Subjects in "{target_folder}" = {subjectCount}')
    print(
        f'\nTotal subjects sorted = {sortedCount} ({round((sortedCount / len(data)) * 100, 2)}% of total data)\n'
    )
def __init__(
    self,
    args,
    base_dir=Path.db_root_dir('pascal'),
    split='train',
):
    """
    :param base_dir: path to VOC dataset directory
    :param split: train/val
    :param transform: transform to apply
    """
    super().__init__()
    self._base_dir = base_dir
    self._image_dir = os.path.join(self._base_dir, 'JPEGImages')
    self._cat_dir = os.path.join(self._base_dir, 'SegmentationClass')

    if isinstance(split, str):
        self.split = [split]
    else:
        split.sort()
        self.split = split

    self.args = args

    _splits_dir = os.path.join(self._base_dir, 'ImageSets', 'Segmentation')

    self.im_ids = []
    self.images = []
    self.categories = []

    for splt in self.split:
        with open(os.path.join(_splits_dir, splt + '.txt'), "r") as f:
            lines = f.read().splitlines()

        for ii, line in enumerate(lines):
            _image = os.path.join(self._image_dir, line + ".jpg")
            _cat = os.path.join(self._cat_dir, line + ".png")
            assert os.path.isfile(_image)
            assert os.path.isfile(_cat)
            self.im_ids.append(line)
            self.images.append(_image)
            self.categories.append(_cat)

    assert (len(self.images) == len(self.categories))

    # Display stats
    print('Number of images in {}: {:d}'.format(split, len(self.images)))
def __init__(
    self,
    args,
    base_dir=Path.db_root_dir('sbd'),
    split='train',
):
    """
    :param base_dir: path to VOC dataset directory
    :param split: train/val
    :param transform: transform to apply
    """
    super().__init__()
    self._base_dir = base_dir
    self._dataset_dir = os.path.join(self._base_dir, 'dataset')
    self._image_dir = os.path.join(self._dataset_dir, 'img')
    self._cat_dir = os.path.join(self._dataset_dir, 'cls')

    if isinstance(split, str):
        self.split = [split]
    else:
        split.sort()
        self.split = split

    self.args = args

    # Get list of all images from the split and check that the files exist
    self.im_ids = []
    self.images = []
    self.categories = []

    for splt in self.split:
        with open(os.path.join(self._dataset_dir, splt + '.txt'), "r") as f:
            lines = f.read().splitlines()

        for line in lines:
            _image = os.path.join(self._image_dir, line + ".jpg")
            _categ = os.path.join(self._cat_dir, line + ".mat")
            assert os.path.isfile(_image)
            assert os.path.isfile(_categ)
            self.im_ids.append(line)
            self.images.append(_image)
            self.categories.append(_categ)

    assert (len(self.images) == len(self.categories))

    # Display stats
    print('Number of images: {:d}'.format(len(self.images)))
def create_dataset(subs_list, indexing=True):
    """
    Creates the New Dataset using features calculated from the base data.

    Parameters
    ----------
    subs_list : list
        list of subjects to create the new dataset_operations for
    indexing : bool, optional
        dataset_operations index column (default = True)

    """
    S = None
    print(
        f'\nProcess - {current_process().name} has {len(subs_list)} files to work on.\n'
    )
    try:
        start = time()
        repo = (Subject(sub) for sub in subs_list)
        for sub in repo:
            S = sub
            for i in range(3):
                filePath = Path(
                    f'{new_sensor_paths[i]}/{sub.subject_id[:-4]}.csv')
                if not os.path.exists(filePath):
                    # Most expensive line of code in the module (Takes hours)
                    col_names, df, _, _, _ = feature_extractor(
                        sub, sensors[i].lower(), output_type='df')
                    df.to_csv(filePath, sep="\t", index=indexing)
                    print(
                        f"File generated - '{sub.subject_id[:-4]}.csv' by process : {current_process().name}"
                    )
                else:
                    print(f'File "{sub.subject_id[:-4]}.csv" already exists!')
        print(
            f'\nTime taken by - {current_process().name} : {time() - start:.2f} secs'
        )
    except Exception as e:
        print(f"Exception occurred in {current_process().name}\n")
        print(f'While working on this portion of the subs_list:\n'
              f'{subs_list}')
        print(f'Error occurred in FILE # {S.subject_id}\n')
        raise e
def create_age_folder_structure():
    """
    Creates the folder structure for the Age Sorted Dataset.
    """
    try:
        new_dataset_path = Path(f'{DATASETS}/{FEATURES_DATASET}_Age_Sorted')
        if not os.path.exists(new_dataset_path):
            print(
                f'\nWARNING: The path does not exist. Creating new directory...\n{new_dataset_path}\n'
            )
            os.mkdir(new_dataset_path)
    except OSError:
        print(
            "ERROR in creating the sorted dataset_operations directory within folder /Data Sets"
        )
        return False

    try:
        for folder, age_dir in age_dirs.items():
            if not os.path.exists(age_dir):
                os.mkdir(age_dir)
            else:
                print(f"The directory {folder} already exists.")
    except OSError:
        print(
            "ERROR in creating age based directories in /Data Sets/Dataset_Age_Sorted"
        )
        return False

    try:
        for sub_folder, sensor_dir in sensor_dirs.items():
            for sub_path in sensor_dir:
                if not os.path.exists(sub_path):
                    os.mkdir(sub_path)
                else:
                    print(f"The directory {sub_path} already exists.")
        return True
    except OSError:
        print(
            "ERROR in creating sensor directories in /Data Sets/Dataset_Age_Sorted/[age_Groups]"
        )
        return False
def __init__(self, args, base_dir=Path.db_root_dir('coco'), split='train', year='2017'):
    super().__init__()
    ann_file = os.path.join(
        base_dir, 'annotations/instances_{}{}.json'.format(split, year))
    ids_file = os.path.join(
        base_dir, 'annotations/{}_ids_{}.pth'.format(split, year))
    self.img_dir = os.path.join(base_dir, 'images/{}{}'.format(split, year))
    self.split = split
    self.coco = COCO(ann_file)
    self.coco_mask = mask
    if os.path.exists(ids_file):
        self.ids = torch.load(ids_file)
    else:
        ids = list(self.coco.imgs.keys())
        self.ids = self._preprocess(ids, ids_file)
    self.args = args
def make_data_loader(args, **kwargs):
    if args.dataset:
        base_dirs = Path.db_root_dir(args.dataset)
        print('Training data:{}'.format(base_dirs['train']))
        train_loader = DataLoader(dataset=NYUDataset(base_dirs['train'], istest=False),
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.workers)
        print('Validate data:{}'.format(base_dirs['val']))
        val_loader = DataLoader(
            dataset=NYUDataset(base_dirs['val'], istest=True),
            batch_size=args.batch_size,  # 1 * torch.cuda.device_count(), 1 for each GPU
            shuffle=False,
            num_workers=args.workers  # 1 * torch.cuda.device_count()
        )
        return train_loader, val_loader
def main():
    parser = argparse.ArgumentParser(description="Check pid")
    parser.add_argument("-c", "--count", action='store_true',
                        help="count active processes")
    parser.add_argument("-e", "--clear", action='store_true',
                        help="clear not running processes")
    parser.add_argument("-s", "--simplify", action='store_true',
                        help="simplify output")
    args = parser.parse_args()

    if not Path(".pid").exists():
        return
    with open(".pid", "r") as fil:
        pids = fil.readlines()

    new_pids = []
    for pid in pids:
        pid = pid.strip()
        if not pid:
            continue
        try:
            process = psutil.Process(int(pid))
            if not args.count:
                cmd = process.cmdline()
                if args.simplify and len(cmd) > 20:
                    cmd = cmd[:20]
                    cmd.append("...")
                print("{}: {}".format(pid, " ".join(cmd)))
            new_pids.append(pid)
        except psutil.NoSuchProcess:
            if not args.count and not args.clear:
                print("{}: <not found>".format(pid))

    if args.count:
        print(len(new_pids))
    if args.clear:
        with open(".pid", "w") as fil:
            fil.write("\n".join(new_pids) + "\n")
def file_exists(subs_list):
    """
    Checks to see if any previous files with feature extracted data exist in the Dataset
    and returns the updated list of files which don't exist in the Dataset.
    This is done because generating the files is expensive and this avoids
    having to start over from scratch.

    Parameters
    ----------
    subs_list : list
        Complete subjects list

    Returns
    -------
    updated_subs : list
        list of subject files which are not already in the new Dataset

    """
    updated_subs = []
    print(f'Checking for existing files in directories:\n')
    for dir in new_sensor_paths:
        print(f'{dir}')
        updated_subs += subs_list
    print()
    for sub in subs_list:
        for i in range(3):
            filePath = Path(f'{new_sensor_paths[i]}/{sub[:-4]}.csv')
            if not os.path.exists(filePath):
                pass
            else:
                updated_subs.pop(updated_subs.index(sub))
    updated_subs = list(sorted(set(updated_subs)))
    print(f'There were {len(subs_list) - len(updated_subs)} existing files!\n')
    print(
        f'The updated subjects list now contains {len(updated_subs)} entries.\n'
    )
    return updated_subs
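# Illustrative driver sketch (assumed, not from the original module): one way
# the helpers in this section could be tied together using the GENERATE_DATASET
# and SORT_BY_AGE flags defined in the configuration block below. The
# get_subjects_list() helper is hypothetical; everything else is named after
# functions that appear above.
def run_pipeline():
    if GENERATE_DATASET:
        if create_dataset_folder_structure():
            # Skip subjects whose feature files were already generated
            subs_list = file_exists(get_subjects_list())  # hypothetical listing helper
            create_dataset(subs_list)
    if SORT_BY_AGE:
        if create_age_folder_structure():
            sort_dataset_by_age()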
def __init__(self, args, root=Path.db_root_dir('cityscapes'), split="train"):

    self.root = root
    self.split = split
    self.args = args
    self.files = {}

    self.images_base = os.path.join(self.root, 'leftImg8bit', self.split)
    self.annotations_base = os.path.join(self.root, 'gtFine_trainvaltest', 'gtFine', self.split)

    self.files[split] = self.recursive_glob(rootdir=self.images_base, suffix='.png')

    self.void_classes = [
        0, 1, 2, 3, 4, 5, 6, 9, 10, 14, 15, 16, 18, 29, 30, -1
    ]
    self.valid_classes = [
        7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33
    ]
    self.class_names = ['unlabelled', 'road', 'sidewalk', 'building', 'wall', 'fence',
                        'pole', 'traffic_light', 'traffic_sign', 'vegetation', 'terrain',
                        'sky', 'person', 'rider', 'car', 'truck', 'bus', 'train',
                        'motorcycle', 'bicycle']

    self.ignore_index = 255
    self.class_map = dict(zip(self.valid_classes, range(self.NUM_CLASSES)))

    if not self.files[split]:
        raise Exception("No files for split=[%s] found in %s" % (split, self.images_base))

    print("Found %d %s images" % (len(self.files[split]), split))
def _save_report(self, name):
    with open(os.path.join(Path.processing_dir(), "{}.html".format(name)), "w") as file:
        file.write(
            str(self.soup).replace("&lt;", "<").replace("&gt;", ">"))
class Command(object):

    def __init__(self, config, graph):
        self.config = config
        self.graph = graph
        self.path = Path(config)
        self.parser = Parser(config)
        self.writer = Writer(config)
        self.loader = Loader(config, graph)

    # Public methods

    def new(self, filename):
        # TODO: parse out docid, maybe sign docid
        try:
            assert filename.endswith(self.config.source_ext)
        except AssertionError as e:
            print "File name must end with %s" % self.config.source_ext
            sys.exit(1)
        source_dir = self.path.get_source_dir()
        source_abspath = os.path.join(source_dir, filename)
        content = self._build_initial_source(filename)
        print "Creating file: %s" % source_abspath
        self._create_file(source_abspath, content)
        return source_abspath

    def edit(self, filename):
        # Open new file in the editor specified in the yaml config file
        editor = self.config.editor
        source_path = self.new(filename)
        process = "%s %s" % (editor, source_path)
        return subprocess.call(process.split())

    def init(self):
        # Make sure author is pre-loaded in the database
        data = dict(username=self.config.username, name=self.config.name)
        author = self.graph.people.get_or_create("username", self.config.username, data)

    def build(self):
        # Create HTML fragments
        self.writer.run()

    def update(self):
        # Update blog entries
        self.loader.update_entries()

    # Execute one of the above methods
    def _execute(self, command_name, command_args):
        command = getattr(self, command_name)
        return command(*command_args)

    # Private methods

    def _create_file(self, source_abspath, content):
        self._make_dir(source_abspath)
        with open(source_abspath, "w") as fout:
            fout.writelines(content)

    def _build_initial_source(self, filename):
        # generate the source from template
        template_path = self.path.get_rst_template_path()
        template = get_template(template_path)
        params = self._get_params(filename)
        source = template.substitute(params)
        return source

    def _get_params(self, filename):
        # Get template params
        docid = uuid.uuid4().hex
        date = datetime.datetime.now().strftime("%Y-%m-%d")
        username = self.config.username or getpass.getuser()
        title = self._get_title(filename)
        title_line = "=" * len(title)
        params = dict(title=title, title_line=title_line, docid=docid,
                      author=username, date=date)
        return params

    def _get_title(self, filename):
        stub = os.path.splitext(filename)[0]
        word_list = stub.split(self.config.separator)
        words = " ".join(word_list)
        title = titlecase(words)
        return title

    def _write_file(self, file_path, content):
        with open(file_path, "w") as fout:
            fout.write(content.encode('utf-8') + '\n')

    def _make_dir(self, path):
        # mkpath
        dirname = os.path.dirname(path)
        if not os.path.isdir(dirname):
            print "Creating dir: %s" % dirname
            os.makedirs(dirname)
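# Illustrative dispatch sketch (not part of the original module): one way the
# Command._execute() helper above could be driven from the command line. The
# build_config()/build_graph() factories are assumptions for this example; only
# Command and _execute() come from the class above.
import sys


def run_command_line(argv=None):
    argv = sys.argv[1:] if argv is None else argv
    command_name, command_args = argv[0], argv[1:]
    config = build_config()       # hypothetical config factory
    graph = build_graph(config)   # hypothetical graph factory
    command = Command(config, graph)
    return command._execute(command_name, command_args)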
def initialize(self, config):
    self.config = config
    self.path = Path(config)
    assert self.db_abspath == self.path.get_changelog_abspath()
class ChangeLog(PickleDB):

    db_name = "changelog"

    def initialize(self, config):
        self.config = config
        self.path = Path(config)
        assert self.db_abspath == self.path.get_changelog_abspath()

    def update(self):
        # File exists so go ahead and read/write to the changelog
        if self.exists() is False:
            print "CHANGELOG NOT FOUND: Will add/update all entries in database on push."
            # Remove the old changelog from git so it doesn't persist on the server
            self._remove_changelog()
            return
        diff = self._get_diff()
        if not diff:
            return
        self._write_diff(diff)
        self._display()
        return self.data

    def _display(self):
        print "CHANGELOG"
        for filename in self.data:
            status, timestamp = self.data[filename]
            print timestamp, status, filename
        print

    def _write_diff(self, diff):
        source_dir = self.path.get_source_dir()
        start = self.path.get_working_dir()
        source_folder = os.path.relpath(source_dir, start)
        for status, filename in self._split_diff(diff):
            # filter out files that don't include the source_dir
            if re.search(source_folder, filename) and filename.endswith(self.config.source_ext):
                # Git diff is NOT sorted by modified time.
                # We need it ordered by time so use timestamp instead
                timestamp = self._current_timestamp()
                # remove it from the dict and add it back so more recent entries are always last
                self.data.pop(filename, None)
                self.data[filename] = (status, timestamp)
        self.write()
        # Add the changelog to git now that it has been updated.
        self._add_changelog()
        return self.data

    def _current_timestamp(self):
        return int(time.time())

    def _split_diff(self, diff):
        lines = [line.split('\t') for line in diff.strip().split('\n')]
        return lines

    def _get_diff(self):
        # git diff is NOT sorted by modified time
        #command = "git diff --cached --name-only"
        git_dir = self.path.get_git_dir()
        working_dir = self.path.get_working_dir()
        command = "git diff --cached --name-status"
        return self._execute(command)

    def _add_changelog(self):
        # Add the changelog to git after it has been updated.
        command = "git add %s" % self.path.get_changelog_abspath()
        self._execute(command)

    def _remove_changelog(self):
        # Doing this so the old changelog doesn't persist on the server
        command = "git rm %s" % self.path.get_changelog_abspath()
        print self._execute(command)

    def _execute(self, command):
        # Setting Git env vars to ensure proper paths when running outside of working dir
        os.putenv("GIT_DIR", self.path.get_git_dir())
        os.putenv("GIT_WORK_TREE", self.path.get_working_dir())
        return execute(command)
def test_path_validate(tmp_path):
    from config import Path

    not_existing_path = tmp_path / 'nope'
    existing_dir = tmp_path
    existing_file = tmp_path / 'yes'
    with existing_file.open('w'):
        pass

    item = Path()
    assert item.validate(None) is None
    assert item.validate(not_existing_path) == not_existing_path
    assert item.validate(existing_dir) == existing_dir
    assert item.validate(existing_file) == existing_file

    item = Path(dir_okay=False)
    assert item.validate(None) is None
    assert item.validate(not_existing_path) == not_existing_path
    assert item.validate(existing_file) == existing_file
    with pytest.raises(ConfigError):
        item.validate(existing_dir)

    item = Path(file_okay=False)
    assert item.validate(None) is None
    assert item.validate(not_existing_path) == not_existing_path
    assert item.validate(existing_dir) == existing_dir
    with pytest.raises(ConfigError):
        item.validate(existing_file)

    item = Path(exists=False)
    assert item.validate(None) is None
    assert item.validate(not_existing_path) == not_existing_path
    with pytest.raises(ConfigError):
        item.validate(existing_file)
    with pytest.raises(ConfigError):
        item.validate(existing_dir)

    item = Path(allow_none=False)
    with pytest.raises(ConfigError):
        item.validate(None)
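# Minimal sketch of a config item consistent with the behaviour exercised by
# the tests above; the real config.Path may be implemented differently, and
# ConfigError here stands in for the project's own exception type.
import os
import pathlib


class ConfigError(Exception):
    pass


class Path:

    def __init__(self, exists=True, file_okay=True, dir_okay=True, allow_none=True):
        self.exists = exists          # False means the path must not already exist
        self.file_okay = file_okay
        self.dir_okay = dir_okay
        self.allow_none = allow_none

    def validate(self, value):
        if value is None:
            if not self.allow_none:
                raise ConfigError("value may not be None")
            return None
        # Expand '~' and resolve '..' so '~/foo/../bar' becomes $HOME/bar
        path = pathlib.Path(os.path.expanduser(str(value))).resolve()
        if path.exists():
            if not self.exists:
                raise ConfigError("path already exists: %s" % path)
            if path.is_dir() and not self.dir_okay:
                raise ConfigError("directory not allowed: %s" % path)
            if path.is_file() and not self.file_okay:
                raise ConfigError("file not allowed: %s" % path)
        return path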
# Configuration Variables
# ------------------------
GENERATE_DATASET = True
SORT_BY_AGE = False
TESTING = True
TEST_COUNT = 8  # Should be >= 4
# ------------------------

if not TESTING:
    FEATURES_DATASET = FEATURES_DATASET
else:
    FEATURES_DATASET = FEATURES_DATASET + "_TEST"

new_sensor_paths = [
    Path(f"{DATASETS}/{FEATURES_DATASET}/{sensor}") for sensor in sensors
]

if not os.path.exists(DATASETS):
    print(
        f'\nWARNING: The path does not exist. Creating new directory...\n{DATASETS}\n'
    )
    os.mkdir(DATASETS)


def create_dataset_folder_structure():
    """
    Creates the folder structure for the new dataset_operations.
    """
def __init__(self, config):
    self.config = config
    self.path = Path(config)
def __init__(self, args):
    self.args = args
    self.half = args.half
    self.prev_pred = 0.0
    self.bad_count = 0

    # Define Saver
    self.saver = Saver(args)
    self.saver.save_experiment_config()

    # Define Tensorboard Summary
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()

    # Define Dataloader
    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(
        args, **kwargs)

    # Define network
    model = DeepLab(num_classes=self.nclass,
                    backbone=args.backbone,
                    output_stride=args.out_stride,
                    sync_bn=args.sync_bn,
                    freeze_bn=args.freeze_bn)

    #train_params = [{'params': model.get_1x_lr_params(), 'lr': args.lr}, {'params': model.get_10x_lr_params(), 'lr': args.lr*10}]
    #optimizer = torch.optim.SGD(train_params, momentum=args.momentum, weight_decay=args.weight_decay, nesterov=args.nesterov)
    optimizer = torch.optim.SGD(params=model.parameters(),
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=args.nesterov)
    #optimizer = torch.optim.Adam(params=model.parameters(), lr=args.lr)

    # Define Criterion
    # whether to use class balanced weights
    if args.use_balanced_weights:
        classes_weights_path = os.path.join(
            Path.db_root_dir(args.dataset),
            args.dataset + '_classes_weights.npy')
        if os.path.isfile(classes_weights_path):
            weight = np.load(classes_weights_path)
        else:
            weight = calculate_weigths_labels(args.dataset, self.train_loader,
                                              self.nclass)
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None
    self.criterion = SegmentationLosses(
        weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
    self.model, self.optimizer = model, optimizer

    # Define lr scheduler
    self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs,
                                  len(self.train_loader))

    # Using cuda
    if args.cuda:
        self.model = torch.nn.DataParallel(self.model, device_ids=self.args.gpu_ids)
        patch_replication_callback(self.model)
        self.model = self.model.cuda()

    # Resuming checkpoint
    self.best_pred = 0.0
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        if args.cuda:
            self.model.module.load_state_dict(checkpoint['state_dict'])
        else:
            self.model.load_state_dict(checkpoint['state_dict'])
        if not args.ft:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch']))

    # Clear start epoch if fine-tuning
    if args.ft:
        args.start_epoch = 0
def __init__(self):
    with open(Path.template_report()) as file:
        self.soup = BeautifulSoup(file, features="html.parser")
class Loader(object):
    """Load blog entries into Neo4j."""

    def __init__(self, config, graph):
        #self.changelog = ChangeLog(config)
        self.path = Path(config)
        changelog_dir = self.path.get_working_etc()
        self.changelog = ChangeLog(changelog_dir)
        self.changelog.initialize(config)
        self.parser = Parser(config)
        self.graph = graph

    def changelog_exists(self):
        return self.changelog.exists()

    def update_entries(self):
        if self.changelog_exists():
            print "UPDATING CHANGED"
            self.update_changed_entries()
        else:
            print "UPDATING ALL"
            self.update_all_entries()

    def update_all_entries(self):
        for source_abspath in self.parser.get_all_files():
            self.update_entry(source_abspath)

    def update_changed_entries(self):
        update_count = 0
        data = self.changelog.data
        if data is None:
            return update_count
        last_updated = self.get_last_updated()
        # Data is an OrderedDict, most recent changes last
        for source_path in reversed(data):
            status, timestamp = data[source_path]
            if self.old_timestamp(timestamp, last_updated):
                break
            source_abspath = self.path.get_source_abspath(source_path)
            update_count += self.update_entry(source_abspath)
        return update_count

    def old_timestamp(self, timestamp, last_updated):
        # Timestamps with a time before the last_updated time
        # were updated during the previous push
        return (timestamp <= last_updated)

    def update_entry(self, source_abspath):
        data = self.parser.get_data(source_abspath)
        fragment_abspath = self.path.get_fragment_abspath(source_abspath)
        if os.path.exists(fragment_abspath) is False:
            print "WARNING: Fragment Not Found", fragment_abspath
            return False
        # TODO: remove entry if fragment doesn't exist
        entry = self.graph.entries.save(data)
        return True

    def set_last_updated(self, last_updated):
        # Metadata methods are Neo4j-only right now
        self.graph.set_metadata("entries:last_updated", last_updated)

    def get_last_updated(self):
        # Metadata methods are Neo4j-only right now
        result = self.graph.get_metadata("entries:last_updated")
        last_updated = result.raw
        return last_updated
def __init__(self, config):
    #self.config = config
    self.path = Path(config)
    self.parser = Parser(config)
def _get_scatter_trace_values(pid, column):
    raw_csv = CSV.get_csv_values(Path.pid(pid), column)
    y = raw_csv[1:]
    x = [i for i in range(0, len(y))]
    title = raw_csv[0]
    return [title, x, y]
# Performance metric to optimize the model for
SCORING = 'f1_weighted'
# Set to True if TESTING with the Python CONSOLE
TESTING = False
# If True, the dataset_operations is normalized before training & testing
DATA_NORMALIZATION = True
# If True, a selected portion of the entire dataset_operations is used for training+testing (# of rows = row_count)
DATA_REDUCE = False
# If True, generate a .csv file for the feature ranking
GEN_RANKING_FILE = False
# If True, a plot will be generated for the # of features used vs performance metric
PLOT = False
# If True, trained model is exported to TRAINED_MODEL_PATH
EXPORT_MODEL = False

# Paths
# Directory name for new data set which contains the training/testing data for the classifier
PROCESSED_DATASET = "Processed_Dataset"
# Directory path for new data set which contains the training/testing data for the classifier
PROCESSED_DATASET_PATH = Path(f'{DATASETS}/{PROCESSED_DATASET}')
# loading in the actual dataset for the ML classifier
DATA_PATH = Path(f"{PROCESSED_DATASET_PATH}/ds_all.csv")
# Trained Model directory name
TRAINED_MODEL_DIR = 'Trained Models'
# Trained Model directory path
TRAINED_MODEL_PATH = Path(f'{ROOT}/{TRAINED_MODEL_DIR}')
# Trained Model name
TRAINED_MODEL_NAME = 'step_detection_model_test.pkl'
# Trained Normalizer name
TRAINED_NORMALIZER_NAME = 'step_detection_min_max_norm_test.pkl'
class Foo(Configurable):
    path = Path(allow_none=False)
class Parser(object):
    """Parse ReStructuredText source files."""

    def __init__(self, config):
        self.config = config
        self.path = Path(config)
        #self.source_dir = "%s/%s" % (config.project_dir, config.source_folder)

    def get_fragment(self, source_abspath):
        source = self.get_document_source(source_abspath)
        parts = self.get_document_parts(source)
        return parts['fragment']

    def get_data(self, source_abspath):
        source = self.get_document_source(source_abspath)
        parts = self.get_document_parts(source)

        data = dict()
        data['title'] = parts['title']
        data['subtitle'] = parts['subtitle']
        data['fragment'] = parts['fragment']

        # Extra metadata: docid, author, date, tags
        meta = self._get_metadata(source, source_abspath)
        data.update(meta)

        # Derived parts: slug, fragment_path, source_path
        slug = self.get_slug(source_abspath)
        data['slug'] = slug
        data['fragment_path'] = self.path.get_fragment_path(source_abspath)
        data['source_path'] = self.path.get_source_path(source_abspath)

        return data

    def get_document_source(self, source_abspath):
        def_source = self.get_substitution_definitions()
        doc_source = self.read_source_file(source_abspath)
        source = "\n".join([def_source, doc_source])
        return source

    def get_document_parts(self, source):
        # http://docutils.sourceforge.net/docs/api/publisher.html#publish-parts-details
        writer_name = self.config.writer_name
        settings = dict(initial_header_level=2)  # do we need this?
        options = dict(source=source, writer_name=writer_name, settings_overrides=settings)
        parts = docutils.core.publish_parts(**options)
        return parts

    def get_substitution_definitions(self):
        # Standard substitution definitions
        # http://docutils.sourceforge.net/docs/ref/rst/definitions.html
        module_abspath = os.path.abspath(__file__)
        module_dir = os.path.dirname(module_abspath)
        source = self.read_source_file("%s/etc/substitutions.rst" % module_dir)
        return source

    def read_source_file(self, file_path):
        fin = open(file_path, "r")
        source = fin.read().decode('utf-8')
        return source

    def get_slug(self, source_abspath):
        start = self.path.get_source_dir()
        #relative_path = file_name.rpartition(source_dir)[-1].lstrip("/")
        relative_path = os.path.relpath(source_abspath, start)
        slug = os.path.splitext(relative_path)[0]
        return slug

    def _get_metadata(self, source, source_abspath):
        doctree = docutils.core.publish_doctree(source)
        docinfo = doctree.traverse(docutils.nodes.docinfo)
        try:
            meta = self._process_standard_fields(docinfo)
            meta = self._process_custom_fields(meta)
        except IndexError:
            print "ERROR: Source file is missing data: %s" % source_abspath
            raise
        for key, value in meta.items():
            meta[key] = value.astext()
        return meta

    def _process_standard_fields(self, docinfo):
        # Standard fields: date, author, etc.
        meta = {}
        for node in docinfo[0].children:
            key = node.tagname.lower()
            value = node.children[0]
            meta[key] = value
        return meta

    def _process_custom_fields(self, meta):
        # http://repo.or.cz/w/wrigit.git/blob/f045e5e7766e767c0b56bcb7a1ba0582a6f4f176:/rst.py
        field = meta['field']
        meta['tags'] = field.parent.children[1]
        meta['docid'] = field.parent.parent.children[0].children[1]
        del meta['field']
        return meta

    def get_all_files(self):
        source_dir = self.path.get_source_dir()
        for root, dirs, files in os.walk(source_dir):
            for filename in files:
                # Ignore pattern: emacs autosave files. TODO: generalize this
                if fnmatch(filename, "*.rst") and not fnmatch(filename, "*.#*"):
                    source_abspath = os.path.join(root, filename)
                    yield source_abspath