def parse_folder(path): """ Parses all .bib files in given folder. Returns a tuple (parsed_iten, search_index) containing all items found """ if not os.path.isdir(path): raise Exception("Path to folder expected") parsed_items = [] files = utils.search_in_folder(path, lambda path: path.endswith(".bib")) executor = concurrent.futures.ProcessPoolExecutor( max_workers=multiprocessing.cpu_count()) futures = [ executor.submit(BibParser()._parse_file, os.path.join(path, filename)) for filename in files ] for future in futures: parsed_items += future.result() executor.shutdown() parsed_items = list( sorted(parsed_items, key=BibItem.key_to_key_func(const.DEFAULT_ORDER_BY))) item_index = search_index.Index(parsed_items) fin_ctx = FinalizingContext(item_index) for item in parsed_items: item.finalize_item_set(fin_ctx) item_index.update(parsed_items) return (parsed_items, item_index)
def parse_folder(path): """ Parses all .bib files in given folder. Returns a tuple (parsed_iten, search_index) containing all items found """ if not os.path.isdir(path): raise Exception("Path to folder expected") parsed_items = [] files = utils.search_in_folder(path, lambda path: path.endswith(".bib")) executor = concurrent.futures.ProcessPoolExecutor(max_workers=multiprocessing.cpu_count()) futures = [ executor.submit( BibParser()._parse_file, os.path.join(path, filename) ) for filename in files ] for future in futures: parsed_items += future.result() executor.shutdown() parsed_items = list(sorted( parsed_items, key=BibItem.key_to_key_func(const.DEFAULT_ORDER_BY) )) item_index = search_index.Index(parsed_items) fin_ctx = FinalizingContext(item_index) for item in parsed_items: item.finalize_item_set(fin_ctx) item_index.update(parsed_items) return (parsed_items, item_index)
def fetch_added_on_from_git():
	BLAME_REGEXP = re.compile(
		#commit hash
		r"^[\^0-9a-z]+\s+"
		#filename
		r"[^\s]*?\s+"
		#committer's name
		r"\([A-Za-z\-\s\\]*?\s+"
		#commit date
		r"(?P<date>\d{4}-\d{2}-\d{2})\s+"
		#commit time
		r"[\d:]+\s+"
		#commit time zone
		r"[+\d]+\s+"
		#line number
		r"\d+\)\s+"
		#item id
		r"(?P<id>[a-z_\d]+),\s*$"
	)

	def blame_file(path):
		data = subprocess.check_output([
			"git", "blame",
			#WARN: using show-name to guarantee output format
			"--show-name",
			#no such option in "git blame" on trusty
			#"--no-progress",
			path
		]).decode()
		result = dict()
		for line in data.split("\n"):
			match = BLAME_REGEXP.search(line)
			if not match:
				continue
			item_id = match.group("id")
			date = datetime.datetime.strptime(
				match.group("date"),
				config.parser.date_format
			)
			result[item_id] = date
		return result

	result = dict()
	filter = lambda path: path.endswith(".bib")
	for path in utils.search_in_folder(config.parser.bibdata_dir, filter):
		result.update(blame_file(path))
	return result
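#For reference, a hypothetical "git blame --show-name" line of the shape
#BLAME_REGEXP above expects (hash, file name, author and item id are invented):
#
#  a1b2c3d4 bibdata/file.bib (John Doe 2020-01-02 12:34:56 +0300 17) item_id_1998,
#
#Everything after "17) " is the blamed source line itself, so the pattern only
#matches lines whose content is an item id followed by a comma (which suggests
#the item id sits on its own line in the .bib source). The named groups capture
#"2020-01-02" as "date" and "item_id_1998" as "id"; hash, file name, author,
#time, time zone and line number are matched only to anchor the pattern, and
#non-matching lines are skipped by blame_file.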
def fetch_backups_from_fs():
	if not os.path.isdir(config.www.backup_dir):
		return []
	FOLDERS_TO_VALIDATE = [
		"Cooking",
		"Fashion",
		"Games",
		"Images",
		"Library",
	]
	trim_root = lambda path: os.path.relpath(path, start=config.www.backup_dir)
	filter = lambda path: const.FILENAME_REGEXP.match(os.path.basename(path))
	backups = []
	for basename in FOLDERS_TO_VALIDATE:
		folder = os.path.join(config.www.backup_dir, basename)
		backups += list(map(trim_root, utils.search_in_folder(folder, filter)))
	return set(backups)
def fetch_filelist_from_fs():
	if not os.path.isdir(config.www.elibrary_dir):
		return []
	FOLDERS_TO_VALIDATE = [
		"Library"
	]
	EXCLUDED_FOLDERS = {
		"Ancillary sources (not in bibliography)",
		"Leaflets (not in bibliography)",
	}
	trim_root = lambda path: os.path.relpath(path, start=config.www.elibrary_dir)
	filter = lambda path: os.path.isfile(path) and path.endswith(".pdf")
	stored_files = []
	for basename in FOLDERS_TO_VALIDATE:
		folder = os.path.join(config.www.elibrary_dir, basename)
		stored_files += list(map(
			trim_root,
			utils.search_in_folder(folder, filter, excludes=EXCLUDED_FOLDERS)
		))
	return set(stored_files)
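#utils.search_in_folder is an internal helper not shown here. Judging only by
#the call sites above, it takes a root folder, a path predicate and an optional
#excludes collection of directory names, and returns the matching paths. An
#approximate stand-in built on os.walk might look like the sketch below; the
#name, signature and behaviour are assumptions, not the actual implementation.
def _search_in_folder_sketch(folder, path_filter, excludes=frozenset()):
	found = []
	for root, dirs, files in os.walk(folder):
		#prune excluded directory names so they are never descended into
		dirs[:] = [d for d in dirs if d not in excludes]
		for name in files:
			path = os.path.join(root, name)
			if path_filter(path):
				found.append(path)
	return found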