Exemplo n.º 1
0
	def parse_folder(path):
		"""
		Parses all .bib files in the given folder.

		Each file is submitted to a process pool and parsed by
		BibParser()._parse_file; the per-file results are merged in
		submission order, sorted by const.DEFAULT_ORDER_BY, finalized
		and indexed.

		Returns a tuple (parsed_items, search_index) containing all items found.
		Raises Exception if path is not an existing directory. Exceptions
		raised while parsing a file propagate to the caller.
		"""
		if not os.path.isdir(path):
			raise Exception("Path to folder expected")

		files = utils.search_in_folder(path, lambda name: name.endswith(".bib"))
		parsed_items = []
		#the context manager shuts the pool down even when a worker raises,
		#so the worker processes are not left behind on failure
		with concurrent.futures.ProcessPoolExecutor(
			max_workers=multiprocessing.cpu_count()
		) as executor:
			futures = [
				executor.submit(
					BibParser()._parse_file,
					os.path.join(path, filename)
				)
				for filename in files
			]
			#results are collected in submission order, not completion order
			for future in futures:
				parsed_items.extend(future.result())

		parsed_items = sorted(
			parsed_items,
			key=BibItem.key_to_key_func(const.DEFAULT_ORDER_BY)
		)
		#the index has to exist before finalization, since the finalizing
		#context is built from it
		item_index = search_index.Index(parsed_items)
		fin_ctx = FinalizingContext(item_index)
		for item in parsed_items:
			item.finalize_item_set(fin_ctx)
		#NOTE(review): presumably refreshes the index with the fields
		#changed during finalization - confirm against search_index.Index
		item_index.update(parsed_items)
		return (parsed_items, item_index)
Exemplo n.º 2
0
	def parse_folder(path):
		"""
		Parses all .bib files in the given folder.

		Each file is submitted to a process pool and parsed by
		BibParser()._parse_file; the per-file results are merged in
		submission order, sorted by const.DEFAULT_ORDER_BY, finalized
		and indexed.

		Returns a tuple (parsed_items, search_index) containing all items found.
		Raises Exception if path is not an existing directory. Exceptions
		raised while parsing a file propagate to the caller.
		"""
		if not os.path.isdir(path):
			raise Exception("Path to folder expected")

		files = utils.search_in_folder(path, lambda name: name.endswith(".bib"))
		parsed_items = []
		#the context manager shuts the pool down even when a worker raises,
		#so the worker processes are not left behind on failure
		with concurrent.futures.ProcessPoolExecutor(
			max_workers=multiprocessing.cpu_count()
		) as executor:
			futures = [
				executor.submit(
					BibParser()._parse_file,
					os.path.join(path, filename)
				)
				for filename in files
			]
			#results are collected in submission order, not completion order
			for future in futures:
				parsed_items.extend(future.result())

		parsed_items = sorted(
			parsed_items,
			key=BibItem.key_to_key_func(const.DEFAULT_ORDER_BY)
		)
		#the index has to exist before finalization, since the finalizing
		#context is built from it
		item_index = search_index.Index(parsed_items)
		fin_ctx = FinalizingContext(item_index)
		for item in parsed_items:
			item.finalize_item_set(fin_ctx)
		#NOTE(review): presumably refreshes the index with the fields
		#changed during finalization - confirm against search_index.Index
		item_index.update(parsed_items)
		return (parsed_items, item_index)
Exemplo n.º 3
0
def fetch_filelist_from_fs():
	"""
	Collects the .pdf files found under config.www.elibrary_dir.

	Returns a set of paths relative to config.www.elibrary_dir, each
	prefixed with "/". The folder names listed in skipped_folders are
	handed to utils.search_in_folder as its excludes argument.
	"""
	skipped_folders = {
		"Ancillary sources (not in bibliography)",
		"Leaflets (not in bibliography)"
	}

	def is_pdf_file(candidate):
		#only regular files with the .pdf extension are of interest
		return os.path.isfile(candidate) and candidate.endswith(".pdf")

	def to_rooted_path(candidate):
		#rewrite the path relative to the library root, with a leading slash
		return "/" + os.path.relpath(candidate, start=config.www.elibrary_dir)

	found = utils.search_in_folder(
		config.www.elibrary_dir,
		is_pdf_file,
		excludes=skipped_folders
	)
	return {to_rooted_path(candidate) for candidate in found}
Exemplo n.º 4
0
def fetch_filelist_from_fs():
	"""
	Collects the .pdf files found under config.www.elibrary_dir.

	Returns a set of paths relative to config.www.elibrary_dir, each
	prefixed with "/". The folder names listed in skipped_folders are
	handed to utils.search_in_folder as its excludes argument.
	"""
	skipped_folders = {
		"Ancillary sources (not in bibliography)",
		"Leaflets (not in bibliography)"
	}

	def is_pdf_file(candidate):
		#only regular files with the .pdf extension are of interest
		return os.path.isfile(candidate) and candidate.endswith(".pdf")

	def to_rooted_path(candidate):
		#rewrite the path relative to the library root, with a leading slash
		return "/" + os.path.relpath(candidate, start=config.www.elibrary_dir)

	found = utils.search_in_folder(
		config.www.elibrary_dir,
		is_pdf_file,
		excludes=skipped_folders
	)
	return {to_rooted_path(candidate) for candidate in found}
Exemplo n.º 5
0
def fetch_added_on_from_git():
	"""
	Maps item ids to the commit date git blame reports for them.

	Runs "git blame --show-name" on every .bib file found under
	config.parser.bibdata_dir. A blamed line is recorded when its
	content is an item id ([a-z_0-9]+) followed by a comma; every
	other line is ignored.

	Returns a dict {item_id: datetime.datetime}, where the date is
	parsed with config.parser.date_format. When an id is matched more
	than once, the last match wins.
	Raises subprocess.CalledProcessError if git blame exits with an error.
	"""
	BLAME_REGEXP = re.compile(
		#commit hash
		r"^[\^0-9a-z]+\s+"
		#filename
		r"[^\s]*?\s+"
		#committer's name
		r"\([A-Za-z\-\s\\]*?\s+"
		#commit date
		r"(?P<date>\d{4}-\d{2}-\d{2})\s+"
		#commit time
		r"[\d:]+\s+"
		#commit time zone; the sign can be negative (e.g. -0500),
		#so both "+" and "-" have to be accepted
		r"[+\-\d]+\s+"
		#line number
		r"\d+\)\s+"
		#item id
		r"(?P<id>[a-z_\d]+),\s*$"
	)
	def blame_file(path):
		"""
		Returns {item_id: date} for the item ids found in a single file
		"""
		data = subprocess.check_output([
			"git",
			"blame",
			#WARN: using show-name to guarantee output format
			"--show-name",
			#no such option in "git blame" on trusty
			#"--no-progress",
			path
		]).decode()
		dates = dict()
		for line in data.split("\n"):
			match = BLAME_REGEXP.search(line)
			if not match:
				continue
			item_id = match.group("id")
			dates[item_id] = datetime.datetime.strptime(
				match.group("date"),
				config.parser.date_format
			)
		return dates

	result = dict()
	is_bib_file = lambda path: path.endswith(".bib")
	for path in utils.search_in_folder(config.parser.bibdata_dir, is_bib_file):
		result.update(blame_file(path))
	return result
Exemplo n.º 6
0
def fetch_added_on_from_git():
	"""
	Maps item ids to the commit date git blame reports for them.

	Runs "git blame --show-name" on every .bib file found under
	config.parser.bibdata_dir. A blamed line is recorded when its
	content is an item id ([a-z_0-9]+) followed by a comma; every
	other line is ignored.

	Returns a dict {item_id: datetime.datetime}, where the date is
	parsed with config.parser.date_format. When an id is matched more
	than once, the last match wins.
	Raises subprocess.CalledProcessError if git blame exits with an error.
	"""
	BLAME_REGEXP = re.compile(
		#commit hash
		r"^[\^0-9a-z]+\s+"
		#filename
		r"[^\s]*?\s+"
		#committer's name
		r"\([A-Za-z\-\s\\]*?\s+"
		#commit date
		r"(?P<date>\d{4}-\d{2}-\d{2})\s+"
		#commit time
		r"[\d:]+\s+"
		#commit time zone; the sign can be negative (e.g. -0500),
		#so both "+" and "-" have to be accepted
		r"[+\-\d]+\s+"
		#line number
		r"\d+\)\s+"
		#item id
		r"(?P<id>[a-z_\d]+),\s*$"
	)
	def blame_file(path):
		"""
		Returns {item_id: date} for the item ids found in a single file
		"""
		data = subprocess.check_output([
			"git",
			"blame",
			#WARN: using show-name to guarantee output format
			"--show-name",
			#no such option in "git blame" on trusty
			#"--no-progress",
			path
		]).decode()
		dates = dict()
		for line in data.split("\n"):
			match = BLAME_REGEXP.search(line)
			if not match:
				continue
			item_id = match.group("id")
			dates[item_id] = datetime.datetime.strptime(
				match.group("date"),
				config.parser.date_format
			)
		return dates

	result = dict()
	is_bib_file = lambda path: path.endswith(".bib")
	for path in utils.search_in_folder(config.parser.bibdata_dir, is_bib_file):
		result.update(blame_file(path))
	return result
Exemplo n.º 7
0
def fetch_backups_from_fs():
    """
    Collect the backup folders found under config.www.backup_dir.

    A path qualifies when it is a directory whose basename matches
    const.FILENAME_REGEXP. The folder names in EXCLUDED_FOLDERS are passed
    to utils.search_in_folder as its excludes argument.

    Returns a set of paths relative to config.www.backup_dir, each prefixed
    with "/". The set is empty when config.www.backup_dir is not a
    directory.
    """
    if not os.path.isdir(config.www.backup_dir):
        # Same container type as the regular return value, so callers can
        # use set operations whether or not the backup folder exists.
        return set()
    EXCLUDED_FOLDERS = {
        "Cooking", "Fashion", "Leaflets (not in bibliography)", "Postcards",
        "Useless"
    }

    def trim_root(path):
        # Rewrite the path relative to the backup root, with a leading slash.
        return "/" + os.path.relpath(path, start=config.www.backup_dir)

    def is_backup_folder(path):
        return (os.path.isdir(path) and const.FILENAME_REGEXP.match(
            os.path.basename(path)))

    found = utils.search_in_folder(config.www.backup_dir,
                                   is_backup_folder,
                                   excludes=EXCLUDED_FOLDERS)
    return {trim_root(path) for path in found}
Exemplo n.º 8
0
def fetch_backups_from_fs():
	"""
	Collects the backup folders found under config.www.backup_dir.

	A path qualifies when it is a directory whose basename matches
	const.FILENAME_REGEXP. The folder names in EXCLUDED_FOLDERS are
	passed to utils.search_in_folder as its excludes argument.

	Returns a set of paths relative to config.www.backup_dir, each
	prefixed with "/". The set is empty when config.www.backup_dir
	is not a directory.
	"""
	if not os.path.isdir(config.www.backup_dir):
		#same container type as the regular return value, so callers can
		#use set operations whether or not the backup folder exists
		return set()
	EXCLUDED_FOLDERS = {
		"Cooking",
		"Fashion",
		"Leaflets (not in bibliography)",
		"Postcards",
		"Useless"
	}

	def trim_root(path):
		#rewrite the path relative to the backup root, with a leading slash
		return "/" + os.path.relpath(path, start=config.www.backup_dir)

	def is_backup_folder(path):
		return (
			os.path.isdir(path) and
			const.FILENAME_REGEXP.match(os.path.basename(path))
		)

	found = utils.search_in_folder(
		config.www.backup_dir,
		is_backup_folder,
		excludes=EXCLUDED_FOLDERS
	)
	return {trim_root(path) for path in found}
Exemplo n.º 9
0
def fetch_backups_from_fs():
	"""
	Collects the backup folders found in selected subfolders of
	config.www.backup_dir.

	Only the subfolders named in FOLDERS_TO_VALIDATE are searched. A path
	qualifies when it is a directory whose basename matches
	const.FILENAME_REGEXP.

	Returns a list of paths relative to config.www.backup_dir (no leading
	slash). The list is empty when config.www.backup_dir is not a
	directory.
	"""
	if not os.path.isdir(config.www.backup_dir):
		return []
	FOLDERS_TO_VALIDATE = {
		"Cooking",
		"Fashion",
		"Library",
	}

	def trim_root(path):
		return os.path.relpath(path, start=config.www.backup_dir)

	def is_backup_folder(path):
		return (
			os.path.isdir(path) and
			const.FILENAME_REGEXP.match(os.path.basename(path))
		)

	backups = []
	#iterate in sorted order: the iteration order of a set of strings
	#depends on hash randomization, which would make the order of the
	#returned list differ from run to run
	for basename in sorted(FOLDERS_TO_VALIDATE):
		folder = os.path.join(config.www.backup_dir, basename)
		backups.extend(
			trim_root(path)
			for path in utils.search_in_folder(folder, is_backup_folder)
		)
	return backups