def get_unlinked_archive(zim_files, zim_archives):
    """Return the archives which are not linked from any file in zim_files.

    Archives are matched by base name: every path extracted from a line of
    a zim file is reduced with ``os.path.basename`` before being compared
    with the entries of ``zim_archives``.

    Args:
        zim_files: iterable of paths of the files to scan; each one is
            opened in text mode and read line by line.
        zim_archives: iterable of archive file names to look for.

    Returns:
        A list with one entry per archive that was never referenced, in the
        order the archives were given, each stripped of its last extension
        (``os.path.splitext``).

    Progress is reported on stdout with ``START:``, ``EXIST:`` and ``RQ:``
    lines.
    """
    # Map each known archive name to "has a link to it been seen?".
    file_archives = {}
    for name in zim_archives:
        file_archives[name] = False
        print("START: %s" % name)

    # Scan every zim file for links.
    for filename in zim_files:
        # The context manager closes the handle even if extraction raises.
        with open(filename, "r") as zim_file:
            for line in zim_file:
                # NOTE(review): extract_labels_filepath presumably yields the
                # file paths linked from the line -- confirm in editline.
                for path in editline.extract_labels_filepath(line):
                    name = os.path.basename(path)
                    print("EXIST: %s" % name)
                    # A link may point to something that is not one of the
                    # known archives; only known archives are tracked.
                    if name in file_archives:
                        file_archives[name] = True

    # Keep the archives that no link referred to.
    archives = []
    for htmlfile, linked in file_archives.items():
        if not linked:
            archives.append(os.path.splitext(htmlfile)[0])
            print("RQ: %s" % htmlfile)
    return archives
def clean_archive(zim_files, zim_archive_path):
    """Remove the archives that no zim file links to.

    The candidate archives are the paths returned by
    ``get_archive_list(zim_archive_path)``. Every path extracted from the
    lines of ``zim_files`` is expanded with ``os.path.expanduser`` and, when
    it matches a candidate, marks that archive as still in use. Candidates
    that were never marked are deleted with ``os.remove``, and each deletion
    is logged at INFO level.

    Args:
        zim_files: iterable of paths of the files to scan; each one is
            opened in text mode and read line by line.
        zim_archive_path: value handed to ``get_archive_list`` to enumerate
            the existing archives.

    Raises:
        OSError: propagated from ``open`` or ``os.remove``.
    """
    # Map each existing archive path to "is it still referenced?".
    file_archives = {}
    for filepath in get_archive_list(zim_archive_path):
        file_archives[filepath] = False

    for filename in zim_files:
        # The context manager closes the handle even if extraction raises.
        with open(filename, 'r') as zim_file:
            for line in zim_file:
                # NOTE(review): extract_labels_filepath presumably yields the
                # file paths linked from the line -- confirm in editline.
                for path in editline.extract_labels_filepath(line):
                    path = os.path.expanduser(path)
                    # A link may point to a file that is not a known archive;
                    # such paths are never removal candidates, so skip them.
                    if path in file_archives:
                        file_archives[path] = True

    for arch, used in file_archives.items():
        if not used:
            logging.info('remove ' + str(arch))
            os.remove(arch)