def sub(categoryName, depth=1):
    """Recursively collect the files and subcategories of a Commons category.

    Args:
        categoryName: category name without the "Category:" prefix.
        depth: how many levels of subcategories to descend into; at 0 only
            the category's own files are returned.

    Returns:
        A list containing the category's file pages, then (when depth > 0)
        its subcategory pages, then the recursive contents of each
        subcategory.
    """
    # Build the Category object once instead of twice as before.
    category = page.Category(commons, categoryName)
    files = category.articlesList()
    if depth <= 0:
        return files
    categories = category.subcategoriesList()
    result = list(files)
    result.extend(categories)
    for cat in categories:
        # cat.title() starts with "Category:" (9 chars) — strip the prefix
        # before recursing.
        result.extend(sub(cat.title()[9:], depth - 1))
    return result
def subcategories(category_name, flattening=False):
    """Return the direct subcategories of *category_name*.

    Args:
        category_name: category name without the "Category:" prefix.
        flattening: when true, any direct subcategory that itself has
            subcategories is replaced by those children; subcategories with
            no children are kept as-is.

    Returns:
        A list of Category pages.
    """
    direct = page.Category(COMMONS, category_name).subcategories()
    if not flattening:
        # list(...) instead of a pass-through comprehension.
        return list(direct)
    result = []
    for category in direct:
        subs = list(category.subcategories())
        if subs:
            result.extend(subs)
        else:
            # Leaf category: keep the category itself.
            result.append(category)
    return result
def deep_loading(category):
    """Depth-first walk of the category tree rooted at *category*.

    Calls treat_category(child, parent) for every category reached, and
    prunes subtrees rooted at artwork categories (is_artwork).

    The root is treated with itself as parent.
    """
    # BUG fix: Commons category graphs contain cycles and diamonds; without
    # a visited set the original could loop forever and re-treat categories
    # reachable through several paths. Each category is now treated once.
    seen = {category}
    stack = [(category, category)]
    while stack:
        child, parent = stack.pop()  # LIFO pop -> depth-first order
        treat_category(child, parent)
        for subcat in page.Category(commons, child).subcategoriesList():
            # NOTE(review): is_artwork appears to take the page object while
            # the queue stores titles — confirmed against original behavior.
            if not is_artwork(subcat):
                title = subcat.title()
                if title not in seen:
                    seen.add(title)
                    stack.append((title, child))
def institutions(categoryName):
    """Resolve institutions for everything below *categoryName*.

    For every page returned by sub(): records the family relation via
    storesFamily, and — for category pages — resolves the institution with
    caching enabled. The accumulated cache is dumped to ``dump.json``.

    Returns:
        The global ``cache`` dict.
    """
    for subPage in sub(categoryName):
        # title() starts with "Category:"/namespace prefix (9 chars).
        storesFamily(categoryName, subPage.title()[9:])
        if subPage.isCategory():
            # BUG fix: was subPage.title(0) — title() takes no such
            # positional argument; the sibling call above uses title().
            institution(subPage.title()[9:], stores=True)
    with open("dump.json", "w") as fp:
        fp.write(json.dumps(cache, indent=2))
    return cache
def gathering(category_name, height):
    """Build category feature vectors for every file in *category_name*.

    Fills the global ``categories_tree`` with, per file title, the list of
    categories found up to *height* levels above it; writes the tree to
    ``categories1.json`` and a sparse 0/1 datapoint matrix to
    ``<category_name>-<height>.txt``.

    Returns:
        The titles of all files examined.
    """
    category_set = set()
    files = list(
        page.Category(COMMONS, category_name).members(
            namespaces=FILE_NAMESPACE))
    allImages = [f.title() for f in files]
    LOG.info(u"Examining %s", category_name)
    for member in files:
        LOG.info(u'gathering %s', member.title())
        if member.title() not in categories_tree:
            categories_tree[member.title()] = list(
                categories(member.title(), height))
        category_set |= set(categories_tree[member.title()])
    stringBuffer = []
    categories_tree["files"] = {}
    categories_tree["categories"] = {}
    # Reuse the already-materialized member list instead of re-querying the
    # API for the same category a second time.
    for j, member in enumerate(files):
        categories_tree["files"][j] = member.title()
        stringBuffer.append("\n")
        stringBuffer.append(str(j))
        for i, category in enumerate(category_set):
            categories_tree["categories"][i] = category.title()
            stringBuffer.append(" ")
            stringBuffer.append(str(i))
            stringBuffer.append(":")
            # 1 if this file carries the category, else 0.
            stringBuffer.append(
                str(int(category in categories_tree[member.title()])))
    LOG.info(u"Storing categories")
    with open("categories1.json", "w") as fp:
        fp.write(json.dumps(categories_tree, indent=2))
    LOG.info(u"Storing datapoints")
    # BUG fix: the datapoint file handle was never closed; use a context
    # manager so it is flushed and released even on error.
    with io.open(category_name + "-" + str(height) + ".txt",
                 mode="w", encoding="utf-8") as fp:
        fp.write(u"".join(stringBuffer))
    return allImages
def build_tree(category, with_children=True, with_parents=False):
    """Record *category*'s parents and children in the global ``tree``,
    recursing through children and/or parents as requested.

    Already-recorded categories are skipped, which also terminates the
    recursion on cycles.
    """
    global nbCalls
    nbCalls += 1
    # Periodically persist progress.
    if nbCalls % frequency == 0:
        flush()
    # Guard clause: nothing to do for a category we have already visited.
    if category in tree:
        return
    cat_page = page.Category(commons, category)
    parents = [p.title() for p in cat_page.categories()]
    children = [c.title() for c in cat_page.subcategories()]
    tree[category] = {"Parents": parents, "Children": children}
    if with_children:
        for child in children:
            build_tree(child, with_children, with_parents)
    if with_parents:
        for parent in parents:
            build_tree(parent, with_children, with_parents)
def institution(categoryName, height=4, stores=True):
    """Resolve the institution item for a Commons category.

    First consults oldInstitution(). On a cache miss ("not found"):
    at height <= 0 it scans the category's Institution: pages (namespace
    106) for an item id; otherwise it tries the category itself at height 0,
    then walks up through parent categories with decreasing height, caching
    the outcome via fill().

    Returns:
        The item id string, or None when nothing is found.
    """
    result = oldInstitution(categoryName)
    if result == "not found":
        category = page.Category(commons, categoryName)
        if height <= 0:
            # Institution: pages live in namespace 106 on Commons.
            inst = [i for i in category.articles(namespaces=106)]
            if not inst:
                result = None
            else:
                items = itemExpression.findall(inst[0].get())
                # BUG fix: was `len(items) is not 0` — identity comparison
                # with an int literal, only accidentally correct in CPython.
                if items:
                    result = items[0]
                else:
                    result = None
        else:
            # Try the category itself first, then climb through parents.
            result = institution(categoryName, 0, True)
            for parent in category.categories():
                if result is not None:
                    break
                result = institution(parent.title(), height - 1)
            fill(categoryName, result, cache)
    else:
        # NOTE(review): a cached value other than "not found" is discarded
        # and None returned — looks suspicious, but preserved as-is; confirm
        # against oldInstitution's contract.
        result = None
    return result
apply_all = True else: apply_now = answer == "y" if apply_all or apply_now: article.save(comment=comment) else: print("Skipping...") if pywikibot.config.simulate: print("Summary: {}".format(comment)) def scan_category(category, prefix, reorder_sections=False, must_be_part=True, sub_categories=False): new_description = "{} description".format(prefix) apply_all = False scan_list(category.articles(), new_description, reorder_sections, must_be_part) if sub_categories: scan_list(category.subcategories(), new_description, reorder_sections, must_be_part) scan_category(page.Category(site, "Category:Agencies"), "Agency", must_be_part=False, sub_categories=True) scan_category(page.Category(site, "Category:Default parts"), "Product", True)
comment += " *changed to marked as thumbnail;" log_entries.append( (image.title(), "++", new_hash)) else: log_entries.append( (image.title(), "*", new_hash)) image.text = unicode(parsed) image.save(comment=comment) elif is_thumbnail: image.text = "{{No thumbnails please|" + new_hash + "}}" + image.text image.save( comment="+no thumbnails please template;") log_entries.append((image.title(), "+", new_hash)) for image in page.Category(site, "Category:Image thumbnails").articles(namespaces=6): check_image(page.ImagePage(image), handled) print("Finished checking already marked images. Search for new images.") for image in site.allimages(maxsize=20 * 2 << 10): check_image(image, handled) if log_entries: table = ksp_util.EditTable(site, "User:BobBot/The Thumbnail Job") log_entries.sort(key=lambda entry: entry[0]) line = "\n".join([ "{{{{User:BobBot/The Thumbnail Job/entry|{}|{}|{}}}}}".format( *log_entry) for log_entry in log_entries ]) + "\n"
def images_of(category):
    """Return all file pages (FILE_NAMESPACE members) of *category*.

    Args:
        category: category name without the "Category:" prefix.

    Returns:
        A list of file pages.
    """
    # list(...) instead of a pass-through comprehension.
    return list(
        page.Category(COMMONS, category).members(
            namespaces=FILE_NAMESPACE))