Пример #1
0
def sub(categoryName, depth=1):
    """Collect the articles and subcategories below a category.

    With ``depth <= 0`` only the article list of ``categoryName`` is
    returned (the very object produced by ``articlesList()``). Otherwise
    the result is a new list holding the articles, then the direct
    subcategories, then the recursive results for each subcategory with
    ``depth - 1``.

    Each subcategory is recursed into by its title with the first nine
    characters removed (presumably the ``"Category:"`` prefix -- confirm).
    """
    root = page.Category(commons, categoryName)
    members = root.articlesList()
    if depth <= 0:
        return members

    children = root.subcategoriesList()
    collected = list(members) + children
    for child in children:
        collected = collected + sub(child.title()[9:], depth - 1)
    return collected
Пример #2
0
def subcategories(category_name, flattening=False):
    """Return the subcategories of ``category_name`` as a list.

    Without ``flattening`` the direct subcategories are returned as-is.
    With ``flattening`` each direct subcategory is replaced by its own
    subcategories; a direct subcategory that has none is kept itself.
    Only one level is expanded.
    """
    direct = page.Category(COMMONS, category_name).subcategories()
    if not flattening:
        return list(direct)

    flattened = []
    for child in direct:
        grandchildren = list(child.subcategories())
        # A childless subcategory stands in for itself.
        flattened.extend(grandchildren if grandchildren else [child])
    return flattened
Пример #3
0
def deep_loading(category):
    """Walk the category tree below ``category`` and treat every node.

    Each visited category is passed to ``treat_category`` together with
    the category it was reached from (the root is paired with itself).
    Subcategories for which ``is_artwork`` is true are neither treated
    nor descended into. Pending entries are taken from the end of the
    list, so the walk is depth-first.

    NOTE(review): there is no visited set, so a category reachable through
    several parents is treated once per parent.
    """
    pending = [(category, category)]
    while pending:
        current, origin = pending.pop()
        treat_category(current, origin)
        children = page.Category(commons, current).subcategoriesList()
        pending.extend(
            (candidate.title(), current)
            for candidate in children
            if not is_artwork(candidate)
        )
Пример #4
0
def institutions(categoryName):
    """Process every page found by ``sub(categoryName)`` and dump the cache.

    For each page, ``storesFamily`` is called with ``categoryName`` and
    the page title minus its first nine characters; pages that are
    categories are additionally passed to ``institution``. Afterwards
    the module-level ``cache`` is written to ``dump.json`` as indented
    JSON and returned.
    """
    # The object is not read below; the construction is kept because the
    # original performs it (NOTE(review): possibly only for validation).
    category = page.Category(commons, categoryName)

    for member in sub(categoryName):
        storesFamily(categoryName, member.title()[9:])
        if not member.isCategory():
            continue
        institution(member.title(0)[9:], stores=True)

    serialized = json.dumps(cache, indent=2)
    with open("dump.json", "w") as dump_file:
        dump_file.write(serialized)
    return cache
Пример #5
0
def gathering(category_name, height):
    """Gather the categories of every file in a category and store them.

    For each file member of ``category_name`` the result of
    ``categories(title, height)`` is cached in the module-level
    ``categories_tree`` (keyed by file title) unless already present.
    Two index maps are then stored under ``categories_tree["files"]``
    and ``categories_tree["categories"]``, and a text matrix is built
    with one line per file: the file index followed by
    ``" <category index>:<0 or 1>"`` for every gathered category.

    Side effects:
        * ``categories_tree`` is mutated and written to
          ``categories1.json``.
        * The matrix is written, UTF-8 encoded, to
          ``<category_name>-<height>.txt``.

    Returns:
        The list of file titles found in the category.
    """
    category_set = set()
    # Query the members once; the same snapshot is used for both passes so
    # file indices always agree with the cached category lists.
    files = list(
        page.Category(COMMONS, category_name).members(
            namespaces=FILE_NAMESPACE))
    allImages = [f.title() for f in files]
    LOG.info(u"Examining %s", category_name)
    for title in allImages:
        LOG.info(u'gathering %s', title)
        if title not in categories_tree:
            categories_tree[title] = list(categories(title, height))
        category_set |= set(categories_tree[title])

    stringBuffer = []
    categories_tree["files"] = {}
    # The set is not modified from here on, so one enumeration gives the
    # same indices that every per-file row uses below.
    indexed_categories = list(enumerate(category_set))
    categories_tree["categories"] = {
        i: category.title() for i, category in indexed_categories
    }
    for j, title in enumerate(allImages):
        categories_tree["files"][j] = title
        stringBuffer.append("\n")
        stringBuffer.append(str(j))
        file_categories = categories_tree[title]
        for i, category in indexed_categories:
            stringBuffer.append(" ")
            stringBuffer.append(str(i))
            stringBuffer.append(":")
            stringBuffer.append(str(int(category in file_categories)))

    LOG.info(u"Storing categories")
    with open("categories1.json", "w") as json_file:
        json_file.write(json.dumps(categories_tree, indent=2))

    LOG.info(u"Storing datapoints")
    # A context manager guarantees the handle is flushed and closed.
    with io.open(category_name + "-" + str(height) + ".txt",
                 mode="w",
                 encoding="utf-8") as datapoints_file:
        datapoints_file.write(u"".join(stringBuffer))
    return allImages
Пример #6
0
def build_tree(category, with_children=True, with_parents=False):
    """Record ``category`` and, recursively, its relatives in ``tree``.

    Every call increments the global ``nbCalls`` counter and calls
    ``flush()`` whenever the counter is a multiple of ``frequency``.
    A category already present in ``tree`` is not processed again.
    Otherwise its parent and child category titles are stored under the
    ``"Parents"`` and ``"Children"`` keys, and the function recurses into
    the children and/or the parents depending on the two flags.
    """
    global nbCalls
    nbCalls += 1
    if nbCalls % frequency == 0:
        flush()

    if category in tree:
        return

    parent_titles = [
        parent.title()
        for parent in page.Category(commons, category).categories()
    ]
    child_titles = [
        child.title()
        for child in page.Category(commons, category).subcategories()
    ]
    # Registered before recursing so revisits of this category stop early.
    tree[category] = {"Parents": parent_titles, "Children": child_titles}

    if with_children:
        for title in child_titles:
            build_tree(title, with_children, with_parents)
    if with_parents:
        for title in parent_titles:
            build_tree(title, with_children, with_parents)
Пример #7
0
def institution(categoryName, height=4, stores=True):
    """Look up the institution item for a category and record it.

    ``oldInstitution(categoryName)`` is consulted first. Only when it
    returns the sentinel string ``"not found"`` is a lookup performed:

    * ``height <= 0``: the category's pages in namespace 106 are listed
      and ``itemExpression`` is applied to the text of the first one;
      the first match becomes the result.
    * ``height > 0``: the category itself is tried with height 0, then
      each parent category with ``height - 1`` until a result that is
      not ``None`` is obtained.

    The outcome is passed to ``fill(categoryName, result, cache)``.

    Args:
        categoryName: Title of the category to resolve.
        height: How many parent levels may be climbed.
        stores: Not read by this function.

    Returns:
        The resolved value, or ``None``. When ``oldInstitution`` already
        knows the category, ``None`` is returned.
    """
    result = oldInstitution(categoryName)
    if result == "not found":
        category = page.Category(commons, categoryName)
        if height <= 0:
            # 106 is presumably the Institution namespace -- TODO confirm.
            inst = list(category.articles(namespaces=106))
            if not inst:
                result = None
            else:
                items = itemExpression.findall(inst[0].get())
                # Truthiness instead of the identity test ``is not 0``.
                if items:
                    result = items[0]
                # NOTE(review): with no match, ``result`` stays
                # "not found" and is stored as such -- confirm intended.
        else:
            result = institution(categoryName, 0, True)
            for parent in category.categories():
                if result is not None:
                    break
                result = institution(parent.title(), height - 1)
        fill(categoryName, result, cache)
    else:
        # NOTE(review): a known category yields None, not the known
        # value -- confirm this is what callers expect.
        result = None
    return result
Пример #8
0
                    apply_all = True
                else:
                    apply_now = answer == "y"
            if apply_all or apply_now:
                article.save(comment=comment)
            else:
                print("Skipping...")
            if pywikibot.config.simulate:
                print("Summary: {}".format(comment))


def scan_category(category,
                  prefix,
                  reorder_sections=False,
                  must_be_part=True,
                  sub_categories=False):
    """Run ``scan_list`` over the pages of a category.

    Args:
        category: Category object providing ``articles()`` and
            ``subcategories()``.
        prefix: Text used to build the description name
            ``"<prefix> description"`` handed to ``scan_list``.
        reorder_sections: Forwarded unchanged to ``scan_list``.
        must_be_part: Forwarded unchanged to ``scan_list``.
        sub_categories: When true, the category's subcategories are
            scanned as well, after its articles.
    """
    # NOTE(review): the original assigned ``apply_all = False`` here with
    # no ``global`` declaration, i.e. a local that was never read. It was
    # removed as dead code; if a module-level reset was intended, it must
    # be done with ``global apply_all``.
    new_description = "{} description".format(prefix)

    def _scan(pages):
        scan_list(pages, new_description, reorder_sections, must_be_part)

    _scan(category.articles())
    if sub_categories:
        _scan(category.subcategories())


# Agencies: articles and subcategories are scanned, with must_be_part off.
scan_category(
    page.Category(site, "Category:Agencies"),
    "Agency",
    must_be_part=False,
    sub_categories=True,
)
# Default parts: articles only, with section reordering switched on.
scan_category(
    page.Category(site, "Category:Default parts"),
    "Product",
    reorder_sections=True,
)
Пример #9
0
                                    comment += " *changed to marked as thumbnail;"
                                    log_entries.append(
                                        (image.title(), "++", new_hash))
                                else:
                                    log_entries.append(
                                        (image.title(), "*", new_hash))
                            image.text = unicode(parsed)
                            image.save(comment=comment)
                        elif is_thumbnail:
                            image.text = "{{No thumbnails please|" + new_hash + "}}" + image.text
                            image.save(
                                comment="+no thumbnails please template;")
                            log_entries.append((image.title(), "+", new_hash))


# First pass: re-check every member of the thumbnail category that lives in
# namespace 6 (presumably the File namespace -- confirm).
for image in page.Category(site,
                           "Category:Image thumbnails").articles(namespaces=6):
    check_image(page.ImagePage(image), handled)

print("Finished checking already marked images. Search for new images.")

# Second pass: every image the site reports up to the size limit.
# `*` binds tighter than `<<`, so the limit is (20 * 2) << 10 == 40960.
for image in site.allimages(maxsize=20 * 2 << 10):
    check_image(image, handled)

# Only prepare log output when at least one entry was recorded.
if log_entries:
    table = ksp_util.EditTable(site, "User:BobBot/The Thumbnail Job")

    # Order entries by their first element.
    log_entries.sort(key=lambda entry: entry[0])
    # One template call per entry, newline separated; the doubled braces in
    # the format string produce literal "{{" and "}}".
    line = "\n".join([
        "{{{{User:BobBot/The Thumbnail Job/entry|{}|{}|{}}}}}".format(
            *log_entry) for log_entry in log_entries
    ]) + "\n"
Пример #10
0
def images_of(category):
    """Return the members of ``category`` in ``FILE_NAMESPACE`` as a list."""
    members = page.Category(COMMONS, category).members(
        namespaces=FILE_NAMESPACE)
    return list(members)