def find_undersized():
    """Print re-upload links for Ross Watson files that Geograph has larger.

    Walks every page yielded by ``RossUploads()``.  A page is reported when
    its ``{{Geograph}}`` template credits "Ross Watson" and the
    ``original_width`` recorded in the ``gridimage_size`` table is greater
    than the width of the latest file revision on Commons.

    Each reported page produces one wikitext bullet on stdout containing a
    ``Special:Upload`` URL pre-filled for a re-upload from the Geograph
    full-size URL, the current and available dimensions, and a link to the
    page.

    Pages without a ``{{Geograph}}`` template, and images that have no row
    in ``gridimage_size``, are skipped silently.
    """
    for item in RossUploads():
        fi = item.latest_file_info
        tree = mwparserfromhell.parse(item.text)
        try:
            geograph_template = tlgetone(tree, ['Geograph'])
        except IndexError:
            # No {{Geograph}} template on this page.
            continue
        gridimage_id = int(str(geograph_template.get(1).value))
        commons_author = str(geograph_template.get(2).value)
        if commons_author != "Ross Watson":
            continue
        c = geodb.cursor()
        c.execute(
            """
            SELECT * FROM gridimage_size
               WHERE gridimage_id = ?
            """, (gridimage_id, ))
        row = c.fetchone()
        if row is None:
            # The image is not in the local size table, so there is nothing
            # to compare against; skip it rather than abort the whole run.
            continue
        if row['original_width'] <= fi.width:
            continue
        # Only look up the Geograph details once we know the page will be
        # reported; the result is needed solely to build the upload URL.
        gi = get_geograph_info(gridimage_id)
        uploadurl = "https://commons.wikimedia.org/w/index.php?" + urlencode(
            {
                'title': 'Special:Upload',
                'wpDestFile': item.title(underscore=True, withNamespace=False),
                'wpForReUpload': '1',
                'wpSourceType': 'url',
                'wpUploadFileURL': get_geograph_full_url(gridimage_id, gi),
                'wpUploadDescription':
                'Higher-resolution version from Geograph'
            })
        print("* [%s %d × %d → %d × %d] %s" %
              (uploadurl, fi.width, fi.height, row['original_width'],
               row['original_height'], item.title(asLink=True, textlink=True)))
def add_creditline(t, line):
    """Attach *line* to the infobox found in the parsed page *t*.

    The infobox is whatever ``tlgetone(t, infoboxes)`` returns.  The names
    in ``otherfieldses`` are tried in order; the first one the infobox
    already has gets a space, *line* and a newline appended to its value.
    If the infobox has none of them, a new "other fields" parameter is
    added holding *line*.

    The page must not already contain a ``Credit line`` template; this is
    enforced with an assertion.
    """
    assert len(tlgetall(t, ['Credit line'])) == 0
    infobox = tlgetone(t, infoboxes)
    for field_name in otherfieldses:
        if not infobox.has(field_name):
            continue
        # Extend the first matching parameter in place and stop looking.
        existing_value = infobox.get(field_name).value
        existing_value.append(Text(" "))
        existing_value.append(line)
        existing_value.append(Text("\n"))
        break
    else:
        # None of the known parameter names is present: create one.
        infobox.add("other fields", line)
Exemplo n.º 3
0
 def is_original_title(self, page, title):
     """Tell whether *title* matches the start of the original description.

     Returns False straight away when ``self.is_geographbot_upload(page)``
     is false.  Otherwise the description is read from the ``Information``
     template of ``self.get_original_tree(page)`` and the result is True
     when that text equals *title*, equals *title* followed by a full
     stop, or begins with *title* followed by a space or by ". ".
     """
     # This heuristic depends on GeographBot's behaviour.
     if not self.is_geographbot_upload(page):
         return False
     original_tree = self.get_original_tree(page)
     information = tlgetone(original_tree, ["Information"])
     description_value = information.get("description").value
     # Take the first node of the description and read its parameter 1
     # (presumably a language wrapper such as {{en|1=...}} -- confirm).
     first_description = str(description_value.get(0).get(1).value)
     if first_description in (title, title + "."):
         return True
     return first_description.startswith((title + " ", title + ". "))
def can_add_creditline(t, line):
    """Decide whether the credit line *line* may be added to the page *t*.

    Args:
        t: parsed page tree, searched with ``tlgetall`` / ``tlgetone``.
        line: the candidate credit-line template; its ``Author`` parameter
            is compared with the author in the page's ``{{Geograph}}``.

    Returns:
        True only when the page has no ``Credit line`` template yet, has a
        ``{{Geograph}}`` template that ``tlgetone`` can return, that
        template carries an author (parameter 2), and the two authors are
        equal after ``canonicalise_name``.  False otherwise.
    """
    if len(tlgetall(t, ['Credit line'])) != 0:
        return False  # Already have a credit line
    try:
        geo = tlgetone(t, ['Geograph'])
    except (IndexError, TooManyTemplates):
        # No usable {{Geograph}} template to check the author against.
        return False
    try:
        geo_author = geo.get(2).value
    except ValueError:
        # {{Geograph}} without an author parameter: the author cannot be
        # verified, so refuse rather than crash the caller.
        return False
    cl_author = line.get('Author').value
    if canonicalise_name(geo_author) != canonicalise_name(cl_author):
        # Don't add a credit line with wrong author
        return False
    return True
Exemplo n.º 5
0
def get_object_location(tree):
    """Return the result of ``tlgetone(tree, objtls)`` unchanged.

    Any exception raised by ``tlgetone`` propagates to the caller.
    """
    object_location = tlgetone(tree, objtls)
    return object_location
Exemplo n.º 6
0
def get_location(tree):
    """Return the result of ``tlgetone(tree, loctls)`` unchanged.

    Any exception raised by ``tlgetone`` propagates to the caller.
    """
    location = tlgetone(tree, loctls)
    return location
 def treat_page(self):
     """Warn when the current page's infobox has several "other fields".

     Parses ``self.current_page.text``, takes the infobox returned by
     ``tlgetone`` and counts how many of the names in ``otherfieldses``
     it has.  More than one triggers a ``bot.warning``; nothing is
     edited.
     """
     wikicode = mwparserfromhell.parse(self.current_page.text)
     infobox = tlgetone(wikicode, infoboxes)
     present_count = sum(1 for name in otherfieldses if infobox.has(name))
     if present_count <= 1:
         return
     bot.warning("excess other_fields in %s" % (self.current_page, ))
 def process_page(self, page):
     """Replace *page*'s file with Geograph's larger original, if safe.

     The page is checked step by step and the first failing check raises.
     Only when every check passes is ``self.replace_file`` called with
     the Geograph full-size URL, followed by ``compare_revisions``.

     Checks, in order:
       1. the bot may edit the page;
       2. the page has a ``{{Geograph}}`` template with a numeric image
          ID (parameter 1) and an author (parameter 2);
       3. the image has a row in ``gridimage_base NATURAL JOIN
          gridimage_size`` and a non-zero ``original_width``;
       4. the Commons file is smaller than the original in at least one
          dimension and has a matching aspect ratio;
       5. the Commons dimensions equal the database ``width``/``height``
          (and ``original_diff`` is not 'yes'), or the longest side is
          800 or 1024;
       6. no revision in the file history was uploaded by
          "Geograph Update Bot";
       7. the Commons author, and the title from any ``Credit line``
          template, match Geograph after ``canonicalise_name``;
       8. the SHA-1 of the Geograph image at the Commons file's size
          equals the SHA-1 of the Commons file.

     Raises:
         NotEligible: a check in 1, 2 (template absent) or 3-8 failed.
         BadTemplate: the ``{{Geograph}}`` template lacks a usable ID
             or author.
         NotInGeographDatabase: the image ID has no database row.
     """
     if not page.botMayEdit():
         raise NotEligible("bot forbidden from editing this page")
     tree = mwparserfromhell.parse(page.text)
     try:
         geograph_template = tlgetone(tree, ['Geograph'])
     except IndexError:
         raise NotEligible("No {{Geograph}} template")
     try:
         gridimage_id = int(str(geograph_template.get(1).value))
         commons_author = str(geograph_template.get(2).value)
     except (ValueError, IndexError):
         # Missing parameter or non-numeric image ID.
         raise BadTemplate("broken {{Geograph}} template")
     bot.log("Geograph ID is %d" % (gridimage_id, ))
     c = geodb.cursor()
     c.execute(
         """
         SELECT * FROM gridimage_base NATURAL JOIN gridimage_size
            WHERE gridimage_id = ?
         """, (gridimage_id, ))
     row = c.fetchone()
     if row is None:
         raise NotInGeographDatabase("Geograph ID %d not in database" %
                                     (gridimage_id, ))
     gwidth, gheight, original_width, original_height, original_diff = [
         row[x] for x in ('width', 'height', 'original_width',
                          'original_height', 'original_diff')
     ]
     if original_width == 0:
         raise NotEligible("no high-res version available")
     fi = page.latest_file_info
     bot.log("%d × %d version available" %
             (original_width, original_height))
     bot.log("current Commons version is %d × %d" % (fi.width, fi.height))
     if fi.width >= original_width and fi.height >= original_height:
         raise NotEligible("no higher-resolution version on Geograph")
     if not aspect_ratios_match(fi.width, fi.height, original_width,
                                original_height):
         raise NotEligible("aspect ratios of images differ")
     if (fi.width, fi.height) == (gwidth, gheight):
         if original_diff == 'yes':
             raise NotEligible("Geograph says pictures are different")
     elif max(fi.width, fi.height) not in (800, 1024):
         # Not the size in the database and not 800/1024 on the longest
         # side (presumably the other sizes Geograph serves -- confirm).
         raise NotEligible("dimensions do not match any Geograph image")
     # Never overwrite a file this bot has already touched.
     if any(ofi.user == "Geograph Update Bot"
            for ofi in page.get_file_history().values()):
         raise NotEligible("file already uploaded by me")
     geograph_info = get_geograph_info(gridimage_id)
     if (canonicalise_name(geograph_info['author_name']) !=
             canonicalise_name(commons_author)):
         raise NotEligible(
             "author does not match Geograph (%s vs. %s)" %
             (repr(commons_author), repr(geograph_info['author_name'])))
     try:
         credit_line = tlgetone(tree, ['Credit line'])
     except IndexError:
         # No credit line on the page: there is no title to compare.
         pass
     else:
         commons_title = ''.join(
             str(x) for x in credit_line.get('Other').value.filter_text()
         ).strip()
         bot.log("Title on Commons: %s" % (commons_title, ))
         if (canonicalise_name(commons_title) != canonicalise_name(
                 geograph_info['title'])):
             raise NotEligible(
                 "title does not match Geograph (%s vs. %s)" %
                 (repr(commons_title), repr(geograph_info['title'])))
     # Final safeguard: the Commons file must be byte-identical to the
     # Geograph image of the same size before it is replaced.
     geograph_image = get_geograph_size(gridimage_id, geograph_info,
                                        max(fi.width, fi.height))
     if hashlib.sha1(geograph_image).hexdigest() != fi.sha1:
         raise NotEligible("SHA-1 does not match Geograph %d px image." %
                           (max(fi.width, fi.height), ))
     bot.log("Image matches. Update possible.")
     self.replace_file(page,
                       get_geograph_full_url(gridimage_id, geograph_info))
     compare_revisions(self.site, parameters=dict(titles=page.title()))