def run(self):
        """Merge extra and generated AppStream XML into one element tree.

        Two sources are read, in this order:

        1. ``../appstream-extra/*.xml`` -- every child of each application
           that has an ``<id>`` is copied verbatim, and any PNG files found
           in ``../screenshots-extra/<id>/`` are attached as screenshots.
        2. ``./appstream/*.xml`` (sorted) -- applications are copied only if
           their id type is recognised and the id has not been seen before;
           child elements whose tag starts with ``X-`` are dropped.

        Accepted ids from the second source are recorded in
        ``self.application_ids`` (id text -> filename).

        A parse error in an extra file propagates to the caller; a parse
        error in a generated file is logged as a warning and the file is
        skipped.
        """

        # setup the output XML
        master_root = ET.Element("applications")
        master_root.set("version", "0.1")
        # NOTE(review): master_tree is not used in the visible part of this
        # method; kept in case code following this section relies on it.
        master_tree = ET.ElementTree(master_root)

        # find any extra appstream files
        files = glob.glob("../appstream-extra/*.xml")
        for f in files:
            tree = ET.parse(f)
            root = tree.getroot()
            self.log.update_key(os.path.basename(f))
            for app in root:
                app_id = app.find('id')
                if app_id is None:
                    self.log.write(LoggerItem.WARNING, "appstream id not found")
                    continue

                # add everything
                new = ET.SubElement(master_root, 'application')
                for elem in app:
                    new.append(elem)

                # check for screenshots in ../screenshots-extra/${id}/*
                tmp = Application(None, self.cfg)
                tmp.set_id(app_id.text)
                self.log.write(LoggerItem.INFO, "adding %s" % tmp.app_id_full)
                overrides = glob.glob("../screenshots-extra/%s/*.png" % tmp.app_id)
                if overrides:
                    self.log.write(LoggerItem.INFO,
                                   "adding %i screenshot overrides" % len(overrides))
                for ss_fn in overrides:
                    tmp.add_screenshot_filename(ss_fn)
                tmp.build_xml_screenshots(new)

        # add the generated appstream files
        files = glob.glob("./appstream/*.xml")
        files.sort()

        recognised_types = ['desktop', 'codec', 'font', 'inputmethod']
        for filename in files:
            self.log.update_key(filename)
            try:
                tree = ET.parse(filename)
            except ET.ParseError as e:
                self.log.write(LoggerItem.WARNING, "XML could not be parsed: %s" % str(e))
                continue
            root = tree.getroot()
            for app in root:
                # find() returns None when there is no <id> child; skip the
                # entry the same way the extra-files loop does instead of
                # failing on the attribute access below
                app_id = app.find('id')
                if app_id is None:
                    self.log.write(LoggerItem.WARNING, "appstream id not found")
                    continue

                # check type is known
                app_id_type = app_id.get('type')
                if app_id_type not in recognised_types:
                    self.log.write(LoggerItem.WARNING,
                              "appstream id type %s not recognised" % app_id_type)
                    continue

                # detect duplicate IDs in the data; the dictionary is keyed
                # by the id *text*, so the lookup must use the text too
                if app_id.text in self.application_ids:
                    found = self.application_ids[app_id.text]
                    self.log.write(LoggerItem.WARNING,
                              "duplicate ID found in %s and %s" % (filename, found))
                    continue

                # add everything that isn't private
                new = ET.SubElement(master_root, 'application')
                for elem in app:
                    if elem.tag.startswith("X-"):
                        continue
                    new.append(elem)

                # success
                self.application_ids[app_id.text] = filename
                self.log.write(LoggerItem.INFO, "adding %s" % app_id.text)
def _percentage(count, total):
    """Return count as a whole-number percentage of total (0 if total is 0)."""
    if total <= 0:
        return 0
    return 100 * count // total


def main():
    """Write an HTML review page for a gzipped AppStream XML file.

    The input path is read from ``sys.argv[1]``.  Each child of the XML root
    is turned into an ``Application`` holding its id, untranslated name and
    summary, package names, categories, keywords, URLs, project group,
    description and source screenshots.  A summary list with coverage
    percentages, followed by one entry per application (fonts, input
    methods and codecs are left out), is then written to
    ``./screenshots/status.html``.
    """
    log = LoggerItem()
    cfg = Config()

    # read in AppStream file into several Application objects; the tree is
    # fully loaded by parse(), so the file can be closed straight away and
    # is released even when parsing fails
    f = gzip.open(sys.argv[1], 'rb')
    try:
        tree = ET.parse(f)
    finally:
        f.close()

    apps = []
    for app in tree.getroot():
        a = Application(None, cfg)
        for elem in app:
            if elem.tag == 'id':
                a.set_id(elem.text)
                a.type_id = elem.get('type')
                log.update_key(a.app_id_full)
                log.write(LoggerItem.INFO, "parsing")
            elif elem.tag == 'name':
                # only the untranslated entry is kept
                if not elem.get(XML_LANG):
                    a.names['C'] = ensure_unicode(elem.text)
            elif elem.tag == 'summary':
                if not elem.get(XML_LANG):
                    a.comments['C'] = ensure_unicode(elem.text)
            elif elem.tag == 'pkgname':
                a.pkgnames.append(ensure_unicode(elem.text))
            elif elem.tag == 'appcategories':
                for elem2 in elem:
                    a.categories.append(ensure_unicode(elem2.text))
            elif elem.tag == 'keywords':
                for elem2 in elem:
                    a.keywords.append(ensure_unicode(elem2.text))
            elif elem.tag == 'url':
                a.urls[elem.get('type')] = ensure_unicode(elem.text)
            elif elem.tag == 'compulsory_for_desktop':
                a.compulsory_for_desktop.append(ensure_unicode(elem.text))
            elif elem.tag == 'project_group':
                a.project_group = ensure_unicode(elem.text)
            elif elem.tag == 'description':
                # len(elem) is the public way to count children; a child
                # without text contributes an empty string rather than
                # raising on None + unicode
                if len(elem):
                    description = u''.join((elem2.text or u'') + u' '
                                           for elem2 in elem)
                else:
                    description = elem.text
                a.descriptions['C'] = ensure_unicode(description)
            elif elem.tag == 'screenshots':
                if a.type_id == 'font':
                    continue
                for elem2 in elem:
                    if elem2.tag != 'screenshot':
                        continue
                    # a caption only applies to images that follow it
                    # within the same <screenshot>
                    caption = None
                    for elem3 in elem2:
                        if elem3.tag == 'caption':
                            caption = elem3.text
                        elif elem3.tag == 'image':
                            if elem3.get('type') != 'source':
                                continue
                            s = Screenshot(a.app_id, None, caption)
                            s.basename = os.path.basename(elem3.text)
                            a.screenshots.append(s)
        apps.append(a)

    # build status page
    # NOTE(review): the status file is not closed in the visible part of
    # this function; it is left open here exactly as before.
    status = open('./screenshots/status.html', 'w')
    status.write('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 '
                 'Transitional//EN" '
                 '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n')
    status.write('<html xmlns="http://www.w3.org/1999/xhtml">\n')
    status.write('<head>\n')
    status.write('<meta http-equiv="Content-Type" content="text/html; '
                 'charset=UTF-8" />\n')
    status.write('<title>Application Data Review</title>\n')
    status.write('</head>\n')
    status.write('<body>\n')

    status.write('<h1>Executive summary</h1>\n')
    status.write('<ul>\n')

    # one summary line per kind of metadata: (label, Application attribute)
    total = len(apps)
    summary_fields = [
        ('long descriptions', 'descriptions'),
        ('keywords', 'keywords'),
        ('categories', 'categories'),
        ('screenshots', 'screenshots'),
    ]
    for label, attr in summary_fields:
        cnt = 0
        for app in apps:
            if len(getattr(app, attr)) > 0:
                cnt += 1
        # _percentage() yields 0 for an empty input file instead of
        # dividing by zero
        tmp = _percentage(cnt, total)
        status.write("<li>Applications in Fedora with %s: %i (%i%%)</li>" % (label, cnt, tmp))

    # project apps with appdata
    for project_group in ['GNOME', 'KDE', 'XFCE']:
        cnt = 0
        total = 0
        for app in apps:
            if app.project_group != project_group:
                continue
            total += 1
            if len(app.screenshots) > 0 or len(app.descriptions) > 0:
                cnt += 1
        tmp = _percentage(cnt, total)
        status.write("<li>Applications in %s with AppData: %i (%i%%)</li>" % (project_group, cnt, tmp))
    status.write('</ul>\n')

    # write applications
    status.write('<h1>Applications</h1>\n')
    for app in apps:
        # these types are not listed individually on the page
        if app.type_id in ('font', 'inputmethod', 'codec'):
            continue
        log.update_key(app.app_id_full)
        log.write(LoggerItem.INFO, "writing")
        try:
            status.write(_to_utf8(_to_html(app)))
        except AttributeError as e:
            log.write(LoggerItem.WARNING, "failed to write %s: %s" % (app, str(e)))
            continue