Пример #1
0
class BrasRNCHTMLParser(HTMLParser):
    """Build an RncChartCatalog from the rows of an HTML chart table.

    Every ``<tr>`` starts a new ``Chart``.  The ``<td>`` cells of the row are
    counted (1-based) and interpreted by position:

    1. text -> ``chart.number`` (surrounding whitespace stripped, inner
       spaces replaced by underscores)
    2. first non-blank text seen after an ``<a href=...>`` -> ``chart.title``
    3. ``href`` of an ``<a>`` -> ``chart.url`` (prefixed with ``base_url``)
    4. text parsed as ``dd/mm/YYYY`` -> ``chart.zipfile_ts``
    5. text -> ``chart.ntm_edition_last_correction``

    On ``</tr>`` the chart is added to ``catalog`` when ``is_valid()`` is
    true; otherwise its attributes are dumped inside an XML comment.

    Callers are expected to set ``base_url`` before feeding HTML when the
    links in the page are relative.
    """

    def __init__(self):
        # Explicit base-class call (not super()): keeps working where
        # HTMLParser is an old-style class, as on Python 2.
        HTMLParser.__init__(self)
        self.inColumn = False   # True while inside a <td>
        self.chart = None       # chart of the row being parsed, if any
        self.column = 0         # 1-based index of the current <td> in the row
        self.base_url = ''      # prefix for hrefs found in column 3
        self.catalog = RncChartCatalog()
        self.catalog.title = "Brasil RNC Charts"
        self.inLink = False     # True after <a href> in column 2, until a title is read

    def handle_starttag(self, tag, attrs):
        """Track row/cell position and pick up link targets.

        ``attrs`` is the list of ``(name, value)`` pairs supplied by
        HTMLParser; ``value`` is ``None`` for attributes without a value.
        """
        if tag == 'tr':
            self.chart = Chart()
            self.chart.chart_format = 'Sailing Chart, International Chart'
            # Start each row from a clean state so that a missing </tr> or a
            # link without text in the previous row cannot leak into this one.
            self.column = 0
            self.inLink = False
        elif tag == 'td':
            self.inColumn = True
            self.column += 1
        elif tag == 'a' and self.inColumn and self.chart is not None:
            if self.column == 3:
                for name, value in attrs:
                    # Skip a value-less href: base_url + None would raise.
                    if name == 'href' and value is not None:
                        self.chart.url = self.base_url + value
            elif self.column == 2:
                if any(name == 'href' for name, _ in attrs):
                    self.inLink = True

    def handle_endtag(self, tag):
        """Finish the current row on ``</tr>`` and leave the cell on ``</td>``."""
        if tag == 'tr':
            # A </tr> with no open row (stray or duplicated tag) is ignored.
            if self.chart is not None:
                if self.chart.is_valid():
                    self.catalog.add_chart(self.chart)
                else:
                    # Single-argument parenthesised print produces the same
                    # output on Python 2 and Python 3.
                    print("<!-- unavailable/invalid?")
                    pprint(vars(self.chart))
                    print("-->")
                # Drop the reference so cells outside a row cannot modify a
                # chart that has already been handed to the catalog.
                self.chart = None
            self.column = 0
        elif tag == 'td':
            self.inColumn = False

    def handle_data(self, data):
        """Store the text of the current cell on the chart, by column."""
        if not self.inColumn or self.chart is None:
            return

        text = data.strip()
        if self.column == 1:
            self.chart.number = text.replace(" ", "_")
        elif self.column == 2:
            # Only the first non-blank text following the link is the title.
            if self.inLink and text != '':
                self.chart.title = text
                self.inLink = False
        elif self.column == 4:
            try:
                self.chart.zipfile_ts = datetime.strptime(text, '%d/%m/%Y')
            except ValueError:
                # Best effort: cells that do not hold a dd/mm/YYYY date
                # (blank text, placeholders) leave zipfile_ts untouched.
                pass
        elif self.column == 5:
            self.chart.ntm_edition_last_correction = text

    def print_xml(self):
        """Print the collected catalog via ``catalog.print_xml(True)``."""
        self.catalog.print_xml(True)
Пример #2
0
"""Script to process the JSON feed of the Dutch IENC charts list and convert it to the XML catalog format
Part of the ChartCatalogs project
Copyright (c) 2019-2020 Marcel Verpaalen
Licensed under GPLv2 or, at your will later version
"""

import sys
from ChartCatalogs import Chart, RncChartCatalog
from datetime import datetime
import json

# Download endpoint of vaarweginformatie.nl; the feed's fileId is substituted in.
DOWNLOAD_URL_TEMPLATE = (
    "https://vaarweginformatie.nl/fdd/main/wicket/resource/"
    "org.apache.wicket.Application/downloadfileResource?fileId=%s"
)

catalog = RncChartCatalog()
catalog.title = "Netherlands Inland ENC Charts"

# The feed file is given as the first command line argument; it is only
# needed while parsing, so it is closed before the entries are processed.
with open(sys.argv[1]) as feed:
    tilesets = json.load(feed)

# One catalog entry per feed item, numbered sequentially from zero.
for index, entry in enumerate(tilesets):
    name = entry['name']

    chart = Chart()
    chart.chart_format = 'Sailing Chart, International Chart'
    chart.url = DOWNLOAD_URL_TEMPLATE % (entry['fileId'],)
    chart.number = "%s" % (index,)
    chart.title = "%s" % (name,)
    # The feed's 'date' is divided by 1000 before conversion (presumably a
    # millisecond epoch timestamp - confirm against the feed).
    chart.zipfile_ts = datetime.fromtimestamp(entry['date'] / 1000)
    chart.target_filename = "%s.zip" % (name,)
    catalog.add_chart(chart)

catalog.print_xml(True)