Пример #1
0
    def test_write_big_csv(self):
        """Convert the bundled gzipped XML dump to CSV and check the row count.

        The dump is opened inside a ``with`` block so the gzip handle is
        closed deterministically, even when reading or writing raises.
        """
        out = StringIO()
        f = resource_filename(__name__, "Wikidata-20131129161111.xml.gz")
        # write_csv runs inside the ``with`` so the dump is still open in case
        # read_xml consumes its input lazily (not visible from this test).
        with gzip.open(f) as dump:
            xml = XmlReader.read_xml(dump)
            CsvWriter.write_csv(xml, out)

        out.seek(0)
        self.assertThat(len(out.readlines()), Equals(3679))
Пример #2
0
    def test_write_big_csv(self):
        """Convert the bundled gzipped XML dump to CSV and check the row count.

        The gzip handle is managed by a ``with`` block, so it is released as
        soon as the conversion finishes or fails instead of being leaked.
        """
        out = StringIO()
        f = resource_filename(__name__, "Wikidata-20131129161111.xml.gz")
        # Keep the conversion inside the ``with``: if read_xml is lazy (not
        # visible here) the dump must remain open while write_csv drains it.
        with gzip.open(f) as dump:
            xml = XmlReader.read_xml(dump)
            CsvWriter.write_csv(xml, out)

        out.seek(0)
        self.assertThat(len(out.readlines()), Equals(5627))
Пример #3
0
    def test_write_csv(self):
        """Check that ``test_data`` is written as exactly two CSV rows."""
        expected_rows = (
            "Q51,31,wikibase-entityid,Q5107",
            "Q51,373,string,Europe",
        )
        buf = StringIO()
        CsvWriter.write_csv(test_data, buf)
        buf.seek(0)

        # Rows are compared one at a time, in the order they were written.
        for expected in expected_rows:
            self.assertThat(buf.readline().strip(), Equals(expected))

        # Nothing may follow the expected rows.
        self.assertThat(buf.read(), Equals(""))
Пример #4
0
    def test_write_csv(self):
        """Verify the two rows that write_csv emits for ``test_data``."""
        stream = StringIO()
        CsvWriter.write_csv(test_data, stream)
        stream.seek(0)

        first_row = stream.readline().strip()
        self.assertThat(first_row, Equals("Q51,31,wikibase-entityid,Q5107"))

        second_row = stream.readline().strip()
        self.assertThat(second_row, Equals("Q51,373,string,Europe"))

        # The stream must be exhausted after the two rows.
        remainder = stream.read()
        self.assertThat(remainder, Equals(""))
    def test_write_csv(self):
        """Check the claim, qualifier and reference rows for ``test_data``."""
        expected_rows = (
            'Q51,claim,31,wikibase-entityid,Q5107',
            'Q51,claim,373,string,Europe',
            'Q51,qualifier,1,string,qual',
            'Q51,reference,2,string,ref',
        )
        buf = StringIO()
        CsvWriter.write_csv(test_data, buf)
        buf.seek(0)

        # Rows are compared one at a time, in the order they were written.
        for expected in expected_rows:
            assert expected == buf.readline().strip()

        # No trailing output is allowed after the last expected row.
        assert '' == buf.read()
Пример #6
0
    def test_write_csv(self):
        """Verify the four CSV rows that write_csv emits for ``test_data``."""
        stream = StringIO()
        CsvWriter.write_csv(test_data, stream)
        stream.seek(0)

        wanted = [
            'Q51,claim,31,wikibase-entityid,Q5107',
            'Q51,claim,373,string,Europe',
            'Q51,qualifier,1,string,qual',
            'Q51,reference,2,string,ref',
        ]
        for row in wanted:
            current = stream.readline()
            assert row == current.strip()

        # The stream must be exhausted once all rows have been read.
        leftover = stream.read()
        assert '' == leftover
import logging
import argparse
import sys
import time

from propertysuggester.parser import JsonReader, CsvWriter
from propertysuggester.utils.CompressedFileType import CompressedFileType

# Command-line entry point: read a JSON dump and write it out as CSV.
if __name__ == "__main__":
    # logging.INFO is the level 20, so logging.info messages are printed.
    logging.basicConfig(level=logging.INFO)

    arg_parser = argparse.ArgumentParser(
        description="this program converts wikidata JSON dumps to CSV data.",
    )
    arg_parser.add_argument(
        "input",
        type=CompressedFileType('r'),
        help="The JSON input file (a wikidata dump)",
    )
    # The output argument is optional and falls back to standard output.
    arg_parser.add_argument(
        "output",
        nargs='?',
        default=sys.stdout,
        type=CompressedFileType('wb'),
        help="The CSV output file (default=sys.stdout)",
    )
    options = arg_parser.parse_args()

    started_at = time.time()
    entities = JsonReader.read_json(options.input)
    CsvWriter.write_csv(entities, options.output)
    elapsed = time.time() - started_at
    logging.info("total time: %.2fs", elapsed)
import logging
import argparse
import sys
import time

from propertysuggester.parser import JsonReader, CsvWriter
from propertysuggester.utils.CompressedFileType import CompressedFileType

# Script entry point: convert the JSON input to CSV and log the elapsed time.
if __name__ == "__main__":
    # Level 20 is logging.INFO; enable it so the timing message is shown.
    logging.basicConfig(level=logging.INFO)

    cli = argparse.ArgumentParser(
        description="this program converts wikidata JSON dumps to CSV data.")
    cli.add_argument(
        "input",
        help="The JSON input file (a wikidata dump)",
        type=CompressedFileType('r'))
    # With nargs='?' the output may be omitted; sys.stdout is then used.
    cli.add_argument(
        "output",
        help="The CSV output file (default=sys.stdout)",
        default=sys.stdout,
        nargs='?',
        type=CompressedFileType('wb'))
    parsed = cli.parse_args()

    t0 = time.time()
    CsvWriter.write_csv(JsonReader.read_json(parsed.input), parsed.output)
    logging.info("total time: %.2fs", time.time() - t0)