-
Notifications
You must be signed in to change notification settings - Fork 0
/
csv2xmlgen.py
executable file
·388 lines (343 loc) · 13.7 KB
/
csv2xmlgen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
#
# csv2xmlgen.py
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Copyright © 2008 Jose Riguera Lopez <jriguera@gmail.com>
#
"""
A program to generate XML files based on an input template. It has been
developed for generating KML files (Google Earth layers) from CSV data,
but it can be used to process any kind of XML defined with the suitable
template.
"""
# Program metadata. These values are concatenated into the signature comments
# appended to the generated XML document and into the banner printed by the
# help text.
__program__ = "csv2xmlgen"
__author__ = "Jose Riguera Lopez <jriguera@gmail.com>"
__version__ = "0.4.0"
__date__ = "December 2011"
__license__ = "GPL (v3 or later)"
__copyright__ ="(c) Jose Riguera, October 2008"
import os
import sys
import getopt
import time
import re
import csv
import codecs
import gettext
import locale
import ConfigParser
import xml.dom.minidom
# Modules directory.
# "lib" alone is resolved against the current working directory, so the
# bundled packages were only found when the script was launched from its own
# directory. Keep that entry for backward compatibility and also register the
# "lib" directory that sits next to this file.
sys.path.append("lib")
sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), "lib"))

# Both packages are mandatory: give a readable hint and stop if one is missing.
try:
    import sxmltemplate
except ImportError:
    print("Sorry, you don't have the SXMLTemplate package installed, and this")
    print("script relies on it. Please copy SXMLTemplate package into lib and try again.")
    sys.exit(1)
try:
    import toolbox
except ImportError:
    print("Sorry, you don't have the ToolBox package installed, and this")
    print("script relies on it. Please copy ToolBox.package into lib and try again.")
    sys.exit(1)
# I18N gettext support.
# The message catalogues are expected in a "locale" directory next to this
# file; gettext.install() makes the translation function available as _().
__GETTEXT_DOMAIN__ = "csv2xmlgen"
__PACKAGE_DIR__ = os.path.abspath(os.path.dirname(__file__))
__LOCALE_DIR__ = os.path.join(__PACKAGE_DIR__, "locale")
try:
    if not os.path.isdir(__LOCALE_DIR__):
        # No bundled catalogues: report it and let gettext use its default
        # locale directory instead.
        print("Error: Cannot locate default locale dir: '%s'." % (__LOCALE_DIR__))
        __LOCALE_DIR__ = None
    locale.setlocale(locale.LC_ALL, "")
    gettext.install(__GETTEXT_DOMAIN__, __LOCALE_DIR__)
except Exception as e:
    # Translations are optional: fall back to an identity-like _() so the
    # rest of the program can keep calling it.
    def _(s):
        return unicode(s)
    print("Error setting up the translations: %s" % (e))
# Default Values
# Applied by the script entry point (via setdefault) to every option that
# neither the command line nor the configuration file provided.

# CSV field separator and quoting character handed to csv.DictReader.
CSVdelimiter = ','
CSVquotechar = '"'
# Space separated list of candidate encodings; it is split into a list and
# each value of the CSV is decoded with the first encoding that succeeds.
CSVencoding = "utf-8 windows-1252 iso-8859-1 iso-8859-2 us-ascii"
# Values assigned to the SXMLTemplate class attributes separatorKey,
# defaultValue and deletetag before the template is parsed.
XMLseparatorKey = "|"
XMLdefaultValue = " "
XMLdeletetag = ""
# Stored escaped (backslash + 'n'); main() passes it through
# toolbox.unescape() before using it as the newline of the written XML.
XMLlineterminator = "\\n"
def main(options):
    """Generate the XML output file from the CSV input and the XML template.

    ``options`` is a dictionary of sections, each one a dictionary itself:

      options['main']     -- 'csvinputfile', 'xmloutputfile', 'xmltemplate',
                             'templatenodes', 'separatorkey', 'defaultvalue'
                             and 'deletetag'.
      options['csv']      -- handed over to processCsv().
      options['xml']      -- 'lineterminator' (escaped newline string).
      options['template'] -- handed over to the template's setRootInfo().

    Failures opening the input/output or parsing the template are reported
    through toolbox.die() (which presumably terminates the program -- confirm
    in the toolbox package). Errors raised while processing the CSV data are
    only reported; whatever is in the template document at that point is
    still written out.
    """
    csvinput = options['main']['csvinputfile']
    try:
        fd = toolbox.openReadAnything(csvinput)
    except IOError as e:
        # Read the attributes instead of tuple-unpacking the exception: the
        # unpacking form fails when the IOError does not carry exactly two
        # arguments, and it is not valid syntax on newer interpreters.
        d = {'input': csvinput, 'errno': e.errno, 'strerror': e.strerror}
        toolbox.die(_("Cannot open '%(input)s' for reading: I/O error (%(errno)s): %(strerror)s.") % d)
    output = options['main']['xmloutputfile']
    try:
        fdout = toolbox.openWriteAnything(output)
    except IOError as e:
        d = {'output': output, 'errno': e.errno, 'strerror': e.strerror}
        toolbox.die(_("Cannot open '%(output)s' for writing: I/O error (%(errno)s): %(strerror)s.") % d)

    # Set up the XML Template. The three settings are class attributes and
    # are assigned before the template object is created.
    template = options['main']['xmltemplate']
    try:
        sxmltemplate.SXMLTemplate.separatorKey = options['main']['separatorkey']
        sxmltemplate.SXMLTemplate.defaultValue = options['main']['defaultvalue']
        sxmltemplate.SXMLTemplate.deletetag = options['main']['deletetag']
        xmltemplate = sxmltemplate.SXMLTemplate(template)
        xmltemplate.setTemplates(options['main']['templatenodes'])
    except sxmltemplate.SXMLTemplateError as e:
        toolbox.die(_("Cannot parse XML template: %s.") % e)
    xmltemplate.setRootInfo(options['template'])
    d = {'input': csvinput, 'xmltemplate': template}
    msg(_("Processing input file '%(input)s' with XML template '%(xmltemplate)s' ... ") % d)

    # Process CSV data. 'lines' keeps its initial 0 when processCsv() raises.
    lines = 0
    try:
        lines = processCsv(fd, xmltemplate, options['csv'])
    except Exception as e:
        msg(_("Processing CSV data: '%s'.") % e, _("- ERROR: "))
    msg(_("~ %s processed lines.") % lines)

    # Close the input and sign the document with two trailing comments.
    fd.close()
    dom = xmltemplate.getDom()
    sign = "XML generated with " + __program__ + " version " + __version__ + " at " + time.asctime() + "."
    dom.appendChild(dom.createComment(sign))
    sign = __program__ + " " + __copyright__ + ", a " + __license__ + " program created by " + __author__ + "."
    dom.appendChild(dom.createComment(sign))

    # The configured line terminator is stored escaped, hence the unescape.
    dom.writexml(fdout, "", " ", toolbox.unescape(options['xml']['lineterminator']), "utf-8")
    fdout.close()
    msg(_("XML file '%s' generated.") % output)
def processCsv(fd, xmltemplate, options):
    """Feed every row of a CSV stream to the XML template.

    Parameters:
      fd          -- source of CSV lines, passed straight to csv.DictReader.
      xmltemplate -- object whose setData(dict) is called once per row.
      options     -- dictionary with the keys 'headers' (list of column
                     names; when empty DictReader takes them from the first
                     CSV line), 'delimiter', 'quotechar' and 'encoding'
                     (list of codec names, tried in order).

    Returns the number of rows handed over to the template. A csv.Error
    stops the processing: it is reported with msg() and the number of rows
    processed so far is returned.
    """
    lines = 0
    header = options['headers']
    encodings = options['encoding']
    readerargs = {'delimiter': options['delimiter'], 'quotechar': options['quotechar']}
    if header:
        readerargs['fieldnames'] = header
    reader = csv.DictReader(fd, **readerargs)
    try:
        for row in reader:
            # The csv module yields byte strings: decode them to unicode.
            data = {}
            for k, v in row.items():
                if k is None:
                    # DictReader stores surplus fields (more values than
                    # headers) under the None key. They have no column name
                    # the template could refer to, and calling strip() on
                    # None used to abort the whole run, so skip them.
                    continue
                key = k.strip()
                if not v:
                    # Empty or missing (None) values go through unchanged.
                    data[key] = v
                    continue
                # Try to determine the best csv encoding: the first codec
                # that decodes the value strictly wins. When none does, the
                # key is left out of the row data.
                for enc in encodings:
                    try:
                        data[key] = unicode(v, enc, "strict")
                        break
                    except (UnicodeError, LookupError):
                        # Wrong or unknown codec: try the next candidate.
                        continue
            xmltemplate.setData(data)
            lines += 1
    except csv.Error as e:
        d = {'line': lines, 'exception': e}
        msg(_("Reading CSV file, line %(line)s: %(exception)s.") % d, _("- ERROR: "))
        return lines
    return lines
def msg(m, begin="* ", end='\n'):
    """Write the message ``m`` to standard error.

    ``begin`` is written right before the message and ``end`` right after it.
    """
    text = "%s%s%s" % (begin, m, end)
    sys.stderr.write(text)
def usage():
    """Print the help text, the program banner and the license notice.

    Everything goes to standard output. The help text and the license notice
    are looked up through _() so they can be translated.
    """
    values = {'program': __program__}
    helptext = _("""
[python] %(program)s [-c <file>] [-i <file>] [-n 'root.node ...'] [-o <file>] [-t <file>]
Options:
-c, --config <file> Configuration file.
-h, --help Show this help and exits.
-i, --input <file> Input data in CSV format.
-n, --templatenodes "root.node" Template node(s) (in XML Template).
-o, --output <file> XML output file.
-t, --template <file> XML input Template.
The XML resulting file is generated from the structure of the XML Template
file by repeating those nodes indicated by "templatenodes" (see configuration
file) for each line of data in the CSV input file. To do this, the program
replaces the headers listed in XML Template with data from each line of the
CSV file, generating the same number of nodes that lines in CSV file. The
nodes are indicated with the complete 'path' from the root node of the XML
document to the node to be "repeated", each element is separated by '.' ,
for example: "XMLRootElement.element.node". The root node is not a valid
template node, for obvious reasons.
The CSV input file and/or XML output file can be stdin and stdout
(respectively) by simply stating '-' instead of the file name.
The program looks for the default configuration file "csv2xmlgen.cfg", but it
is not necessary if all the arguments of the program are supplied, the rest
of arguments take the default value. These entries are permitted and
established by default in the configuration file:
# csv2xmlgen configuration file.
# All sections are mandatory, even without content.
# This is a comment, not processed, and ...
; this is another comment that was not taken into account.
# Here are the default value of program parameters.
# Main options
[main]
CsvInputFile = entrada.csv
XmlOutputFile = salida.xml
XmlTemplate = plantilla.xml
TemplateNodes = XMLRootElement.element0.node1 XMLRootElement.element2.node0
# Separator to indicate default values in the XML Template.
SeparatorKey = |
# Default value for data without default value in the XML template file
DefaultValue =
# Delete entrys (and children) with this tag if key is not found.
DeleteTag = -
# CSV input file (Comma Separated Values)
[csv]
# If the CSV has no header, must be given here
; Headers = Header1 header2 Header3 Header4
# CSV separator data character
delimiter =,
# Escape character
; quotechar = `"`
# Data encoding. Try to determine the best encoding from de list below
encoding = utf-8 windows-1252 iso-8859-1 iso-8859-2 us-ascii
# XML output data
[xml]
# End of line character
; lineterminator = \n
# Other values
[template]
author = Noe
date = December 2008
Example of input template file:
<XMLRootElement Author="%%(author)s" date='%%(date|ano da pera)s'>
<element0 id='%%(NUMBER|0)s'>
<node1 id='%%(NUMBER|0)s'>
The content is %%(CONTENIDO0|no content)s .
</node1>
</element0>
<element2>
<node0 id='%%(NUMBER|0)s'>
The content is %%(CONTENIDO2|-)s .
</node0>
</element2>
</XMLRootElement>
CSV input data file for the above XML template:
NUMBER,CONTENIDO0,CONTENIDO2
0,prueba0,TEST 0
1,,
2,data 2,TEST 2
The output file is left as an exercise for the reader ...
""")
    # The template placeholders above are written with a doubled percent sign
    # so that only %(program)s is substituted here.
    print(helptext % values)

    # Program banner.
    print("\n%s version %s, %s" % (__program__, __version__, __copyright__))
    print("Created by %s" % __author__)

    notice = _("""
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
""")
    print(notice)
def _parseArgs(argv):
    """Parse the command line arguments (program name not included).

    Returns a dictionary with the keys 'config', 'input', 'output',
    'template' and 'templatenodes' (None when the option was not given) plus
    'help' (True when -h/--help was requested).

    Raises getopt.GetoptError on an unknown option or a missing argument.
    """
    # Every option but --help takes a value, so its long name must end with
    # '='. "templatenodes" lacked it: "--templatenodes VALUE" never received
    # its value and stopped the option parsing.
    longopts = ["config=", "input=", "output=", "template=", "templatenodes=", "help"]
    opts, args = getopt.getopt(argv, "c:i:o:t:n:h", longopts)
    names = {
        "-c": 'config', "--config": 'config',
        "-i": 'input', "--input": 'input',
        "-o": 'output', "--output": 'output',
        "-t": 'template', "--template": 'template',
        "-n": 'templatenodes', "--templatenodes": 'templatenodes',
    }
    parsed = dict.fromkeys(('config', 'input', 'output', 'template', 'templatenodes'))
    parsed['help'] = False
    for o, a in opts:
        if o in ("-h", "--help"):
            parsed['help'] = True
        else:
            parsed[names[o]] = a
    return parsed


if __name__ == "__main__":
    # Args
    try:
        cmdline = _parseArgs(sys.argv[1:])
    except getopt.GetoptError:
        usage()
        sys.exit(2)
    if cmdline['help']:
        usage()
        sys.exit()
    csvinput = cmdline['input']
    xmloutput = cmdline['output']
    template = cmdline['template']
    templatenodes = cmdline['templatenodes']
    # The default config file is named after the program: <program>.cfg
    config = cmdline['config']
    if config is None:
        config = os.path.splitext(sys.argv[0])[0] + '.cfg'

    # The config file is only mandatory when some of the main arguments is
    # missing on the command line.
    if xmloutput and csvinput and template and templatenodes:
        needconfigfile = False
    else:
        msg(_("Searching config file ..."))
        needconfigfile = True

    # Read the config file ...
    hasconfigfile = True
    configuration = ConfigParser.ConfigParser()
    if not os.path.isfile(config):
        hasconfigfile = False
        if needconfigfile:
            toolbox.die(_("Cannot find the configuration file '%s'.") % os.path.basename(config))
    if hasconfigfile:
        try:
            msg(_("Reading configuration file '%s' ...") % os.path.basename(config))
            configuration.read(config)
        except Exception:
            # Not a bare except: SystemExit and KeyboardInterrupt must not be
            # reported as a config file format problem.
            toolbox.die(_("Cannot understand config file format '%s'.") % os.path.basename(config))

    # Override the configuration values of the file if they are in the
    # line parameters of the program.
    defaultconfig = {
        'main': {
            'csvinputfile': csvinput,
            'xmloutputfile': xmloutput,
            'xmltemplate': template,
            'templatenodes': templatenodes,
        },
        'csv': {},
        'xml': {},
        'template': {},
    }
    if hasconfigfile:
        options = toolbox.configRead(configuration, defaultconfig)
    else:
        msg(_("Using default compiled values ..."))
        options = defaultconfig

    # Both options are space separated lists. A missing key or a None value
    # (option given nowhere) becomes an empty list instead of failing on
    # None.split().
    options['main']['templatenodes'] = (options['main'].get('templatenodes') or "").split()
    options['csv']['headers'] = (options['csv'].get('headers') or "").split()

    # Default values if they were not set up
    options['csv'].setdefault('delimiter', CSVdelimiter)
    options['csv'].setdefault('quotechar', CSVquotechar)
    options['csv'].setdefault('encoding', CSVencoding)
    options['csv']['encoding'] = options['csv']['encoding'].split()
    options['main'].setdefault('separatorkey', XMLseparatorKey)
    options['main'].setdefault('defaultvalue', XMLdefaultValue)
    options['main'].setdefault('deletetag', XMLdeletetag)
    options['xml'].setdefault('lineterminator', XMLlineterminator)

    # Main program
    main(options)
    sys.exit()
#EOF