forked from larryhou/xls2xml
-
Notifications
You must be signed in to change notification settings - Fork 0
/
xls2xml.py
126 lines (96 loc) · 3.78 KB
/
xls2xml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#encoding:gb2312
import sys
import os
import re
from xlrd import open_workbook, cellname
import lxml.etree as etree
import ntpath
# Codec used to turn stringified (non-unicode) cell values into unicode text.
decoding = "gb2312"
# Codec used whenever XML is serialized to bytes or placeholders are encoded.
encoding = "utf-8"
def test(condition, msg, errorCode = 110):
    """Abort the program with ``msg`` unless ``condition`` holds.

    condition -- any value; a truthy value means the check passed and the
                 function simply returns.
    msg       -- text reported on standard error when the check fails.
    errorCode -- process exit status used on failure.  When it is 0 the
                 message is printed bare; otherwise it is prefixed with
                 ``ERR<errorCode>:``.

    Raises SystemExit(errorCode) when ``condition`` is falsy.
    """
    if condition:
        return

    if errorCode != 0:
        stderr("ERR" + str(errorCode) + ":", msg)
    else:
        stderr(msg)
    sys.exit(errorCode)
def stderr(*argv):
    """Write the arguments to standard error as one space-separated line.

    Each argument is converted with ``str()``; a single newline terminates
    the line.  Calling it with no arguments emits an empty line.
    """
    # Writing to the stream directly produces the same bytes as the
    # Python 2 ``print >> sys.stderr`` chevron form, but also runs on
    # Python 3, where the chevron form raises TypeError.
    sys.stderr.write(' '.join(str(x) for x in argv) + '\n')
class XMLCfg:
    """Export settings shared by the conversion functions.

    node -- template element copied once per data row, or None.
    name -- name of the worksheet to export.
    root -- tag name for the root element of the output, or None.
    map  -- dict from column index to header text; starts empty and is
            filled in while a workbook is being converted.
    """

    def __init__(self, node = None, name = None, root = None):
        self.node, self.name, self.root = node, name, root
        # A fresh dict per instance so configurations never share headers.
        self.map = {}
def createNode(node, imap):
    """Build an element from a template by filling its ``{key}`` placeholders.

    node -- lxml element used as the template; it is serialized, so the
            element passed in is not modified.
    imap -- dict mapping placeholder names to unicode replacement text.

    Returns a new element parsed from the substituted markup.

    Replacement text is XML-escaped (``&``, ``<``, ``>`` and ``"``) so that
    a value can never break the markup or inject elements/attributes, and
    all placeholders are replaced in a single pass so text inserted for one
    key is never rescanned for other placeholders.
    """
    from xml.sax.saxutils import escape

    markup = etree.tostring(node, encoding=encoding).decode(encoding)
    if imap:
        # Literal "{key}" token -> escaped replacement text.
        replacements = dict(
            (u"{" + key + u"}", escape(value, {u'"': u"&quot;"}))
            for key, value in imap.items()
        )
        # One alternation over every token gives a single left-to-right
        # pass; an empty mapping is skipped because an empty pattern would
        # match at every position.
        placeholder = re.compile(
            u"|".join(re.escape(token) for token in replacements)
        )
        markup = placeholder.sub(
            lambda match: replacements[match.group(0)], markup
        )
    return etree.fromstring(markup.encode(encoding))
def excel2xml(sheet, data):
    """Convert every data row of ``sheet`` into a child of a new root element.

    sheet -- worksheet object providing ``nrows``, ``ncols`` and
             ``cell(row, col).value``; row 0 is treated as the header row
             and is not exported.
    data  -- configuration object with ``root`` (root tag name or None),
             ``node`` (row template element or None) and ``map`` (column
             index -> header text).

    Returns the root element.  When ``data.root`` is None it is set to
    ``"root"`` on ``data`` as a side effect.

    With a template, each row becomes a copy of ``data.node`` whose
    ``{header}`` placeholders are filled from the row; without one, each
    row becomes an ``<item>`` element with one attribute per column.
    """
    if data.root is None:
        data.root = "root"
    root = etree.Element(data.root)

    for r in range(1, sheet.nrows):
        imap = {}
        for c in range(sheet.ncols):
            key = data.map.get(c)
            if not key:
                # A column without a header has no name to export under;
                # using it as an attribute name or placeholder would fail.
                continue
            value = sheet.cell(r, c).value
            if not isinstance(value, unicode):
                # Whole-number floats are written without the trailing ".0".
                if isinstance(value, float) and value.is_integer():
                    value = int(value)
                value = str(value).decode(decoding)
            imap[key] = value

        if data.node is not None:
            root.append(createNode(data.node.__copy__(), imap))
        else:
            item = etree.SubElement(root, "item")
            for key, value in imap.items():
                item.set(key, value)
    return root
def parseXMLCfg(url):
    """Load the export template at ``url`` and return it as an ``XMLCfg``.

    The template is read for three elements:
      exportNode -- its first child becomes the per-row template (required).
      sheetName  -- its text is the worksheet name (required).
      exportRoot -- the tag of its first child names the output root
                    element; None is stored when it is absent or empty.

    A missing required element terminates the program through ``test()``
    with its dedicated message instead of an AttributeError/IndexError.
    """
    data = etree.parse(url)
    test(data is not None, "导表模板解析失败")

    # Elements are tested with ``is not None``/``len`` explicitly; lookups
    # that find nothing return None, and an element without children has
    # length 0.
    export_node = data.find("exportNode")
    has_template = export_node is not None and len(export_node) > 0
    node = export_node[0] if has_template else None
    test(node is not None, "导表模板[exportNode]为空")

    sheet_name = data.find("sheetName")
    name = sheet_name.text if sheet_name is not None else None
    test(name is not None, "导表模板[sheetName]为空")

    export_root = data.find("exportRoot")
    has_root = export_root is not None and len(export_root) > 0
    root = export_root[0].tag if has_root else None
    return XMLCfg(node, name, root)
def convert(xls_path, config=None):
    """Export one worksheet of the workbook at ``xls_path`` as XML bytes.

    xls_path -- path of the Excel workbook to open.
    config   -- configuration object providing ``name`` (worksheet name),
                ``map``, ``node`` and ``root``; when omitted, the
                module-level ``cfg`` is used, as before.

    Returns the pretty-printed serialized XML document (without an XML
    declaration).  ``config.map`` is filled with column index -> header
    text taken from the first row of the sheet.
    """
    if config is None:
        config = cfg

    book = open_workbook(xls_path)
    sheet = book.sheet_by_name(config.name)

    for c in range(sheet.ncols):
        header = sheet.cell(0, c).value
        # Numeric headers are stored as their integer text.  Checking the
        # type rather than truthiness also converts a header equal to 0,
        # which would otherwise stay a float and break string handling.
        if isinstance(header, (int, float)):
            header = str(int(header)).decode(decoding)
        config.map[c] = header

    root = excel2xml(sheet, config)
    return etree.tostring(root, encoding=encoding, pretty_print=True)
# Command-line entry point: xls2xml <xls_path> <cfg_path> <output>.
# Validates the arguments, loads the export template, resolves the output
# file, converts the workbook and writes the XML document.
# NOTE(review): this listing has lost its indentation, so which statements
# sit inside the `if` bodies below cannot be read off the text -- confirm
# against the original file before restructuring.
if __name__=="__main__":
# Exactly three arguments are required.  Code 0 makes test() print the bare
# usage text without the "ERR<code>:" prefix.
# NOTE(review): a usage error therefore appears to exit with status 0.
test(len(sys.argv) == 4, "usage: xls2xml xls_path cfg_path output", 0)
xls_path = sys.argv[1]
cfg_path = sys.argv[2]
output = sys.argv[3]
# The workbook must exist (code 404) and end in .xls/.xlsx (default code).
test(os.path.exists(xls_path), "EXCEL文件[" + xls_path + "]不存在", 404)
test(re.search(r'\.xlsx?$', xls_path.lower()) != None, "[" + xls_path + "]不是EXCEL文件")
# Same checks for the template, which must end in .xml.
test(os.path.exists(cfg_path), "导表模板[" + cfg_path + "]不存在", 404)
test(cfg_path[-4:].lower() == ".xml", "导表模板[" + cfg_path + "]不是XML文件")
# `cfg` becomes a module-level name here; convert() reads it as a global.
cfg = parseXMLCfg(cfg_path)
# `output` comes from sys.argv, so it is always a string at this point.
test(output != None, "XML输出目录为空");
# An output that does not end in ".xml" is treated as a directory and the
# file is named after the configured sheet.
if output[-4:].lower() != ".xml":
output = output + "/" + cfg.name + ".xml"
# NOTE(review): unclear whether this print belongs to the `if` above.
print ">>> " + output
# ntpath applies Windows path rules whatever the host platform; the
# backslashes it produces are then normalized to forward slashes.
folder = ntpath.abspath(ntpath.split(output)[0])
folder = folder.replace("\\", "/")
# Create the destination directory (and any missing parents) on demand.
if not os.path.exists(folder):
os.makedirs(folder)
# convert() returns the document body; the XML declaration is added here.
result = "<?xml version='1.0' encoding='utf-8'?>\n" + convert(xls_path)
# Report the target path and the result length with thousands separators.
print "" + output + "\t-> size:" + format(len(result), ",")
# NOTE(review): the file is closed explicitly; if write() raises, the
# handle is left open -- a `with` block would guarantee the close.
f = open(output, 'wb')
f.write(result)
f.close()