-
Notifications
You must be signed in to change notification settings - Fork 0
/
scan_mxds.py
345 lines (293 loc) · 12.3 KB
/
scan_mxds.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
import sys
import os
import argparse
import re
#import xml.etree.ElementTree as ET
from lxml import etree as ET
from datetime import datetime
import arcpy
from openpyxl import Workbook
from openpyxl import load_workbook
import logging
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
XLS_TAB_NAME = "dataSource"
XLS_HEADERS = []
NETWORK_DRIVES = []
DATA_SOURCES = {}
DATA_TARGETS = []
XREF_TAB_NAME = "xRef"
XREF_HEADERS = []
LDRV_LL_PATHS = []
def ascii(s):
if s is not None:
return s.encode('ascii', 'replace')
else:
return s
def get_alias_path(lDrvPath):
for nd in NETWORK_DRIVES:
if lDrvPath.find(nd["Path"]) == 0:
return lDrvPath.replace(nd["Path"], nd["Drive"])
if lDrvPath.find(nd["Drive"]) == 0:
return lDrvPath.replace(nd["Drive"], nd["Path"])
return lDrvPath
def parse_data_folder(lDrvPath):
parts = lDrvPath.split('\\')
idx = -1
for p in list(reversed(range(0, len(parts)))):
if parts[p].find('.') > -1:
idx = p
break
return '\\'.join(parts[:idx])
def load_config(configFile):
tree = ET.parse(configFile)
root = tree.getroot()
for child in root.iter('xlsTab'):
# only one tab
XLS_TAB_NAME = child.attrib['name']
for child in root.iter('xlsHeader'):
XLS_HEADERS.append(child.attrib['name'])
# print XLS_HEADERS
for child in root.iter('networkDrive'):
NETWORK_DRIVES.append({
'Drive': child.attrib['drive'],
'Path': child.attrib['path']
})
# print NETWORK_DRIVES
for child in root.iter('source'):
nm = child.attrib['name']
if nm not in DATA_SOURCES.keys():
DATA_SOURCES[nm] = []
for sc in child:
DATA_SOURCES[nm].append({
'LDrv':sc.attrib['path'],
'Mode':sc.tag
})
DATA_SOURCES[nm].append({
'LDrv':get_alias_path(sc.attrib['path']),
'Mode':sc.tag
})
# print DATA_SOURCES
for child in root.iter('target'):
target = {}
target['name'] = child.attrib['name']
for sc in child:
target[sc.tag] = sc.text
DATA_TARGETS.append(target)
# print DATA_TARGETS
'''
for src in DATA_SOURCES:
for tgt in DATA_TARGETS:
if src['name'] == tgt['name'] and src['Mode'] == 'add':
LDRV_LL_PATHS.append({
'Name':src['name'],
'LDrv':src['LDrv'],
'LL':tgt['livelink']
})
'''
for child in root.iter('livelink'):
if child.attrib['name'] == 'xRef':
llPath = child.attrib['path']
for hdr in child.iter('xrefHeader'):
XREF_HEADERS.append(hdr.attrib['name'])
wb = load_workbook(filename = llPath, read_only=True)
ws = wb[XREF_TAB_NAME]
# skip the first 1 row (the headers)
r = 2
hdrs = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
while ws["A"+str(r)].value is not None:
xrefRecord = {}
for c in range(0, len(XREF_HEADERS)):
xrefRecord[XREF_HEADERS[c]] = ws[hdrs[c]+str(r)].value
LDRV_LL_PATHS.append(xrefRecord)
r = r + 1
wb.close()
del wb
break
# print LDRV_LL_PATHS
def get_source_type(lDrvPath):
for k in DATA_SOURCES.keys():
excluded = False
ds = DATA_SOURCES[k]
for cvt in [s for s in ds if s["Mode"] == "exclude"]:
if lDrvPath.find(cvt["LDrv"]) == 0:
excluded = True
break
if not excluded:
for cvt in [s for s in ds if s["Mode"] == "add"]:
if lDrvPath.find(cvt["LDrv"]) == 0:
return k
return None
def find_Livelink_path(lDrvPath):
folderPath = parse_data_folder(lDrvPath)
folderPathAlias = get_alias_path(folderPath)
for cvt in LDRV_LL_PATHS:
#if lDrvPath.find(cvt["Data Source"]) == 0 or lDrvPathAlias.find(cvt["Data Source"]) == 0:
if folderPath == cvt["Data Source"] or folderPathAlias == cvt["Data Source"]:
return cvt["Livelink URL"]
return None
def verify_layer_dataSource(lyr, lyrType):
try:
return arcpy.Exists(lyr)
except:
logger.error("failed to open layer [%s]" % fc)
return False
def write_to_workbook(wbPath, dsList, sheetName=None):
wb = Workbook()
ws1 = wb.active
ws1.title = XLS_TAB_NAME
# headers
for c in range(0, len(XLS_HEADERS)):
ws1.cell(row=1, column=c+1, value=XLS_HEADERS[c])
# content
s = 1 # skip the first 1 row
for r in range(0, len(dsList)):
for c in dsList[r]:
# TODO: add style to cell?
h = XLS_HEADERS.index(c)
if h > -1:
ws1.cell(row=r+s+1, column=h+1, value=dsList[r][XLS_HEADERS[h]])
else:
print('Invalid header [%s] in xls [%s]' % (c, mxdPath))
wb.save(filename = wbPath)
wb.close()
del wb
def scan_layers_in_mxd(mxdPath):
dsList = []
mxd = None
# scan the mxd file
try:
mxd = arcpy.mapping.MapDocument(mxdPath)
lyrs = arcpy.mapping.ListLayers(mxd)
dsList.append({"Name": "MXD File", "Data Source": mxdPath})
lyrTitle = 'Map Layer'
sourceTitle = 'Data Source'
print('%-60s%s' % (lyrTitle,sourceTitle))
print('%-60s%s' % ('-' * len(lyrTitle), '-' * len(sourceTitle)))
for lyr in lyrs:
if lyr.isFeatureLayer == True:
lyrType = "FeatureLayer"
elif lyr.isRasterLayer == True:
lyrType = "RasterLayer"
elif lyr.isServiceLayer == True:
lyrType = "ServiceLayer"
elif lyr.isGroupLayer == True:
lyrType = "GroupLayer"
else:
lyrType = "UnknownLayer"
try:
if lyrType in ["GroupLayer", "ServiceLayer", "UnknownLayer"]:
print('%-60s%s' % (ascii(lyr.name),"*** skip " + lyrType))
else:
# get the layer data source
ds = lyr.dataSource
print('%-60s%s' % (ascii(lyr.name),ds))
# get the layer def query
defQry = None
if lyr.supports("DEFINITIONQUERY") == True:
defQry = lyr.definitionQuery
# get the layer description
dspt = None
if lyr.supports("DESCRIPTION") == True:
dspt = lyr.description
#
# verify the layer data source
verified = verify_layer_dataSource(ds, lyrType)
# check if the layer is within the scope
srcType = get_source_type(ds)
#
# get the Livelink path
llPath = None
if lyrType == "FeatureLayer" and verified and srcType is not None:
llPath = find_Livelink_path(ds)
if llPath is None:
print('%-60s%s' % (ascii(lyr.name),"??? no Livelink path found for " + ds))
#
dsList.append({"Name": lyr.name, "Data Source": ds, "Layer Type": lyrType,
"Verified?": verified, "Definition Query": defQry, "Description": dspt,
"Livelink Link": llPath})
except:
print('%-60s%s' % (ascii(lyr.name), ">>> failed to retrieve info " + lyrType + ": " + str(sys.exc_info()[0])))
except:
print('Failed to process the mxd file [%s]: %s' % (mxdPath, sys.exc_info()[0]))
finally:
del mxd
return dsList
def list_layers_to_xls(mxdPath, mxdFolder, xlsFolder):
print('\nThe mxd file: %s' % mxdPath)
fname = os.path.basename(mxdPath)
fdir = os.path.dirname(mxdPath)
dsList = scan_layers_in_mxd(mxdPath)
wbFolder = fdir.replace(mxdFolder, xlsFolder)
if not os.path.exists(wbFolder):
os.makedirs(wbFolder)
wbFilePath = os.path.join(wbFolder, fname + ".xlsx")
print('\nThe xlsx file: %s' % wbFilePath)
write_to_workbook(wbFilePath, dsList)
def scan_mxd_in_folder(mxdFolder, xlsFolder, date_filters):
# parse date filters
lower_date = None
upper_date = None
if date_filters is not None:
dfs = re.split('<', date_filters)
if len(dfs) == 2:
if dfs[0] is not None and len(dfs[0]) > 0:
lower_date = datetime.datetime.strptime(dfs[0], "%Y/%m/%d")
if dfs[1] is not None and len(dfs[1]) > 0:
upper_date = datetime.datetime.strptime(dfs[1], "%Y/%m/%d")
# walk through all files
for root, dirs, files in os.walk(mxdFolder):
for fname in files:
if fname.endswith(".mxd"):
mxdPath = os.path.join(root, fname)
modified_time = datetime.datetime.fromtimestamp(os.stat(mxdPath).st_mtime)
# check against the filter
is_filter_met = True
if lower_date is not None:
is_filter_met = modified_time >= lower_date
if upper_date is not None:
is_filter_met = is_filter_met and modified_time <= upper_date
# work on the mxd file
if is_filter_met == True:
list_layers_to_xls(mxdPath, mxdFolder, xlsFolder)
def scan_missed_mxds(mxdFolder, xlsFolder):
missedMxds = []
# walk through all files
for root, dirs, files in os.walk(mxdFolder):
for fname in files:
if fname.endswith(".mxd"):
mxdPath = os.path.join(root, fname)
xlsPath = os.path.join(root.replace(mxdFolder, xlsFolder), fname + ".xlsx")
if not os.path.exists(xlsPath):
mxdModifiedTime = datetime.datetime.fromtimestamp(os.stat(mxdPath).st_mtime)
missedMxd = {
"Path": mxdPath,
"ModTime": mxdModifiedTime.strftime('"%Y-%m-%d"')
}
missedMxds.append(missedMxd)
missedMxds = sorted(missedMxds, key=lambda missedMxd: missedMxd['ModTime'])
for missedMxd in missedMxds:
print 'Found: %-10s %s' % (missedMxd['ModTime'], missedMxd['Path'])
list_layers_to_xls(missedMxd['Path'], mxdFolder, xlsFolder)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Scan mxds and list layers into spreadsheets')
parser.add_argument('-m','--mxd', help='MXD Folder (input)', required=True)
parser.add_argument('-x','--xls', help='XLS Folder (output)', required=True)
parser.add_argument('-a','--action', help='Action Options (scan, comp, single)', required=False, default='scan')
parser.add_argument('-f','--filter', help='Filter by dates. Ex. 2015/2/19<2016/2/18', required=False, default=None)
parser.add_argument('-s','--single', help='A single MXD file (input for action: single', required=False, default=None)
parser.add_argument('-c','--cfg', help='Config File', required=False, default=r'H:\MXD_Scan\config.xml')
params = parser.parse_args()
if params.cfg is not None:
load_config(params.cfg)
if params.action == 'scan':
scan_mxd_in_folder(params.mxd, params.xls, params.filter)
print('###### Completed scaning all mxd files in %s'%params.mxd)
elif params.action == 'comp':
scan_missed_mxds(params.mxd, params.xls)
print('###### Completed compareing mxd files bewteen %s and %s'% (params.mxd, params.xls))
elif params.action == 'single':
list_layers_to_xls(params.single, params.mxd, params.xls)
print('###### Completed scaning the mxd file at %s'%params.single)
else:
print 'Error: unknown action [%s] for scanning' % params.action