-
Notifications
You must be signed in to change notification settings - Fork 0
/
cluster_usage_postproc.py
executable file
·138 lines (114 loc) · 3.9 KB
/
cluster_usage_postproc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#!/usr/bin/env python
"""
Obtain detailed usage information for the ICS-ACI cluster
"""
# TODO: Kill errant thread(s)
# TODO: Other interesting things...
from argparse import ArgumentParser
import os
import re
import dateutil.parser
import matplotlib as mpl
mpl.use('agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from plotGoodies import removeBorder
def parse_args(description=__doc__):
    """Parse the script's command-line arguments.

    Parameters
    ----------
    description : str or None
        Text shown at the top of the ``--help`` output; defaults to this
        module's docstring.

    Returns
    -------
    argparse.Namespace
        Namespace whose ``d`` attribute holds the (required) log directory
        given via ``-d DIR``.
    """
    arg_parser = ArgumentParser(description=description)
    arg_parser.add_argument('-d', metavar='DIR', required=True, help='Log dir')
    namespace = arg_parser.parse_args()
    return namespace
# Captures everything that follows the "cluster_usage" prefix and its
# one-character separator. The "." is deliberately left unescaped here, so any
# single character is accepted as the separator.
FILENAME_DATETIME_RE = re.compile(r'cluster_usage.(.+)')


def get_datetime(filepath):
    """Extract the timestamp encoded in a usage-log file name.

    The directory part and the final extension of `filepath` are discarded;
    whatever follows ``cluster_usage`` plus one separator character in the
    remaining name is handed to ``dateutil.parser.parse``.

    Parameters
    ----------
    filepath : str
        Path (or bare name) of a log file.

    Returns
    -------
    datetime.datetime
        The timestamp parsed from the file name.

    Raises
    ------
    ValueError
        If the file name does not contain the ``cluster_usage`` pattern.
        ``dateutil`` may additionally raise if the captured text is not a
        date it can parse.
    """
    filename = os.path.basename(filepath)
    base, _ = os.path.splitext(filename)
    match = FILENAME_DATETIME_RE.search(base)
    if match is None:
        # Fail with a message that names the offending file rather than an
        # anonymous "list index out of range".
        raise ValueError(
            'Cannot extract a timestamp from filename "%s"' % filename
        )
    return dateutil.parser.parse(match.group(1))
# Matches one per-node status line and captures, as strings:
#   nodetype - "st" or "hm"
#   nodenum  - the full (possibly multi-digit) node number
#   cores    - integer following "cores = "
#   mem      - integer following "mem = "
#   load, memusage - the first two integers after the opening "["
# The "+" quantifier must sit INSIDE the nodenum group; placed outside, the
# group repeats and only the last digit of the node number is retained.
NODE_INFO_RE = re.compile(
    r'^comp-(?P<nodetype>st|hm)-(?P<nodenum>[0-9]+) .*'
    r'cores = (?P<cores>[0-9]+), mem = *(?P<mem>[0-9]+) .*\[ *'
    r'(?P<load>[0-9]+) *(?P<memusage>[0-9]+)'
)
def get_info(args):
    """Parse every usage log in ``args.d`` into a table of per-node records.

    Each regular file in the directory whose name yields a timestamp (via
    ``get_datetime``) is scanned line by line; lines matching
    ``NODE_INFO_RE`` become one record each.

    Parameters
    ----------
    args : argparse.Namespace (or any object with a ``d`` attribute)
        ``args.d`` is the log directory. ``~`` and environment variables in
        it are expanded and the expanded path is written back to ``args.d``.

    Returns
    -------
    pandas.DataFrame
        One row per parsed node line with columns ``dt``, ``nodetype``,
        ``nodenum`` (int), ``cores`` (int), ``mem`` (float), ``load`` and
        ``memusage`` (logged values divided by 100), sorted by ``dt``,
        ``nodetype``, ``nodenum``. If nothing could be parsed, an empty
        frame carrying those columns is returned.
    """
    args.d = os.path.expandvars(os.path.expanduser(args.d))

    records = []
    for filename in os.listdir(args.d):
        filepath = os.path.join(args.d, filename)
        if not os.path.isfile(filepath):
            continue

        # Best effort: a file whose name carries no usable timestamp is
        # reported and skipped rather than aborting the whole run.
        try:
            dt = get_datetime(filepath)
        except Exception as err:
            # Parenthesised single-argument form prints identically whether
            # `print` is a statement (Python 2) or a function (Python 3).
            print(err)
            continue

        # `filepath` already includes the directory; joining it onto
        # `args.d` a second time would break for relative directories.
        with open(filepath, 'r') as f:
            for line in f:
                if line.startswith('['):
                    # Nothing after the "[failed_to_report]" marker is read
                    # from this file; other bracketed lines are skipped.
                    if line.strip() == '[failed_to_report]':
                        break
                    continue

                match = NODE_INFO_RE.match(line)
                if match is None:
                    continue

                info = dict(dt=dt)
                info.update(match.groupdict())
                info['cores'] = int(info['cores'])
                info['load'] = float(info['load'])/100
                info['mem'] = float(info['mem'])
                info['memusage'] = float(info['memusage'])/100
                info['nodenum'] = int(info['nodenum'])
                records.append(info)

    if not records:
        # A frame built from an empty list has no columns, so the sort below
        # would raise KeyError; hand back an empty, correctly-labelled table.
        return pd.DataFrame(
            columns=['dt', 'nodetype', 'nodenum', 'cores', 'mem', 'load',
                     'memusage']
        )

    records = pd.DataFrame(records)
    records.sort_values(by=['dt', 'nodetype', 'nodenum'], inplace=True)
    return records
def main():
    """Summarise cluster slot usage over time and plot it per node type.

    Reads the per-node records via ``get_info(parse_args())``, estimates how
    many slots each node still has free, aggregates by timestamp and node
    type, and saves one PNG and one PDF per node type (named after the node
    type) in the current working directory.

    Returns
    -------
    (records, summary) : tuple of pandas.DataFrame
        `records` is the per-node table with an added ``slots_avail``
        column; `summary` is indexed by ``dt`` with one row per
        (timestamp, node type).
    """
    records = get_info(parse_args())

    # One slot is one core together with its proportional share of memory.
    mem_per_slot = records['mem'] / records['cores']

    # Unused cores / memory, floored at zero.
    free_cores = np.clip(
        records['cores'] * (1 - records['load']), a_min=0, a_max=np.inf
    )
    free_mem = np.clip(
        records['mem'] * (1 - records['memusage']), a_min=0, a_max=np.inf
    )

    # A node offers as many slots as its scarcer resource permits.
    slots_by_mem = free_mem // mem_per_slot
    records['slots_avail'] = np.min([free_cores, slots_by_mem], axis=0)

    rows = []
    for (dt, nodetype), group in records.groupby(['dt', 'nodetype']):
        capacity = group['cores'].sum()
        available = group['slots_avail'].sum()
        used = capacity - available
        rows.append({
            'dt': dt,
            'nodetype': nodetype,
            'Percent slots in use': used / capacity*100,
            'Slots available': available,
            'Total slots': capacity
        })

    summary = pd.DataFrame(rows)
    summary.set_index('dt', inplace=True)

    titles = {'hm': 'High Memory'}
    for nt, nt_group in summary.groupby('nodetype'):
        ax = nt_group.plot(
            y='Percent slots in use',
            title=titles.get(nt, 'Standard Memory')
        )
        ax.set_xlabel('Date, hour')
        ax.set_ylabel('Percent')
        removeBorder(ax)
        # Fixed 0-100 range so the plots for different node types compare.
        ax.set_ylim(0, 100)
        plt.tight_layout()
        for suffix, save_opts in (('.png', {'dpi': 300}), ('.pdf', {})):
            plt.savefig(nt + suffix, **save_opts)

    plt.draw()
    plt.show()

    return records, summary
if __name__ == '__main__':
    # Run the analysis when executed as a script. The results are bound at
    # module level -- presumably so they can be inspected afterwards in an
    # interactive session (e.g. ``python -i``); TODO confirm.
    records, summary = main()