-
Notifications
You must be signed in to change notification settings - Fork 0
/
inception1.py
129 lines (117 loc) · 5.07 KB
/
inception1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests the graph placer."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.core.protobuf import device_properties_pb2
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import meta_graph
from tensorflow.python.framework import ops as tf_ops
from tensorflow.python.grappler import cluster as clusters
from tensorflow.python.grappler import graph_placer
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.platform import test
import tensorflow as tf
from nets import inception
# Cluster spec describing four worker tasks on the "local" job.
# NOTE(review): only task 0's in-process server is started here; the other
# three tasks are presumably launched on the other machines — confirm, since
# nothing below appears to execute the placed graph against this server.
cluster = tf.train.ClusterSpec({"local": ["172.23.10.2:2222", "172.23.10.3:2223", "172.23.10.4:2224", "172.23.10.6:2225"]})
server1 = tf.train.Server(cluster, job_name="local", task_index=0)
class GraphPlacerTest():
    """Exercises Grappler's graph placer on an Inception-v3 model.

    Builds the model graph, describes a simulated cluster of four tasks
    (each with one GPU and one CPU), asks the placer to assign devices,
    and exports the placed graph to disk.
    """

    @staticmethod
    def _buildInception():
        """Build an Inception-v3 graph whose prediction op is the placement target.

        Returns:
            A `tf.Graph` with the argmax-prediction op appended to the
            TRAIN_OP collection, which is how the graph placer identifies
            the op whose placement it should optimize.
        """
        g = tf.Graph()
        train_batch_size = 5
        height, width = 299, 299  # Inception-v3 input resolution.
        num_classes = 1000
        with g.as_default():
            train_inputs = tf.random_uniform(
                (train_batch_size, height, width, 3))
            logits, _ = inception.inception_v3(train_inputs, num_classes)
            predictions = tf.argmax(logits, 1)
            # Registering `predictions` in the TRAIN_OP collection marks it
            # as the fetch target for the placer.
            g.get_collection_ref(tf_ops.GraphKeys.TRAIN_OP).append(predictions)
        return g

    @staticmethod
    def _buildCluster(num_cpus=1, num_gpus=1):
        """Describe a simulated 4-task cluster for the placer's cost model.

        Args:
            num_cpus: Unused; kept for signature compatibility with callers.
            num_gpus: When > 0, one Tesla-K40m-class GPU is described per task.

        Returns:
            A `grappler.Cluster` built from the simulated device descriptions
            (one GPU and one CPU per task, tasks 0..3 of job "local").
        """
        num_tasks = 4  # Must match the ClusterSpec declared at module level.
        devices = []
        if num_gpus > 0:
            gpu_properties = device_properties_pb2.DeviceProperties(
                type='GPU',
                vendor='NVidia',
                model='Tesla K40m',
                frequency=745,            # 745 MHz
                num_cores=2888,           # CUDA cores
                environment={'architecture': '5.2',
                             'cuda': '10000',
                             'cudnn': '7031'},
                num_registers=65536,
                l1_cache_size=65536,      # 64 KB
                l2_cache_size=1572864,    # 1.5 MB
                shared_memory_size_per_multiprocessor=49152,  # bytes
                memory_size=12884901888,  # 12 GB
                bandwidth=288000000)      # 288 GB/s
            # One identical GPU per task; a loop replaces the four
            # hand-copied append calls of the original.
            for task in range(num_tasks):
                devices.append(
                    device_properties_pb2.NamedDevice(
                        properties=gpu_properties,
                        name='/job:local/task:%d/device:GPU:0' % task))
        cpu_properties = device_properties_pb2.DeviceProperties(
            type='CPU',
            frequency=2399,
            num_cores=32,
            l1_cache_size=32768,
            l2_cache_size=262144,
            l3_cache_size=20971520)
        for task in range(num_tasks):
            devices.append(
                device_properties_pb2.NamedDevice(
                    properties=cpu_properties,
                    name='/job:local/task:%d/device:CPU:0' % task))
        return clusters.Cluster(devices=devices)

    def testBuild(self):
        """Place the Inception graph on the simulated cluster and export it."""
        graph = GraphPlacerTest._buildInception()
        mg = meta_graph.create_meta_graph_def(graph=graph)
        gcluster = GraphPlacerTest._buildCluster()
        print(gcluster.ListDevices())  # Show the simulated devices.
        # Give the placer a 108000-second budget.  (The original comment
        # claimed "15 seconds", contradicting the actual value.)
        placed_mg = graph_placer.PlaceGraph(
            mg, allotted_time=108000, cluster=gcluster, verbose=True)
        placed_g = placed_mg.graph_def
        meta_graph.export_scoped_meta_graph(
            filename="./g/g.meta", graph_def=placed_g)
if __name__ == '__main__':
    # Run the placement experiment directly as a script.
    GraphPlacerTest().testBuild()