#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import pycuda.driver as drv
from nervanagpu import NervanaGPU
from pycuda.autoinit import context
from scikits.cuda import cublas

print(context.get_device().name())

handle = cublas.cublasCreate()
start, end = (drv.Event(), drv.Event())


def cublas_dot(A, B, C, alpha=1.0, beta=0.0, repeat=1):
    # cuBLAS expects column-major data; the leading dimension of each
    # row-major tensor is its largest stride in elements (4-byte floats).
    lda = max(A.strides) // 4
    ldb = max(B.strides) // 4
    ldc = max(C.strides) // 4

    opA = 't' if A.is_trans else 'n'
    opB = 't' if B.is_trans else 'n'
    op  = opB + opA
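    # A minimal sketch of how the call could complete (an assumption, not the
    # file's original continuation): compute the row-major product C = A.B by
    # asking column-major cuBLAS for C^T = B^T.A^T, which is why op = opB + opA
    # and B precedes A in the argument list. Shapes are assumed to be the
    # logical row-major shapes of the tensors.
    m, n = C.shape
    k = A.shape[1]

    start.record()
    for _ in range(repeat):
        cublas.cublasSgemm(handle, opB, opA, n, m, k, alpha,
                           B.gpudata, ldb,
                           A.gpudata, lda, beta,
                           C.gpudata, ldc)
    end.record()
    end.synchronize()
    msecs = end.time_since(start) / repeat
    print("op: %s  %.3f msecs" % (op, msecs))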
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import pycuda.driver as drv
from nervanagpu import NervanaGPU
from pycuda.autoinit import context
from operator import mul

print(context.get_device().name())

np.set_printoptions(threshold=8193, linewidth=600,
                    formatter={'int':   lambda x: "%10d" % x,
                               'float': lambda x: "% .3f" % x})

dtype  = np.float16
cpu    = 1
repeat = 1

ng = NervanaGPU(stochastic_round=False, bench=True)

pool = ng.pool_layer(
    "max",
    64,              # N
    64, 1, 64, 64,   # C,D,H,W
    4, 1, 2, 2,      # J,T,R,S
    0, 0, 0, 0,      # padding
    4, 1, 2, 2)      # strides (assumed equal to the J,T,R,S window)
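# A hedged usage sketch (not part of the original listing): allocate the
# layer's input/output tensors and run the max-pooling kernel forward.
# The dimI/dimO attributes, ng.array/ng.empty, and the fprop_pool call
# are assumptions about nervanagpu's layer API, not confirmed by this file.
cpuI = np.random.uniform(-1.0, 1.0, pool.dimI).astype(np.float32)

devI = ng.array(cpuI, dtype=dtype)       # input tensor on the device
devO = ng.empty(pool.dimO, dtype=dtype)  # output written by the kernel

ng.fprop_pool(pool, devI, devO)          # assumed signature: (layer, I, O)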
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import pycuda.driver as drv
from pycuda.autoinit import context
from nervanagpu import NervanaGPU
from nervanagpu.layers import DataLayer, ConvLayer, PoolLayer, FullLayer

print(context.get_device().name())

# Compare results here:
# https://github.com/soumith/convnet-benchmarks

# number of full iterations
loops = 10
# show benchmark details for each layer
layer_bench = 0
# show layer stats after each operation
print_stats = 0

ng = NervanaGPU(bench=layer_bench)

# don't learn, just benchmark
momentum = 0.0
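# A hedged sketch (an assumption, not this file's actual continuation) of how
# the benchmark loop could be structured with pycuda events. `network`,
# `fprop`, and `bprop` are hypothetical placeholders standing in for a stack
# of Data/Conv/Pool/Full layers; momentum = 0.0 keeps weight updates inert so
# only kernel time is measured.
start, end = (drv.Event(), drv.Event())

def benchmark(network):
    start.record()
    for _ in range(loops):
        out = network.fprop()   # hypothetical: forward through every layer
        network.bprop(out)      # hypothetical: backward pass, no learning
    end.record()
    end.synchronize()
    print("%.3f msecs per full iteration" % (end.time_since(start) / loops))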