def randomSteps(env,steps,dqn):
    """Fill the DQN replay memory with `steps` random-policy transitions, then
    restore/initialize the TF session and set up training bookkeeping.

    After an initial random number of NO_OP frames (to randomize the start
    state), each iteration takes a uniformly random action, stacks the last
    4 preprocessed frames into an (IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS)
    array, and stores (state, one-hot action, reward, next_state, done)
    via dqn.storeExperience.

    NOTE(review): despite its name, this function also performs checkpoint
    loading and training-loop setup after the random-step phase — consider
    splitting those responsibilities out.

    Args:
        env:   Gym-style environment whose step() returns
               (observation, reward, done, info).
        steps: number of env.step() calls in the random-fill phase.
        dqn:   agent exposing storeExperience(...) and replayMemory.
    """
    t0 = time.time()
    env.reset()
    i = 0
    # Rolling buffer of the last 4 preprocessed frames (one network input).
    frame_stack = Pipe(4)
    # Random length of the initial "do nothing" phase, in [4, NO_OP_MAX).
    initial_no_op = np.random.randint(4, NO_OP_MAX)

    for _ in range(0,steps):
        if i < initial_no_op:
            # WE PERFORM A RANDOM NUMBER OF NO_OP ACTIONS
            action = NO_OP_CODE
            state, reward, done, info = env.step(action)
            greyObservation = rgb2gray(state)
            state = downSample(greyObservation)
            frame_stack.push(state)
            i += 1
        else:

            # Current state = the 4 most recent frames stacked on the channel axis.
            state = np.stack(frame_stack.items, axis=2).reshape((IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))

            # Uniformly random action; kept one-hot for the replay memory.
            action = np.random.randint(0, len(ACTIONS))
            actionH =np.zeros(len(ACTIONS))
            actionH[action] = 1
            next_state, reward, game_over, info = env.step(action)


            greyObservation = rgb2gray(next_state)
            next_state = downSample(greyObservation)

            frame_stack.push(next_state)

            next_state = np.stack(frame_stack.items, axis=2).reshape((IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))

            # BUG(review): `.astype(type)` passes the *builtin* `type` as the
            # dtype argument; this was almost certainly meant to be a numeric
            # dtype (e.g. np.uint8 or np.float32) — confirm intended dtype.
            dqn.storeExperience(
                state.astype(type),
                actionH,
                reward,
                next_state.astype(type),
                game_over)
            # BUG(review): this branch's step result was bound to `game_over`
            # above, but `done` — last assigned in the NO_OP branch — is
            # tested here, so the episode-reset check runs on stale data.
            if done:
                #print("Episode finished after {} timesteps".format(_ + 1))
                env.reset()
                i=0
                # BUG(review): frame_stack is rebound to a plain list here,
                # but the surrounding code requires a Pipe(4) (.push/.items);
                # should presumably be `frame_stack = Pipe(4)`. Also,
                # `initial_no_op` is not re-randomized after this reset.
                frame_stack=[]




    t1 = time.time()
    print("Fullfilling replay memory operation took:",t1-t0,)
    print('Size of replay memory %s bytes and has %s elements' % ((sys.getsizeof(dqn.replayMemory)),len(dqn.replayMemory)))
    # NOTE(review): a bare `print` is a no-op expression in Python 3
    # (looks like a leftover Python 2 statement).
    print
    #Saving and loading networks
    saver = tf.train.Saver()
    checkpoint = tf.train.get_checkpoint_state("saved_networks")
    if checkpoint and checkpoint.model_checkpoint_path and LOAD_NETWORK:
        saver.restore(sess, checkpoint.model_checkpoint_path)
        print("Successfully loaded:", checkpoint.model_checkpoint_path)
        # Recover the game counter from the trailing digits of the checkpoint name.
        game = int(re.match('.*?([0-9]+)$', checkpoint.model_checkpoint_path).group(1))
    else:
        print("Could not find old network weights")

    # BUG(review): running the initializer AFTER saver.restore() overwrites
    # any weights just restored from the checkpoint, and `game = 0` below
    # clobbers the game counter parsed from the checkpoint path — both lines
    # likely belong in the `else` branch above.
    sess.run(tf.global_variables_initializer())
    game = 0
    game_scores =[]
    initial_no_op = np.random.randint(4, NO_OP_MAX)
    i=0
    frame_stack=Pipe(4)
    score=0
    # NOTE(review): the first FileWriter ('logs') is immediately shadowed by
    # the second and never closed — a leaked writer; drop one of the two.
    summary_writer = tf.summary.FileWriter('logs',sess.graph)
    summary_writer = tf.summary.FileWriter(LOG_DIRECTORY + RUN_STRING,
                                           sess.graph)
    print('Started training')
    for step in range(STEPS):
        if i < initial_no_op:
            # WE PERFORM A RANDOM NUMBER OF NO_OP ACTIONS
            action = NO_OP_CODE
            state, reward, done, info = env.step(action)
            greyObservation = rgb2gray(state)
            state = downSample(greyObservation)
            frame_stack.push(state)
            i+=1
        else: