Q-learning algorithm

Solutions on MaxInterview for the Q-learning algorithm by the best coders in the world

Showing results for "q learning algorithm"
26 Oct 2017
# Build the Q-table from scratch, or resume from a previously pickled one.
if start_q_table is None:
    # Keys are pairs of 2-tuples whose components each run over
    # range(-SIZE + 1, SIZE); values are lists of 4 Q-values drawn uniformly
    # from [-5, 0).
    # NOTE(review): the key shape presumably mirrors the training loop's
    # observation (player-food, player-enemy) -- confirm against Blob.__sub__.
    q_table = {}
    for dx1 in range(-SIZE + 1, SIZE):
        for dy1 in range(-SIZE + 1, SIZE):
            for dx2 in range(-SIZE + 1, SIZE):
                for dy2 in range(-SIZE + 1, SIZE):
                    # `_` instead of re-using an outer loop variable as the
                    # comprehension index, which read like a shadowing bug.
                    q_table[((dx1, dy1), (dx2, dy2))] = [
                        np.random.uniform(-5, 0) for _ in range(4)
                    ]
else:
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file; only pass a start_q_table path that this program wrote itself.
    with open(start_q_table, "rb") as f:
        q_table = pickle.load(f)

# can look up from Q-table with: print(q_table[((-9, -2), (3, 9))]) for example

# Tabular Q-learning training loop: one pass of the outer loop per episode,
# at most 200 steps per episode.
# NOTE(review): nothing in this span appends to episode_rewards or changes
# epsilon; presumably that happens after the step loop -- confirm.
episode_rewards = []

for episode in range(HM_EPISODES):
    # Fresh blobs every episode (construction order kept: player, food, enemy).
    player = Blob()
    food = Blob()
    enemy = Blob()

    # Every SHOW_EVERY-th episode is reported on stdout and rendered.
    show = episode % SHOW_EVERY == 0
    if show:
        recent_rewards = episode_rewards[-SHOW_EVERY:]
        # np.mean([]) yields nan but also emits a RuntimeWarning; produce the
        # same "nan" text for an empty history without the warning.
        recent_mean = np.mean(recent_rewards) if recent_rewards else np.nan
        print(f"on #{episode}, epsilon is {epsilon}")
        print(f"{SHOW_EVERY} ep mean: {recent_mean}")

    episode_reward = 0
    for _ in range(200):
        obs = (player - food, player - enemy)

        # Epsilon-greedy: exploit the best known action, otherwise pick one of
        # the 4 actions at random.
        if np.random.random() > epsilon:
            action = np.argmax(q_table[obs])
        else:
            action = np.random.randint(0, 4)
        player.action(action)

        # Possible extension (left disabled): enemy.move() / food.move()

        # The enemy collision is checked first, so it wins over food.
        if player.x == enemy.x and player.y == enemy.y:
            reward = -ENEMY_PENALTY
        elif player.x == food.x and player.y == food.y:
            reward = FOOD_REWARD
        else:
            reward = -MOVE_PENALTY
        # Evaluated once here; used for both the display pause and the
        # end-of-episode break below.
        episode_over = reward == FOOD_REWARD or reward == -ENEMY_PENALTY

        new_obs = (player - food, player - enemy)
        max_future_q = np.max(q_table[new_obs])
        current_q = q_table[obs][action]

        # Reaching the food pins the Q-value to FOOD_REWARD; every other step
        # blends the old value with reward + discounted best future value.
        # NOTE(review): the enemy-collision step also ends the episode but
        # still bootstraps from max_future_q -- confirm that is intended.
        if reward == FOOD_REWARD:
            new_q = FOOD_REWARD
        else:
            new_q = (1 - LEARNING_RATE) * current_q + LEARNING_RATE * (reward + DISCOUNT * max_future_q)
        q_table[obs][action] = new_q

        if show:
            # SIZE x SIZE RGB image, one pixel per grid cell, scaled up to
            # 300x300 for display; tile colours are looked up in d.
            env = np.zeros((SIZE, SIZE, 3), dtype=np.uint8)
            env[food.x][food.y] = d[FOOD_N]
            env[player.x][player.y] = d[PLAYER_N]
            env[enemy.x][enemy.y] = d[ENEMY_N]
            img = Image.fromarray(env, 'RGB')
            img = img.resize((300, 300))
            cv2.imshow("image", np.array(img))
            # Hold the final frame of an episode for 500 ms, otherwise 1 ms;
            # pressing 'q' abandons the rest of this episode.
            delay_ms = 500 if episode_over else 1
            if cv2.waitKey(delay_ms) & 0xFF == ord('q'):
                break

        episode_reward += reward
        if episode_over:
            break