|
1 | 1 | import time
|
2 |
| -import numpy as np |
3 | 2 | from itertools import chain
|
4 | 3 |
|
| 4 | +import numpy as np |
| 5 | +import rlviser_py |
5 | 6 | from rlgym.api import RLGym
|
6 | 7 | from rlgym.rocket_league.action_parsers import LookupTableAction, RepeatAction
|
7 |
| -from rlgym.rocket_league.done_conditions import GoalCondition, AnyCondition, TimeoutCondition, NoTouchTimeoutCondition |
| 8 | +from rlgym.rocket_league.done_conditions import ( |
| 9 | + AnyCondition, |
| 10 | + GoalCondition, |
| 11 | + NoTouchTimeoutCondition, |
| 12 | + TimeoutCondition, |
| 13 | +) |
8 | 14 | from rlgym.rocket_league.obs_builders import DefaultObs
|
9 | 15 | from rlgym.rocket_league.reward_functions import CombinedReward, GoalReward, TouchReward
|
10 | 16 | from rlgym.rocket_league.sim import RocketSimEngine
|
11 |
| -from rlgym.rocket_league.state_mutators import MutatorSequence, FixedTeamSizeMutator, KickoffMutator |
| 17 | +from rlgym.rocket_league.state_mutators import ( |
| 18 | + FixedTeamSizeMutator, |
| 19 | + KickoffMutator, |
| 20 | + MutatorSequence, |
| 21 | +) |
12 | 22 |
|
13 | 23 | from gym_renderer import RLViserRenderer
|
14 | 24 |
|
15 | 25 | if __name__ == "__main__":
|
| 26 | + game_speed = 1 |
| 27 | + |
16 | 28 | env = RLGym(
|
17 | 29 | state_mutator=MutatorSequence(
|
18 |
| - FixedTeamSizeMutator(blue_size=2, orange_size=2), |
19 |
| - KickoffMutator() |
| 30 | + FixedTeamSizeMutator(blue_size=2, orange_size=2), KickoffMutator() |
20 | 31 | ),
|
21 | 32 | obs_builder=DefaultObs(zero_padding=None),
|
22 | 33 | action_parser=RepeatAction(LookupTableAction(), repeats=1),
|
23 |
| - reward_fn=CombinedReward( |
24 |
| - (GoalReward(), 10.), |
25 |
| - (TouchReward(), 0.1) |
26 |
| - ), |
| 34 | + reward_fn=CombinedReward((GoalReward(), 10.0), (TouchReward(), 0.1)), |
27 | 35 | termination_cond=GoalCondition(),
|
28 | 36 | truncation_cond=AnyCondition(
|
29 |
| - TimeoutCondition(timeout=300.), |
30 |
| - NoTouchTimeoutCondition(timeout=30.) |
| 37 | + TimeoutCondition(timeout=300.0), NoTouchTimeoutCondition(timeout=30.0) |
31 | 38 | ),
|
32 | 39 | transition_engine=RocketSimEngine(),
|
33 |
| - renderer=RLViserRenderer(120) |
| 40 | + renderer=RLViserRenderer(120), |
34 | 41 | )
|
35 | 42 |
|
36 | 43 | # simulate 2 episodes
|
37 | 44 | for _ in range(2):
|
38 | 45 | obs_dict = env.reset()
|
39 | 46 | steps = 0
|
40 |
| - ep_reward = {agent_id: 0. for agent_id in env.agents} |
| 47 | + ep_reward = {agent_id: 0.0 for agent_id in env.agents} |
41 | 48 | t0 = time.time()
|
42 | 49 | while True:
|
43 | 50 | actions = {}
|
|
46 | 53 | actions[agent_id] = np.random.randint(action_space, size=(1,))
|
47 | 54 |
|
48 | 55 | for _ in range(8):
|
49 |
| - obs_dict, reward_dict, terminated_dict, truncated_dict = env.step(actions) |
| 56 | + obs_dict, reward_dict, terminated_dict, truncated_dict = env.step( |
| 57 | + actions |
| 58 | + ) |
50 | 59 | env.render()
|
51 |
| - time.sleep(max(0, t0 + steps / 1200 - time.time())) |
| 60 | + time.sleep(max(0, t0 + steps / (120 * game_speed) - time.time())) |
52 | 61 | steps += 1
|
53 | 62 |
|
54 | 63 | for agent_id, reward in reward_dict.items():
|
|
57 | 66 | if any(chain(terminated_dict.values(), truncated_dict.values())):
|
58 | 67 | break
|
59 | 68 |
|
| 69 | + game_speed = rlviser_py.get_game_speed() |
| 70 | + |
60 | 71 | ep_time = time.time() - t0
|
61 |
| - print(f"Steps per second: {steps / ep_time:.0f} | Episode time: {ep_time:.2f} | Episode Reward: {max(ep_reward.values()):.2f}") |
| 72 | + print( |
| 73 | + f"Steps per second: {steps / ep_time:.0f} | Episode time: {ep_time:.2f} | Episode Reward: {max(ep_reward.values()):.2f}" |
| 74 | + ) |
| 75 | + |
| 76 | + env.close() |
0 commit comments