default actor=adam3-64, critic=adam3-64, b=256, ep=245k
This commit is contained in:
parent 5c11322e05
commit 0a78c5a7d9
@ -1 +1,4 @@
.ipynb*
/model/*/*/*
!/model/*/*/actor*
!/model/*/*/critic*
@ -0,0 +1,28 @@
import numpy as np
import torch
import os
from maddpg.maddpg import MADDPG


class Agent:
    def __init__(self, agent_id, args):
        self.args = args
        self.agent_id = agent_id
        self.policy = MADDPG(args, agent_id)

    def select_action(self, o, noise_rate, epsilon):
        if np.random.uniform() < epsilon:
            u = np.random.uniform(-self.args.high_action, self.args.high_action, self.args.action_shape[self.agent_id])
        else:
            inputs = torch.tensor(o, dtype=torch.float32).unsqueeze(0)
            pi = self.policy.actor_network(inputs).squeeze(0)
            # print('{} : {}'.format(self.name, pi))
            u = pi.cpu().numpy()
            noise = noise_rate * self.args.high_action * np.random.randn(*u.shape)  # gaussian noise
            u += noise
            u = np.clip(u, -self.args.high_action, self.args.high_action)
        return u.copy()

    def learn(self, transitions, other_agents):
        self.policy.train(transitions, other_agents)
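For reference, a self-contained sketch of the exploration scheme used in select_action above (epsilon-greedy choice between a uniform random action and the policy action plus Gaussian noise); the numbers are illustrative, not taken from this commit:

import numpy as np

high_action = 1.0                            # make_env sets args.high_action = 1
noise_rate, epsilon = 0.1, 0.1
pi = np.array([0.3, -0.8, 0.5, 0.0, 0.1])    # stand-in for the actor output
if np.random.uniform() < epsilon:
    u = np.random.uniform(-high_action, high_action, pi.shape)       # explore uniformly
else:
    u = pi + noise_rate * high_action * np.random.randn(*pi.shape)   # policy action + gaussian noise
    u = np.clip(u, -high_action, high_action)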
@ -0,0 +1,38 @@
import argparse

"""
Parameters for training.
"""


def get_args():
    parser = argparse.ArgumentParser("Reinforcement Learning experiments for multiagent environments")
    # Environment
    parser.add_argument("--scenario-name", type=str, default="simple_tag", help="name of the scenario script")
    parser.add_argument("--max-episode-len", type=int, default=100, help="maximum episode length")
    parser.add_argument("--time-steps", type=int, default=2000000, help="number of time steps")
    # Agents
    parser.add_argument("--num-adversaries", type=int, default=1, help="number of adversaries")
    # Core training parameters
    parser.add_argument("--lr-actor", type=float, default=1e-4, help="learning rate of actor")
    parser.add_argument("--lr-critic", type=float, default=1e-3, help="learning rate of critic")
    parser.add_argument("--epsilon", type=float, default=0.1, help="epsilon greedy")
    parser.add_argument("--noise_rate", type=float, default=0.1, help="noise rate for sampling from a standard normal distribution")
    parser.add_argument("--gamma", type=float, default=0.95, help="discount factor")
    parser.add_argument("--tau", type=float, default=0.01, help="parameter for updating the target network")
    parser.add_argument("--buffer-size", type=int, default=int(5e5), help="number of transitions that can be stored in the buffer")
    parser.add_argument("--batch-size", type=int, default=256, help="number of transitions to optimize at the same time")
    # Checkpointing
    parser.add_argument("--save-dir", type=str, default="./model", help="directory in which training state and model should be saved")
    parser.add_argument("--save-rate", type=int, default=2000, help="save model once every time this many episodes are completed")
    parser.add_argument("--model-dir", type=str, default="", help="directory in which training state and model are loaded")

    # Evaluate
    parser.add_argument("--evaluate-episodes", type=int, default=10, help="number of episodes for evaluating")
    parser.add_argument("--evaluate-episode-len", type=int, default=100, help="length of episodes for evaluating")
    parser.add_argument("--evaluate", type=bool, default=False, help="whether to evaluate the model")
    parser.add_argument("--evaluate-rate", type=int, default=1000, help="how often to evaluate the model")
    args = parser.parse_args()

    return args
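One caveat on the flags above: because --evaluate uses type=bool, passing --evaluate False still yields True (bool("False") is truthy). A more robust pattern, shown here purely as an illustration and not as a change to the commit, is a store_true flag:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--evaluate", action="store_true", help="whether to evaluate the model")
print(parser.parse_args([]).evaluate)               # False
print(parser.parse_args(["--evaluate"]).evaluate)   # True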
@ -0,0 +1,53 @@
import threading
import numpy as np


class Buffer:
    def __init__(self, args):
        self.size = args.buffer_size
        self.args = args
        # memory management
        self.current_size = 0
        # create the buffer to store info
        self.buffer = dict()
        for i in range(self.args.n_agents):
            self.buffer['o_%d' % i] = np.empty([self.size, self.args.obs_shape[i]])
            self.buffer['u_%d' % i] = np.empty([self.size, self.args.action_shape[i]])
            self.buffer['r_%d' % i] = np.empty([self.size])
            self.buffer['o_next_%d' % i] = np.empty([self.size, self.args.obs_shape[i]])
        # thread lock
        self.lock = threading.Lock()

    # store the episode
    def store_episode(self, o, u, r, o_next):
        idxs = self._get_storage_idx(inc=1)
        for i in range(self.args.n_agents):
            with self.lock:
                self.buffer['o_%d' % i][idxs] = o[i]
                self.buffer['u_%d' % i][idxs] = u[i]
                self.buffer['r_%d' % i][idxs] = r[i]
                self.buffer['o_next_%d' % i][idxs] = o_next[i]

    # sample the data from the replay buffer
    def sample(self, batch_size):
        temp_buffer = {}
        idx = np.random.randint(0, self.current_size, batch_size)
        for key in self.buffer.keys():
            temp_buffer[key] = self.buffer[key][idx]
        return temp_buffer

    def _get_storage_idx(self, inc=None):
        inc = inc or 1
        if self.current_size + inc <= self.size:
            idx = np.arange(self.current_size, self.current_size + inc)
        elif self.current_size < self.size:
            overflow = inc - (self.size - self.current_size)
            idx_a = np.arange(self.current_size, self.size)
            idx_b = np.random.randint(0, self.current_size, overflow)
            idx = np.concatenate([idx_a, idx_b])
        else:
            idx = np.random.randint(0, self.size, inc)
        self.current_size = min(self.size, self.current_size + inc)
        if inc == 1:
            idx = idx[0]
        return idx
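A minimal usage sketch of the buffer (illustration only; SimpleNamespace stands in for the parsed args, the shapes are made up, and the Buffer class above is assumed to be in scope):

from types import SimpleNamespace
import numpy as np

args = SimpleNamespace(buffer_size=1000, n_agents=2, obs_shape=[8, 10], action_shape=[5, 5])
buffer = Buffer(args)
o = [np.zeros(8), np.zeros(10)]              # one observation per agent
u = [np.zeros(5), np.zeros(5)]               # one action per agent
buffer.store_episode(o, u, [0.0, 0.0], o)    # stores a single transition per call
batch = buffer.sample(1)                     # dict keyed by 'o_0', 'u_0', 'r_0', 'o_next_0', ...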
@ -0,0 +1,54 @@
import numpy as np
import inspect
import functools


def store_args(method):
    """Stores provided method args as instance attributes.
    """
    argspec = inspect.getfullargspec(method)
    defaults = {}
    if argspec.defaults is not None:
        defaults = dict(
            zip(argspec.args[-len(argspec.defaults):], argspec.defaults))
    if argspec.kwonlydefaults is not None:
        defaults.update(argspec.kwonlydefaults)
    arg_names = argspec.args[1:]

    @functools.wraps(method)
    def wrapper(*positional_args, **keyword_args):
        self = positional_args[0]
        # Get default arg values
        args = defaults.copy()
        # Add provided arg values
        for name, value in zip(arg_names, positional_args[1:]):
            args[name] = value
        args.update(keyword_args)
        self.__dict__.update(args)
        return method(*positional_args, **keyword_args)

    return wrapper


def make_env(args):
    from multiagent.environment import MultiAgentEnv
    import multiagent.scenarios as scenarios

    # load scenario from script
    scenario = scenarios.load(args.scenario_name + ".py").Scenario()

    # create world
    world = scenario.make_world()
    # create multiagent environment
    env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation)
    # env = MultiAgentEnv(world)
    args.n_players = env.n
    args.n_agents = env.n - args.num_adversaries
    args.obs_shape = [env.observation_space[i].shape[0] for i in range(args.n_agents)]
    action_shape = []
    for content in env.action_space:
        action_shape.append(content.n)
    args.action_shape = action_shape[:args.n_agents]
    args.high_action = 1
    args.low_action = -1
    return env, args
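A quick self-contained check of the store_args decorator above (illustrative; assumes store_args is in scope):

class Example:
    @store_args
    def __init__(self, lr, gamma=0.95):
        pass

e = Example(0.01)
print(e.lr, e.gamma)   # 0.01 0.95 -- both arguments stored as attributes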
@ -0,0 +1,44 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


# define the actor network
class Actor(nn.Module):
    def __init__(self, args, agent_id):
        super(Actor, self).__init__()
        self.max_action = args.high_action
        self.fc1 = nn.Linear(args.obs_shape[agent_id], 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 64)
        self.action_out = nn.Linear(64, args.action_shape[agent_id])  # fc3 outputs 64 features, so the output head takes 64 inputs

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        actions = self.max_action * torch.tanh(self.action_out(x))

        return actions


# define the critic network
class Critic(nn.Module):
    def __init__(self, args):
        super(Critic, self).__init__()
        self.max_action = args.high_action
        self.fc1 = nn.Linear(sum(args.obs_shape) + sum(args.action_shape), 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 64)
        self.q_out = nn.Linear(64, 1)

    def forward(self, state, action):
        state = torch.cat(state, dim=1)
        for i in range(len(action)):
            action[i] /= self.max_action
        action = torch.cat(action, dim=1)
        x = torch.cat([state, action], dim=1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        q_value = self.q_out(x)
        return q_value
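A quick shape check for the two networks (illustration only; the observation and action sizes are arbitrary stand-ins for what make_env would fill in):

from types import SimpleNamespace
import torch

args = SimpleNamespace(high_action=1, obs_shape=[16, 14], action_shape=[5, 5])
actor = Actor(args, agent_id=0)
critic = Critic(args)
obs = [torch.randn(4, 16), torch.randn(4, 14)]   # batch of 4 observations per agent
acts = [actor(obs[0]), torch.randn(4, 5)]        # joint action of both agents
print(actor(obs[0]).shape)      # torch.Size([4, 5])
print(critic(obs, acts).shape)  # torch.Size([4, 1]) -- one Q value per sample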
@ -0,0 +1,114 @@
import torch
import os
from maddpg.actor_critic import Actor, Critic


class MADDPG:
    def __init__(self, args, agent_id):
        self.args = args
        self.agent_id = agent_id
        self.train_step = 0

        # create the networks
        self.actor_network = Actor(args, agent_id)
        self.critic_network = Critic(args)

        # build up the target networks
        self.actor_target_network = Actor(args, agent_id)
        self.critic_target_network = Critic(args)

        # load the weights into the target networks
        self.actor_target_network.load_state_dict(self.actor_network.state_dict())
        self.critic_target_network.load_state_dict(self.critic_network.state_dict())

        # create the optimizers
        self.actor_optim = torch.optim.Adam(self.actor_network.parameters(), lr=self.args.lr_actor)
        self.critic_optim = torch.optim.Adam(self.critic_network.parameters(), lr=self.args.lr_critic)

        # create the directory for storing the model
        if not os.path.exists(self.args.save_dir):
            os.mkdir(self.args.save_dir)
        # path to save the model
        self.model_path = self.args.save_dir + '/' + self.args.scenario_name
        if not os.path.exists(self.model_path):
            os.mkdir(self.model_path)
        self.model_path = self.model_path + '/' + 'agent_%d' % agent_id
        if not os.path.exists(self.model_path):
            os.mkdir(self.model_path)

        if os.path.exists(self.model_path + '/actor_params.pkl'):
            self.actor_network.load_state_dict(torch.load(self.model_path + '/actor_params.pkl'))
            self.critic_network.load_state_dict(torch.load(self.model_path + '/critic_params.pkl'))
            print('Agent {} successfully loaded actor_network: {}'.format(self.agent_id,
                                                                          self.model_path + '/actor_params.pkl'))
            print('Agent {} successfully loaded critic_network: {}'.format(self.agent_id,
                                                                           self.model_path + '/critic_params.pkl'))

    # soft update
    def _soft_update_target_network(self):
        for target_param, param in zip(self.actor_target_network.parameters(), self.actor_network.parameters()):
            target_param.data.copy_((1 - self.args.tau) * target_param.data + self.args.tau * param.data)

        for target_param, param in zip(self.critic_target_network.parameters(), self.critic_network.parameters()):
            target_param.data.copy_((1 - self.args.tau) * target_param.data + self.args.tau * param.data)

    # update the networks
    def train(self, transitions, other_agents):
        for key in transitions.keys():
            transitions[key] = torch.tensor(transitions[key], dtype=torch.float32)
        r = transitions['r_%d' % self.agent_id]  # reward of this agent
        o, u, o_next = [], [], []  # observations, actions and next observations of every agent
        for agent_id in range(self.args.n_agents):
            o.append(transitions['o_%d' % agent_id])
            u.append(transitions['u_%d' % agent_id])
            o_next.append(transitions['o_next_%d' % agent_id])

        # calculate the target Q value function
        u_next = []
        with torch.no_grad():
            index = 0
            for agent_id in range(self.args.n_agents):
                if agent_id == self.agent_id:
                    u_next.append(self.actor_target_network(o_next[agent_id]))
                else:
                    # other_agents holds every agent except this one, so it is walked with its own index
                    u_next.append(other_agents[index].policy.actor_target_network(o_next[agent_id]))
                    index += 1
            q_next = self.critic_target_network(o_next, u_next).detach()

            target_q = (r.unsqueeze(1) + self.args.gamma * q_next).detach()

        # the q loss
        q_value = self.critic_network(o, u)
        critic_loss = (target_q - q_value).pow(2).mean()

        # the actor loss
        u[self.agent_id] = self.actor_network(o[self.agent_id])
        actor_loss = - self.critic_network(o, u).mean()
        # if self.agent_id == 0:
        #     print('critic_loss is {}, actor_loss is {}'.format(critic_loss, actor_loss))
        # update the networks
        self.actor_optim.zero_grad()
        actor_loss.backward()
        self.actor_optim.step()
        self.critic_optim.zero_grad()
        critic_loss.backward()
        self.critic_optim.step()

        self._soft_update_target_network()
        if self.train_step > 0 and self.train_step % self.args.save_rate == 0:
            self.save_model(self.train_step)
        self.train_step += 1

    def save_model(self, train_step):
        num = str(train_step // self.args.save_rate)
        model_path = os.path.join(self.args.save_dir, self.args.scenario_name)
        if not os.path.exists(model_path):
            os.makedirs(model_path)
        model_path = os.path.join(model_path, 'agent_%d' % self.agent_id)
        if not os.path.exists(model_path):
            os.makedirs(model_path)
        torch.save(self.actor_network.state_dict(), model_path + '/' + num + '_actor_params.pkl')
        torch.save(self.critic_network.state_dict(), model_path + '/' + num + '_critic_params.pkl')
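The soft update above is standard Polyak averaging, target <- (1 - tau) * target + tau * online; a self-contained toy version for reference (illustrative, not part of the commit):

import torch.nn as nn

tau = 0.01
net, target = nn.Linear(4, 2), nn.Linear(4, 2)
target.load_state_dict(net.state_dict())     # start identical, as in __init__ above
# ... after some optimizer steps on net ...
for t_p, p in zip(target.parameters(), net.parameters()):
    t_p.data.copy_((1 - tau) * t_p.data + tau * p.data)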
@ -0,0 +1,331 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "11c84981",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Looking in indexes: https://nexus.c68.spacecorp.ru/repository/pypi_group/simple/\n",
|
||||
"Requirement already satisfied: torch==1.7.1 in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (1.7.1)\n",
|
||||
"Requirement already satisfied: typing-extensions in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (from torch==1.7.1) (4.4.0)\n",
|
||||
"Requirement already satisfied: numpy in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (from torch==1.7.1) (1.24.1)\n",
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"pip install torch==1.7.1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "735f4c82",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Looking in indexes: https://nexus.c68.spacecorp.ru/repository/pypi_group/simple/\n",
|
||||
"Collecting supersuit==2.6.5\n",
|
||||
" Using cached SuperSuit-2.6.5-py3-none-any.whl\n",
|
||||
"Requirement already satisfied: cloudpickle in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (from supersuit==2.6.5) (2.2.0)\n",
|
||||
"Collecting pettingzoo>=1.6.0\n",
|
||||
" Using cached https://nexus.c68.spacecorp.ru/repository/pypi_group/packages/pettingzoo/1.22.3/PettingZoo-1.22.3-py3-none-any.whl (816 kB)\n",
|
||||
"Collecting opencv-python~=3.4.0\n",
|
||||
" Using cached https://nexus.c68.spacecorp.ru/repository/pypi_group/packages/opencv-python/3.4.18.65/opencv_python-3.4.18.65-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (58.4 MB)\n",
|
||||
"Requirement already satisfied: numpy>=1.19.3 in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (from opencv-python~=3.4.0->supersuit==2.6.5) (1.24.1)\n",
|
||||
"Requirement already satisfied: gymnasium>=0.26.0 in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (from pettingzoo>=1.6.0->supersuit==2.6.5) (0.27.0)\n",
|
||||
"Requirement already satisfied: typing-extensions>=4.3.0 in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (from gymnasium>=0.26.0->pettingzoo>=1.6.0->supersuit==2.6.5) (4.4.0)\n",
|
||||
"Requirement already satisfied: importlib-metadata>=4.8.0 in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (from gymnasium>=0.26.0->pettingzoo>=1.6.0->supersuit==2.6.5) (6.0.0)\n",
|
||||
"Requirement already satisfied: shimmy<1.0,>=0.1.0 in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (from gymnasium>=0.26.0->pettingzoo>=1.6.0->supersuit==2.6.5) (0.2.0)\n",
|
||||
"Requirement already satisfied: jax-jumpy>=0.2.0 in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (from gymnasium>=0.26.0->pettingzoo>=1.6.0->supersuit==2.6.5) (0.2.0)\n",
|
||||
"Requirement already satisfied: gymnasium-notices>=0.0.1 in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (from gymnasium>=0.26.0->pettingzoo>=1.6.0->supersuit==2.6.5) (0.0.1)\n",
|
||||
"Requirement already satisfied: zipp>=0.5 in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (from importlib-metadata>=4.8.0->gymnasium>=0.26.0->pettingzoo>=1.6.0->supersuit==2.6.5) (3.11.0)\n",
|
||||
"Installing collected packages: pettingzoo, opencv-python, supersuit\n",
|
||||
" Attempting uninstall: pettingzoo\n",
|
||||
" Found existing installation: PettingZoo 1.3.3\n",
|
||||
" Uninstalling PettingZoo-1.3.3:\n",
|
||||
" Successfully uninstalled PettingZoo-1.3.3\n",
|
||||
" Attempting uninstall: opencv-python\n",
|
||||
" Found existing installation: opencv-python 4.7.0.68\n",
|
||||
" Uninstalling opencv-python-4.7.0.68:\n",
|
||||
" Successfully uninstalled opencv-python-4.7.0.68\n",
|
||||
" Attempting uninstall: supersuit\n",
|
||||
" Found existing installation: SuperSuit 3.7.1\n",
|
||||
" Uninstalling SuperSuit-3.7.1:\n",
|
||||
" Successfully uninstalled SuperSuit-3.7.1\n",
|
||||
"Successfully installed opencv-python-3.4.18.65 pettingzoo-1.22.3 supersuit-2.6.5\n",
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"pip install supersuit==2.6.5"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "3b8272be",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Looking in indexes: https://nexus.c68.spacecorp.ru/repository/pypi_group/simple/\n",
|
||||
"Collecting tqdm\n",
|
||||
" Using cached https://nexus.c68.spacecorp.ru/repository/pypi_group/packages/tqdm/4.64.1/tqdm-4.64.1-py2.py3-none-any.whl (78 kB)\n",
|
||||
"Installing collected packages: tqdm\n",
|
||||
"Successfully installed tqdm-4.64.1\n",
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"pip install tqdm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "acc570b8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Looking in indexes: https://nexus.c68.spacecorp.ru/repository/pypi_group/simple/\n",
|
||||
"Requirement already satisfied: matplotlib in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (3.6.3)\n",
|
||||
"Requirement already satisfied: contourpy>=1.0.1 in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (from matplotlib) (1.0.6)\n",
|
||||
"Requirement already satisfied: numpy>=1.19 in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (from matplotlib) (1.24.1)\n",
|
||||
"Requirement already satisfied: pyparsing>=2.2.1 in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (from matplotlib) (3.0.9)\n",
|
||||
"Requirement already satisfied: packaging>=20.0 in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (from matplotlib) (23.0)\n",
|
||||
"Requirement already satisfied: fonttools>=4.22.0 in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (from matplotlib) (4.38.0)\n",
|
||||
"Requirement already satisfied: cycler>=0.10 in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (from matplotlib) (0.11.0)\n",
|
||||
"Requirement already satisfied: kiwisolver>=1.0.1 in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (from matplotlib) (1.4.4)\n",
|
||||
"Requirement already satisfied: pillow>=6.2.0 in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (from matplotlib) (9.4.0)\n",
|
||||
"Requirement already satisfied: python-dateutil>=2.7 in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (from matplotlib) (2.8.2)\n",
|
||||
"Requirement already satisfied: six>=1.5 in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (from python-dateutil>=2.7->matplotlib) (1.16.0)\n",
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"pip install matplotlib"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "262ae5d6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Looking in indexes: https://nexus.c68.spacecorp.ru/repository/pypi_group/simple/\n",
|
||||
"Collecting gym==0.10.5\n",
|
||||
" Using cached gym-0.10.5-py3-none-any.whl\n",
|
||||
"Requirement already satisfied: six in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (from gym==0.10.5) (1.16.0)\n",
|
||||
"Collecting pyglet>=1.2.0\n",
|
||||
" Using cached https://nexus.c68.spacecorp.ru/repository/pypi_group/packages/pyglet/2.0.3/pyglet-2.0.3-py3-none-any.whl (968 kB)\n",
|
||||
"Requirement already satisfied: numpy>=1.10.4 in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (from gym==0.10.5) (1.24.1)\n",
|
||||
"Requirement already satisfied: requests>=2.0 in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (from gym==0.10.5) (2.28.1)\n",
|
||||
"Requirement already satisfied: idna<4,>=2.5 in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (from requests>=2.0->gym==0.10.5) (3.4)\n",
|
||||
"Requirement already satisfied: charset-normalizer<3,>=2 in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (from requests>=2.0->gym==0.10.5) (2.1.1)\n",
|
||||
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (from requests>=2.0->gym==0.10.5) (1.26.14)\n",
|
||||
"Requirement already satisfied: certifi>=2017.4.17 in /home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/venv/lib/python3.9/site-packages (from requests>=2.0->gym==0.10.5) (2022.12.7)\n",
|
||||
"Installing collected packages: pyglet, gym\n",
|
||||
" Attempting uninstall: gym\n",
|
||||
" Found existing installation: gym 0.22.0\n",
|
||||
" Uninstalling gym-0.22.0:\n",
|
||||
" Successfully uninstalled gym-0.22.0\n",
|
||||
"Successfully installed gym-0.10.5 pyglet-2.0.3\n",
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"pip install gym==0.10.5"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "9c651810",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Looking in indexes: https://nexus.c68.spacecorp.ru/repository/pypi_group/simple/\n",
|
||||
"Collecting pyglet==1.5.27\n",
|
||||
" Using cached https://nexus.c68.spacecorp.ru/repository/pypi_group/packages/pyglet/1.5.27/pyglet-1.5.27-py3-none-any.whl (1.1 MB)\n",
|
||||
"Installing collected packages: pyglet\n",
|
||||
" Attempting uninstall: pyglet\n",
|
||||
" Found existing installation: pyglet 2.0.3\n",
|
||||
" Uninstalling pyglet-2.0.3:\n",
|
||||
" Successfully uninstalled pyglet-2.0.3\n",
|
||||
"Successfully installed pyglet-1.5.27\n",
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"pip install pyglet==1.5.27"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "cb877007",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Пытаемся загрузить данные!\n",
|
||||
"Пытаемся загрузить данные!\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 0%| | 203/2000000 [00:00<32:10, 1036.07it/s]/home/ovchinnikov_ii@RISDE.ru/Software/Jupyter/MADDPG/maddpg/maddpg.py:60: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
|
||||
" transitions[key] = torch.tensor(transitions[key], dtype=torch.float32)\n",
|
||||
" 0%| | 307/2000000 [00:01<2:18:56, 239.88it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Пытаемся сохранить данные по пути = ./model/simple_adversary/agent_0/1_actor_params.pkl\n",
|
||||
"Пытаемся сохранить данные по пути = ./model/simple_adversary/agent_1/1_actor_params.pkl\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 0%| | 459/2000000 [00:03<6:02:10, 92.02it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Пытаемся сохранить данные по пути = ./model/simple_adversary/agent_0/2_actor_params.pkl\n",
|
||||
"Пытаемся сохранить данные по пути = ./model/simple_adversary/agent_1/2_actor_params.pkl\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 0%| | 566/2000000 [00:04<7:30:23, 73.99it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Пытаемся сохранить данные по пути = ./model/simple_adversary/agent_0/3_actor_params.pkl\n",
|
||||
"Пытаемся сохранить данные по пути = ./model/simple_adversary/agent_1/3_actor_params.pkl\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 0%| | 667/2000000 [00:06<8:44:54, 63.48it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Пытаемся сохранить данные по пути = ./model/simple_adversary/agent_0/4_actor_params.pkl\n",
|
||||
"Пытаемся сохранить данные по пути = ./model/simple_adversary/agent_1/4_actor_params.pkl\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 0%| | 717/2000000 [00:07<5:36:33, 99.01it/s]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "KeyboardInterrupt",
|
||||
"evalue": "",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
||||
"File \u001b[0;32m~/Software/Jupyter/MADDPG/main.py:18\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mAverage returns is\u001b[39m\u001b[38;5;124m'\u001b[39m, returns)\n\u001b[1;32m 17\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m---> 18\u001b[0m \u001b[43mrunner\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"File \u001b[0;32m~/Software/Jupyter/MADDPG/runner.py:52\u001b[0m, in \u001b[0;36mRunner.run\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 50\u001b[0m other_agents \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39magents\u001b[38;5;241m.\u001b[39mcopy()\n\u001b[1;32m 51\u001b[0m other_agents\u001b[38;5;241m.\u001b[39mremove(agent)\n\u001b[0;32m---> 52\u001b[0m \u001b[43magent\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlearn\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtransitions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mother_agents\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 53\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m time_step \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m time_step \u001b[38;5;241m%\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mevaluate_rate \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 54\u001b[0m returns\u001b[38;5;241m.\u001b[39mappend(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mevaluate())\n",
|
||||
"File \u001b[0;32m~/Software/Jupyter/MADDPG/agent.py:27\u001b[0m, in \u001b[0;36mAgent.learn\u001b[0;34m(self, transitions, other_agents)\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mlearn\u001b[39m(\u001b[38;5;28mself\u001b[39m, transitions, other_agents):\n\u001b[0;32m---> 27\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpolicy\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtransitions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mother_agents\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"File \u001b[0;32m~/Software/Jupyter/MADDPG/maddpg/maddpg.py:95\u001b[0m, in \u001b[0;36mMADDPG.train\u001b[0;34m(self, transitions, other_agents)\u001b[0m\n\u001b[1;32m 93\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mactor_optim\u001b[38;5;241m.\u001b[39mzero_grad()\n\u001b[1;32m 94\u001b[0m actor_loss\u001b[38;5;241m.\u001b[39mbackward()\n\u001b[0;32m---> 95\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mactor_optim\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstep\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 96\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcritic_optim\u001b[38;5;241m.\u001b[39mzero_grad()\n\u001b[1;32m 97\u001b[0m critic_loss\u001b[38;5;241m.\u001b[39mbackward()\n",
|
||||
"File \u001b[0;32m~/Software/Jupyter/venv/lib/python3.9/site-packages/torch/autograd/grad_mode.py:26\u001b[0m, in \u001b[0;36m_DecoratorContextManager.__call__.<locals>.decorate_context\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_context\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 25\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m():\n\u001b[0;32m---> 26\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"File \u001b[0;32m~/Software/Jupyter/venv/lib/python3.9/site-packages/torch/optim/adamw.py:116\u001b[0m, in \u001b[0;36mAdamW.step\u001b[0;34m(self, closure)\u001b[0m\n\u001b[1;32m 112\u001b[0m denom \u001b[38;5;241m=\u001b[39m (exp_avg_sq\u001b[38;5;241m.\u001b[39msqrt() \u001b[38;5;241m/\u001b[39m math\u001b[38;5;241m.\u001b[39msqrt(bias_correction2))\u001b[38;5;241m.\u001b[39madd_(group[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124meps\u001b[39m\u001b[38;5;124m'\u001b[39m])\n\u001b[1;32m 114\u001b[0m step_size \u001b[38;5;241m=\u001b[39m group[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlr\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m/\u001b[39m bias_correction1\n\u001b[0;32m--> 116\u001b[0m p\u001b[38;5;241m.\u001b[39maddcdiv_(exp_avg, denom, value\u001b[38;5;241m=\u001b[39m\u001b[38;5;241;43m-\u001b[39;49m\u001b[43mstep_size\u001b[49m)\n\u001b[1;32m 118\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m loss\n",
|
||||
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%run ./main.py --scenario-name=simple_adversary --evaluate-episodes=10000 --save-rate=100"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d71a2b22",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d079aff2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "venv",
|
||||
"language": "python",
|
||||
"name": "venv"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
|
@ -0,0 +1,18 @@
from runner import Runner
from common.arguments import get_args
from common.utils import make_env
import numpy as np
import random
import torch


if __name__ == '__main__':
    # get the params
    args = get_args()
    env, args = make_env(args)
    runner = Runner(args, env)
    if args.evaluate:
        returns = runner.evaluate()
        print('Average returns is', returns)
    else:
        runner.run()
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,18 @@
from gym.envs.registration import register

# Multiagent envs
# ----------------------------------------

register(
    id='MultiagentSimple-v0',
    entry_point='multiagent.envs:SimpleEnv',
    # FIXME(cathywu) currently has to be exactly max_path_length parameters in
    # rllab run script
    max_episode_steps=100,
)

register(
    id='MultiagentSimpleSpeakerListener-v0',
    entry_point='multiagent.envs:SimpleSpeakerListenerEnv',
    max_episode_steps=100,
)
@ -0,0 +1,196 @@
import numpy as np

# physical/external base state of all entities
class EntityState(object):
    def __init__(self):
        # physical position
        self.p_pos = None
        # physical velocity
        self.p_vel = None

# state of agents (including communication and internal/mental state)
class AgentState(EntityState):
    def __init__(self):
        super(AgentState, self).__init__()
        # communication utterance
        self.c = None

# action of the agent
class Action(object):
    def __init__(self):
        # physical action
        self.u = None
        # communication action
        self.c = None

# properties and state of physical world entity
class Entity(object):
    def __init__(self):
        # name
        self.name = ''
        # properties:
        self.size = 0.050
        # entity can move / be pushed
        self.movable = False
        # entity collides with others
        self.collide = True
        # material density (affects mass)
        self.density = 25.0
        # color
        self.color = None
        # max speed and accel
        self.max_speed = None
        self.accel = None
        # state
        self.state = EntityState()
        # mass
        self.initial_mass = 1.0

    @property
    def mass(self):
        return self.initial_mass

# properties of landmark entities
class Landmark(Entity):
    def __init__(self):
        super(Landmark, self).__init__()

# properties of agent entities
class Agent(Entity):
    def __init__(self):
        super(Agent, self).__init__()
        # agents are movable by default
        self.movable = True
        # cannot send communication signals
        self.silent = False
        # cannot observe the world
        self.blind = False
        # physical motor noise amount
        self.u_noise = None
        # communication noise amount
        self.c_noise = None
        # control range
        self.u_range = 1.0
        # state
        self.state = AgentState()
        # action
        self.action = Action()
        # script behavior to execute
        self.action_callback = None

# multi-agent world
class World(object):
    def __init__(self):
        # list of agents and entities (can change at execution-time!)
        self.agents = []
        self.landmarks = []
        # communication channel dimensionality
        self.dim_c = 0
        # position dimensionality
        self.dim_p = 2
        # color dimensionality
        self.dim_color = 3
        # simulation timestep
        self.dt = 0.1
        # physical damping
        self.damping = 0.25
        # contact response parameters
        self.contact_force = 1e+2
        self.contact_margin = 1e-3

    # return all entities in the world
    @property
    def entities(self):
        return self.agents + self.landmarks

    # return all agents controllable by external policies
    @property
    def policy_agents(self):
        return [agent for agent in self.agents if agent.action_callback is None]

    # return all agents controlled by world scripts
    @property
    def scripted_agents(self):
        return [agent for agent in self.agents if agent.action_callback is not None]

    # update state of the world
    def step(self):
        # set actions for scripted agents
        for agent in self.scripted_agents:
            agent.action = agent.action_callback(agent, self)
        # gather forces applied to entities
        p_force = [None] * len(self.entities)
        # apply agent physical controls
        p_force = self.apply_action_force(p_force)
        # apply environment forces
        p_force = self.apply_environment_force(p_force)
        # integrate physical state
        self.integrate_state(p_force)
        # update agent state
        for agent in self.agents:
            self.update_agent_state(agent)

    # gather agent action forces
    def apply_action_force(self, p_force):
        # set applied forces
        for i, agent in enumerate(self.agents):
            if agent.movable:
                noise = np.random.randn(*agent.action.u.shape) * agent.u_noise if agent.u_noise else 0.0
                p_force[i] = agent.action.u + noise
        return p_force

    # gather physical forces acting on entities
    def apply_environment_force(self, p_force):
        # simple (but inefficient) collision response
        for a, entity_a in enumerate(self.entities):
            for b, entity_b in enumerate(self.entities):
                if b <= a: continue
                [f_a, f_b] = self.get_collision_force(entity_a, entity_b)
                if f_a is not None:
                    if p_force[a] is None: p_force[a] = 0.0
                    p_force[a] = f_a + p_force[a]
                if f_b is not None:
                    if p_force[b] is None: p_force[b] = 0.0
                    p_force[b] = f_b + p_force[b]
        return p_force

    # integrate physical state
    def integrate_state(self, p_force):
        for i, entity in enumerate(self.entities):
            if not entity.movable: continue
            entity.state.p_vel = entity.state.p_vel * (1 - self.damping)
            if p_force[i] is not None:
                entity.state.p_vel += (p_force[i] / entity.mass) * self.dt
            if entity.max_speed is not None:
                speed = np.sqrt(np.square(entity.state.p_vel[0]) + np.square(entity.state.p_vel[1]))
                if speed > entity.max_speed:
                    entity.state.p_vel = entity.state.p_vel / np.sqrt(np.square(entity.state.p_vel[0]) +
                                                                      np.square(entity.state.p_vel[1])) * entity.max_speed
            entity.state.p_pos += entity.state.p_vel * self.dt

    def update_agent_state(self, agent):
        # set communication state (directly for now)
        if agent.silent:
            agent.state.c = np.zeros(self.dim_c)
        else:
            noise = np.random.randn(*agent.action.c.shape) * agent.c_noise if agent.c_noise else 0.0
            agent.state.c = agent.action.c + noise

    # get collision forces for any contact between two entities
    def get_collision_force(self, entity_a, entity_b):
        if (not entity_a.collide) or (not entity_b.collide):
            return [None, None]  # not a collider
        if entity_a is entity_b:
            return [None, None]  # don't collide against itself
        # compute actual distance between entities
        delta_pos = entity_a.state.p_pos - entity_b.state.p_pos
        dist = np.sqrt(np.sum(np.square(delta_pos)))
        # minimum allowable distance
        dist_min = entity_a.size + entity_b.size
        # softmax penetration
        k = self.contact_margin
        penetration = np.logaddexp(0, -(dist - dist_min) / k) * k
        force = self.contact_force * delta_pos / dist * penetration
        force_a = +force if entity_a.movable else None
        force_b = -force if entity_b.movable else None
        return [force_a, force_b]
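The collision response above uses a softplus-style penetration term, k * logaddexp(0, -(dist - dist_min)/k); a small numeric illustration with made-up values:

import numpy as np

k, dist_min = 1e-3, 0.10                  # contact_margin and combined entity sizes
for dist in (0.20, 0.10, 0.05):
    penetration = np.logaddexp(0, -(dist - dist_min) / k) * k
    print(dist, penetration)              # ~0 while apart, grows smoothly once overlapping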
@ -0,0 +1,336 @@
import gym
from gym import spaces
from gym.envs.registration import EnvSpec
import numpy as np
from multiagent.multi_discrete import MultiDiscrete

# environment for all agents in the multiagent world
# currently code assumes that no agents will be created/destroyed at runtime!
class MultiAgentEnv(gym.Env):
    metadata = {
        'render.modes': ['human', 'rgb_array']
    }

    def __init__(self, world, reset_callback=None, reward_callback=None,
                 observation_callback=None, info_callback=None,
                 done_callback=None, shared_viewer=True):

        self.world = world
        self.agents = self.world.policy_agents
        # set required vectorized gym env property
        self.n = len(world.policy_agents)
        # scenario callbacks
        self.reset_callback = reset_callback
        self.reward_callback = reward_callback
        self.observation_callback = observation_callback
        self.info_callback = info_callback
        self.done_callback = done_callback
        # environment parameters
        self.discrete_action_space = True
        # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
        self.discrete_action_input = False
        # if true, even continuous actions are converted to a discrete choice before being applied
        self.force_discrete_action = world.discrete_action if hasattr(world, 'discrete_action') else False
        # if true, every agent has the same reward
        self.shared_reward = world.collaborative if hasattr(world, 'collaborative') else False
        self.time = 0

        # configure spaces
        self.action_space = []
        self.observation_space = []
        for agent in self.agents:
            total_action_space = []
            # physical action space
            if self.discrete_action_space:
                u_action_space = spaces.Discrete(world.dim_p * 2 + 1)
            else:
                u_action_space = spaces.Box(low=-agent.u_range, high=+agent.u_range, shape=(world.dim_p,), dtype=np.float32)
            if agent.movable:
                total_action_space.append(u_action_space)
            # communication action space
            if self.discrete_action_space:
                c_action_space = spaces.Discrete(world.dim_c)
            else:
                c_action_space = spaces.Box(low=0.0, high=1.0, shape=(world.dim_c,), dtype=np.float32)
            if not agent.silent:
                total_action_space.append(c_action_space)
            # total action space
            if len(total_action_space) > 1:
                # all action spaces are discrete, so simplify to MultiDiscrete action space
                if all([isinstance(act_space, spaces.Discrete) for act_space in total_action_space]):
                    act_space = MultiDiscrete([[0, act_space.n - 1] for act_space in total_action_space])
                    # print(act_space.n)
                else:
                    act_space = spaces.Tuple(total_action_space)
                self.action_space.append(act_space)
            else:
                self.action_space.append(total_action_space[0])
            # observation space
            obs_dim = len(observation_callback(agent, self.world))
            self.observation_space.append(spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim,), dtype=np.float32))
            agent.action.c = np.zeros(self.world.dim_c)

        # rendering
        self.shared_viewer = shared_viewer
        if self.shared_viewer:
            self.viewers = [None]
        else:
            self.viewers = [None] * self.n
        self._reset_render()

    def step(self, action_n):
        obs_n = []
        reward_n = []
        done_n = []
        info_n = {'n': []}
        self.agents = self.world.policy_agents
        # set action for each agent
        for i, agent in enumerate(self.agents):
            self._set_action(action_n[i], agent, self.action_space[i])
        # advance world state
        self.world.step()
        # record observation for each agent
        for agent in self.agents:
            obs_n.append(self._get_obs(agent))
            reward_n.append(self._get_reward(agent))
            done_n.append(self._get_done(agent))

            info_n['n'].append(self._get_info(agent))

        # all agents get total reward in cooperative case
        reward = np.sum(reward_n)
        if self.shared_reward:
            reward_n = [reward] * self.n

        return obs_n, reward_n, done_n, info_n

    def reset(self):
        # reset world
        self.reset_callback(self.world)
        # reset renderer
        self._reset_render()
        # record observations for each agent
        obs_n = []
        self.agents = self.world.policy_agents
        for agent in self.agents:
            obs_n.append(self._get_obs(agent))
        return obs_n

    # get info used for benchmarking
    def _get_info(self, agent):
        if self.info_callback is None:
            return {}
        return self.info_callback(agent, self.world)

    # get observation for a particular agent
    def _get_obs(self, agent):
        if self.observation_callback is None:
            return np.zeros(0)
        return self.observation_callback(agent, self.world)

    # get dones for a particular agent
    # unused right now -- agents are allowed to go beyond the viewing screen
    def _get_done(self, agent):
        if self.done_callback is None:
            return False
        return self.done_callback(agent, self.world)

    # get reward for a particular agent
    def _get_reward(self, agent):
        if self.reward_callback is None:
            return 0.0
        return self.reward_callback(agent, self.world)

    # set env action for a particular agent
    def _set_action(self, action, agent, action_space, time=None):
        agent.action.u = np.zeros(self.world.dim_p)
        agent.action.c = np.zeros(self.world.dim_c)
        # process action
        if isinstance(action_space, MultiDiscrete):
            act = []
            size = action_space.high - action_space.low + 1
            index = 0
            for s in size:
                act.append(action[index:(index + s)])
                index += s
            action = act
        else:
            action = [action]

        if agent.movable:
            # physical action
            if self.discrete_action_input:
                agent.action.u = np.zeros(self.world.dim_p)
                # process discrete action
                if action[0] == 1: agent.action.u[0] = -1.0
                if action[0] == 2: agent.action.u[0] = +1.0
                if action[0] == 3: agent.action.u[1] = -1.0
                if action[0] == 4: agent.action.u[1] = +1.0
            else:
                if self.force_discrete_action:
                    d = np.argmax(action[0])
                    action[0][:] = 0.0
                    action[0][d] = 1.0
                if self.discrete_action_space:
                    agent.action.u[0] += action[0][1] - action[0][2]
                    agent.action.u[1] += action[0][3] - action[0][4]
                else:
                    agent.action.u = action[0]
            sensitivity = 5.0
            if agent.accel is not None:
                sensitivity = agent.accel
            agent.action.u *= sensitivity
            action = action[1:]
        if not agent.silent:
            # communication action
            if self.discrete_action_input:
                agent.action.c = np.zeros(self.world.dim_c)
                agent.action.c[action[0]] = 1.0
            else:
                agent.action.c = action[0]
            action = action[1:]
        # make sure we used all elements of action
        assert len(action) == 0

    # reset rendering assets
    def _reset_render(self):
        self.render_geoms = None
        self.render_geoms_xform = None

    # render environment
    def render(self, mode='human'):
        if mode == 'human':
            alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
            message = ''
            for agent in self.world.agents:
                comm = []
                for other in self.world.agents:
                    if other is agent: continue
                    if np.all(other.state.c == 0):
                        word = '_'
                    else:
                        word = alphabet[np.argmax(other.state.c)]
                    message += (other.name + ' to ' + agent.name + ': ' + word + ' ')
            print(message)

        for i in range(len(self.viewers)):
            # create viewers (if necessary)
            if self.viewers[i] is None:
                # import rendering only if we need it (and don't import for headless machines)
                # from gym.envs.classic_control import rendering
                from multiagent import rendering
                self.viewers[i] = rendering.Viewer(700, 700)

        # create rendering geometry
        if self.render_geoms is None:
            # import rendering only if we need it (and don't import for headless machines)
            # from gym.envs.classic_control import rendering
            from multiagent import rendering
            self.render_geoms = []
            self.render_geoms_xform = []
            for entity in self.world.entities:
                geom = rendering.make_circle(entity.size)
                xform = rendering.Transform()
                if 'agent' in entity.name:
                    geom.set_color(*entity.color, alpha=0.5)
                else:
                    geom.set_color(*entity.color)
                geom.add_attr(xform)
                self.render_geoms.append(geom)
                self.render_geoms_xform.append(xform)

            # add geoms to viewer
            for viewer in self.viewers:
                viewer.geoms = []
                for geom in self.render_geoms:
                    viewer.add_geom(geom)

        results = []
        for i in range(len(self.viewers)):
            from multiagent import rendering
            # update bounds to center around agent
            cam_range = 1
            if self.shared_viewer:
                pos = np.zeros(self.world.dim_p)
            else:
                pos = self.agents[i].state.p_pos
            self.viewers[i].set_bounds(pos[0] - cam_range, pos[0] + cam_range, pos[1] - cam_range, pos[1] + cam_range)
            # update geometry positions
            for e, entity in enumerate(self.world.entities):
                self.render_geoms_xform[e].set_translation(*entity.state.p_pos)
            # render to display or array
            results.append(self.viewers[i].render(return_rgb_array=(mode == 'rgb_array')))

        return results

    # create receptor field locations in local coordinate frame
    def _make_receptor_locations(self, agent):
        receptor_type = 'polar'
        range_min = 0.05 * 2.0
        range_max = 1.00
        dx = []
        # circular receptive field
        if receptor_type == 'polar':
            for angle in np.linspace(-np.pi, +np.pi, 8, endpoint=False):
                for distance in np.linspace(range_min, range_max, 3):
                    dx.append(distance * np.array([np.cos(angle), np.sin(angle)]))
            # add origin
            dx.append(np.array([0.0, 0.0]))
        # grid receptive field
        if receptor_type == 'grid':
            for x in np.linspace(-range_max, +range_max, 5):
                for y in np.linspace(-range_max, +range_max, 5):
                    dx.append(np.array([x, y]))
        return dx


# vectorized wrapper for a batch of multi-agent environments
# assumes all environments have the same observation and action space
class BatchMultiAgentEnv(gym.Env):
    metadata = {
        'runtime.vectorized': True,
        'render.modes': ['human', 'rgb_array']
    }

    def __init__(self, env_batch):
        self.env_batch = env_batch

    @property
    def n(self):
        return np.sum([env.n for env in self.env_batch])

    @property
    def action_space(self):
        return self.env_batch[0].action_space

    @property
    def observation_space(self):
        return self.env_batch[0].observation_space

    def step(self, action_n, time):
        obs_n = []
        reward_n = []
        done_n = []
        info_n = {'n': []}
        i = 0
        for env in self.env_batch:
            obs, reward, done, _ = env.step(action_n[i:(i + env.n)], time)
            i += env.n
            obs_n += obs
            # reward = [r / len(self.env_batch) for r in reward]
            reward_n += reward
            done_n += done
        return obs_n, reward_n, done_n, info_n

    def reset(self):
        obs_n = []
        for env in self.env_batch:
            obs_n += env.reset()
        return obs_n

    # render environment
    def render(self, mode='human', close=True):
        results_n = []
        for env in self.env_batch:
            results_n += env.render(mode, close)
        return results_n
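In the continuous branch of _set_action above, a 5-dimensional action vector is decoded into 2-D forces; a self-contained sketch of that decoding with made-up values:

import numpy as np

a = np.array([0.0, 0.7, 0.1, 0.2, 0.9])   # [no-op, +x, -x, +y, -y] weights from the policy
u = np.zeros(2)
u[0] += a[1] - a[2]
u[1] += a[3] - a[4]
print(u)   # [ 0.6 -0.7], before the sensitivity (accel) scaling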
@ -0,0 +1,44 @@
|
|||
# An old version of OpenAI Gym's multi_discrete.py. (Was getting affected by Gym updates)
|
||||
# (https://github.com/openai/gym/blob/1fb81d4e3fb780ccf77fec731287ba07da35eb84/gym/spaces/multi_discrete.py)
|
||||
|
||||
import numpy as np
|
||||
|
||||
import gym
|
||||
from gym.spaces import prng
|
||||
|
||||
class MultiDiscrete(gym.Space):
|
||||
"""
|
||||
- The multi-discrete action space consists of a series of discrete action spaces with different parameters
|
||||
- It can be adapted to both a Discrete action space or a continuous (Box) action space
|
||||
- It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space
|
||||
- It is parametrized by passing an array of arrays containing [min, max] for each discrete action space
|
||||
where the discrete action space can take any integers from `min` to `max` (both inclusive)
|
||||
Note: A value of 0 always need to represent the NOOP action.
|
||||
e.g. Nintendo Game Controller
|
||||
- Can be conceptualized as 3 discrete action spaces:
|
||||
1) Arrow Keys: Discrete 5 - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4] - params: min: 0, max: 4
|
||||
2) Button A: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
|
||||
3) Button B: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
|
||||
- Can be initialized as
|
||||
MultiDiscrete([ [0,4], [0,1], [0,1] ])
|
||||
"""
|
||||
def __init__(self, array_of_param_array):
|
||||
self.low = np.array([x[0] for x in array_of_param_array])
|
||||
self.high = np.array([x[1] for x in array_of_param_array])
|
||||
self.num_discrete_space = self.low.shape[0]
|
||||
|
||||
def sample(self):
|
||||
""" Returns a array with one sample from each discrete action space """
|
||||
# For each row: round(random .* (max - min) + min, 0)
|
||||
random_array = prng.np_random.rand(self.num_discrete_space)
|
||||
return [int(x) for x in np.floor(np.multiply((self.high - self.low + 1.), random_array) + self.low)]
|
||||
def contains(self, x):
|
||||
return len(x) == self.num_discrete_space and (np.array(x) >= self.low).all() and (np.array(x) <= self.high).all()
|
||||
|
||||
@property
|
||||
def shape(self):
|
||||
return self.num_discrete_space
|
||||
def __repr__(self):
|
||||
return "MultiDiscrete" + str(self.num_discrete_space)
|
||||
def __eq__(self, other):
|
||||
return np.array_equal(self.low, other.low) and np.array_equal(self.high, other.high)
|
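A quick, illustrative check of how this space behaves, following the controller example in the docstring:

# Illustrative usage of the vendored MultiDiscrete space.
space = MultiDiscrete([[0, 4], [0, 1], [0, 1]])
sample = space.sample()            # one integer per sub-space, e.g. [3, 0, 1]
assert space.contains(sample)
assert space.shape == 3            # three discrete sub-spaces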
|
@ -0,0 +1,52 @@
|
|||
import numpy as np
|
||||
from pyglet.window import key
|
||||
|
||||
# individual agent policy
|
||||
class Policy(object):
|
||||
def __init__(self):
|
||||
pass
|
||||
def action(self, obs):
|
||||
raise NotImplementedError()
|
||||
|
||||
# interactive policy based on keyboard input
|
||||
# hard-coded to deal only with movement, not communication
|
||||
class InteractivePolicy(Policy):
|
||||
def __init__(self, env, agent_index):
|
||||
super(InteractivePolicy, self).__init__()
|
||||
self.env = env
|
||||
# hard-coded keyboard events
|
||||
self.move = [False for i in range(4)]
|
||||
self.comm = [False for i in range(env.world.dim_c)]
|
||||
# register keyboard events with this environment's window
|
||||
env.viewers[agent_index].window.on_key_press = self.key_press
|
||||
env.viewers[agent_index].window.on_key_release = self.key_release
|
||||
|
||||
def action(self, obs):
|
||||
# ignore observation and just act based on keyboard events
|
||||
if self.env.discrete_action_input:
|
||||
u = 0
|
||||
if self.move[0]: u = 1
|
||||
if self.move[1]: u = 2
|
||||
if self.move[2]: u = 4
|
||||
if self.move[3]: u = 3
|
||||
else:
|
||||
u = np.zeros(5) # 5-d because of no-move action
|
||||
if self.move[0]: u[1] += 1.0
|
||||
if self.move[1]: u[2] += 1.0
|
||||
if self.move[3]: u[3] += 1.0
|
||||
if self.move[2]: u[4] += 1.0
|
||||
if True not in self.move:
|
||||
u[0] += 1.0
|
||||
return np.concatenate([u, np.zeros(self.env.world.dim_c)])
|
||||
|
||||
# keyboard event callbacks
|
||||
def key_press(self, k, mod):
|
||||
if k==key.LEFT: self.move[0] = True
|
||||
if k==key.RIGHT: self.move[1] = True
|
||||
if k==key.UP: self.move[2] = True
|
||||
if k==key.DOWN: self.move[3] = True
|
||||
def key_release(self, k, mod):
|
||||
if k==key.LEFT: self.move[0] = False
|
||||
if k==key.RIGHT: self.move[1] = False
|
||||
if k==key.UP: self.move[2] = False
|
||||
if k==key.DOWN: self.move[3] = False
|
|
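A rough sketch of how this policy is meant to be wired up (assumes a MultiAgentEnv built elsewhere in this commit; render() must be called once so env.viewers exists before the key handlers are attached):

# Hypothetical driver loop; env construction is not shown in this file.
obs_n = env.reset()
env.render()                                   # creates env.viewers[agent_index]
policy = InteractivePolicy(env, agent_index=0)
while True:
    act_n = [policy.action(obs_n[0])]          # arrow keys steer the single agent
    obs_n, reward_n, done_n, _ = env.step(act_n)
    env.render()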
@ -0,0 +1,345 @@
|
|||
"""
|
||||
2D rendering framework
|
||||
"""
|
||||
from __future__ import division
|
||||
import os
|
||||
import six
|
||||
import sys
|
||||
|
||||
if "Apple" in sys.version:
|
||||
if 'DYLD_FALLBACK_LIBRARY_PATH' in os.environ:
|
||||
os.environ['DYLD_FALLBACK_LIBRARY_PATH'] += ':/usr/lib'
|
||||
# (JDS 2016/04/15): avoid bug on Anaconda 2.3.0 / Yosemite
|
||||
|
||||
from gym.utils import reraise
|
||||
from gym import error
|
||||
|
||||
try:
|
||||
import pyglet
|
||||
except ImportError as e:
|
||||
reraise(suffix="HINT: you can install pyglet directly via 'pip install pyglet'. But if you really just want to install all Gym dependencies and not have to think about it, 'pip install -e .[all]' or 'pip install gym[all]' will do it.")
|
||||
|
||||
try:
|
||||
from pyglet.gl import *
|
||||
except ImportError as e:
|
||||
reraise(prefix="Error occurred while running `from pyglet.gl import *`",suffix="HINT: make sure you have OpenGL installed. On Ubuntu, you can run 'apt-get install python-opengl'. If you're running on a server, you may need a virtual frame buffer; something like this should work: 'xvfb-run -s \"-screen 0 1400x900x24\" python <your_script.py>'")
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
|
||||
RAD2DEG = 57.29577951308232
|
||||
|
||||
def get_display(spec):
|
||||
"""Convert a display specification (such as :0) into an actual Display
|
||||
object.
|
||||
|
||||
Pyglet only supports multiple Displays on Linux.
|
||||
"""
|
||||
if spec is None:
|
||||
return None
|
||||
elif isinstance(spec, six.string_types):
|
||||
return pyglet.canvas.Display(spec)
|
||||
else:
|
||||
raise error.Error('Invalid display specification: {}. (Must be a string like :0 or None.)'.format(spec))
|
||||
|
||||
class Viewer(object):
|
||||
def __init__(self, width, height, display=None):
|
||||
display = get_display(display)
|
||||
|
||||
self.width = width
|
||||
self.height = height
|
||||
|
||||
self.window = pyglet.window.Window(width=width, height=height, display=display)
|
||||
self.window.on_close = self.window_closed_by_user
|
||||
self.geoms = []
|
||||
self.onetime_geoms = []
|
||||
self.transform = Transform()
|
||||
|
||||
glEnable(GL_BLEND)
|
||||
# glEnable(GL_MULTISAMPLE)
|
||||
glEnable(GL_LINE_SMOOTH)
|
||||
# glHint(GL_LINE_SMOOTH_HINT, GL_DONT_CARE)
|
||||
glHint(GL_LINE_SMOOTH_HINT, GL_NICEST)
|
||||
glLineWidth(2.0)
|
||||
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)
|
||||
|
||||
def close(self):
|
||||
self.window.close()
|
||||
|
||||
def window_closed_by_user(self):
|
||||
self.close()
|
||||
|
||||
def set_bounds(self, left, right, bottom, top):
|
||||
assert right > left and top > bottom
|
||||
scalex = self.width/(right-left)
|
||||
scaley = self.height/(top-bottom)
|
||||
self.transform = Transform(
|
||||
translation=(-left*scalex, -bottom*scaley),
|
||||
scale=(scalex, scaley))
|
||||
|
||||
def add_geom(self, geom):
|
||||
self.geoms.append(geom)
|
||||
|
||||
def add_onetime(self, geom):
|
||||
self.onetime_geoms.append(geom)
|
||||
|
||||
def render(self, return_rgb_array=False):
|
||||
glClearColor(1,1,1,1)
|
||||
self.window.clear()
|
||||
self.window.switch_to()
|
||||
self.window.dispatch_events()
|
||||
self.transform.enable()
|
||||
for geom in self.geoms:
|
||||
geom.render()
|
||||
for geom in self.onetime_geoms:
|
||||
geom.render()
|
||||
self.transform.disable()
|
||||
arr = None
|
||||
if return_rgb_array:
|
||||
buffer = pyglet.image.get_buffer_manager().get_color_buffer()
|
||||
image_data = buffer.get_image_data()
|
||||
arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
|
||||
# In https://github.com/openai/gym-http-api/issues/2, we
|
||||
# discovered that someone using Xmonad on Arch was having
|
||||
# a window of size 598 x 398, though a 600 x 400 window
|
||||
# was requested. (Guess Xmonad was preserving a pixel for
|
||||
# the boundary.) So we use the buffer height/width rather
|
||||
# than the requested one.
|
||||
arr = arr.reshape(buffer.height, buffer.width, 4)
|
||||
arr = arr[::-1,:,0:3]
|
||||
self.window.flip()
|
||||
self.onetime_geoms = []
|
||||
return arr
|
||||
|
||||
# Convenience
|
||||
def draw_circle(self, radius=10, res=30, filled=True, **attrs):
|
||||
geom = make_circle(radius=radius, res=res, filled=filled)
|
||||
_add_attrs(geom, attrs)
|
||||
self.add_onetime(geom)
|
||||
return geom
|
||||
|
||||
def draw_polygon(self, v, filled=True, **attrs):
|
||||
geom = make_polygon(v=v, filled=filled)
|
||||
_add_attrs(geom, attrs)
|
||||
self.add_onetime(geom)
|
||||
return geom
|
||||
|
||||
def draw_polyline(self, v, **attrs):
|
||||
geom = make_polyline(v=v)
|
||||
_add_attrs(geom, attrs)
|
||||
self.add_onetime(geom)
|
||||
return geom
|
||||
|
||||
def draw_line(self, start, end, **attrs):
|
||||
geom = Line(start, end)
|
||||
_add_attrs(geom, attrs)
|
||||
self.add_onetime(geom)
|
||||
return geom
|
||||
|
||||
def get_array(self):
|
||||
self.window.flip()
|
||||
image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
|
||||
self.window.flip()
|
||||
arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
|
||||
arr = arr.reshape(self.height, self.width, 4)
|
||||
return arr[::-1,:,0:3]
|
||||
|
||||
def _add_attrs(geom, attrs):
|
||||
if "color" in attrs:
|
||||
geom.set_color(*attrs["color"])
|
||||
if "linewidth" in attrs:
|
||||
geom.set_linewidth(attrs["linewidth"])
|
||||
|
||||
class Geom(object):
|
||||
def __init__(self):
|
||||
self._color=Color((0, 0, 0, 1.0))
|
||||
self.attrs = [self._color]
|
||||
def render(self):
|
||||
for attr in reversed(self.attrs):
|
||||
attr.enable()
|
||||
self.render1()
|
||||
for attr in self.attrs:
|
||||
attr.disable()
|
||||
def render1(self):
|
||||
raise NotImplementedError
|
||||
def add_attr(self, attr):
|
||||
self.attrs.append(attr)
|
||||
def set_color(self, r, g, b, alpha=1):
|
||||
self._color.vec4 = (r, g, b, alpha)
|
||||
|
||||
class Attr(object):
|
||||
def enable(self):
|
||||
raise NotImplementedError
|
||||
def disable(self):
|
||||
pass
|
||||
|
||||
class Transform(Attr):
|
||||
def __init__(self, translation=(0.0, 0.0), rotation=0.0, scale=(1,1)):
|
||||
self.set_translation(*translation)
|
||||
self.set_rotation(rotation)
|
||||
self.set_scale(*scale)
|
||||
def enable(self):
|
||||
glPushMatrix()
|
||||
glTranslatef(self.translation[0], self.translation[1], 0) # translate to GL location point
|
||||
glRotatef(RAD2DEG * self.rotation, 0, 0, 1.0)
|
||||
glScalef(self.scale[0], self.scale[1], 1)
|
||||
def disable(self):
|
||||
glPopMatrix()
|
||||
def set_translation(self, newx, newy):
|
||||
self.translation = (float(newx), float(newy))
|
||||
def set_rotation(self, new):
|
||||
self.rotation = float(new)
|
||||
def set_scale(self, newx, newy):
|
||||
self.scale = (float(newx), float(newy))
|
||||
|
||||
class Color(Attr):
|
||||
def __init__(self, vec4):
|
||||
self.vec4 = vec4
|
||||
def enable(self):
|
||||
glColor4f(*self.vec4)
|
||||
|
||||
class LineStyle(Attr):
|
||||
def __init__(self, style):
|
||||
self.style = style
|
||||
def enable(self):
|
||||
glEnable(GL_LINE_STIPPLE)
|
||||
glLineStipple(1, self.style)
|
||||
def disable(self):
|
||||
glDisable(GL_LINE_STIPPLE)
|
||||
|
||||
class LineWidth(Attr):
|
||||
def __init__(self, stroke):
|
||||
self.stroke = stroke
|
||||
def enable(self):
|
||||
glLineWidth(self.stroke)
|
||||
|
||||
class Point(Geom):
|
||||
def __init__(self):
|
||||
Geom.__init__(self)
|
||||
def render1(self):
|
||||
glBegin(GL_POINTS) # draw point
|
||||
glVertex3f(0.0, 0.0, 0.0)
|
||||
glEnd()
|
||||
|
||||
class FilledPolygon(Geom):
|
||||
def __init__(self, v):
|
||||
Geom.__init__(self)
|
||||
self.v = v
|
||||
def render1(self):
|
||||
if len(self.v) == 4 : glBegin(GL_QUADS)
|
||||
elif len(self.v) > 4 : glBegin(GL_POLYGON)
|
||||
else: glBegin(GL_TRIANGLES)
|
||||
for p in self.v:
|
||||
glVertex3f(p[0], p[1],0) # draw each vertex
|
||||
glEnd()
|
||||
|
||||
color = (self._color.vec4[0] * 0.5, self._color.vec4[1] * 0.5, self._color.vec4[2] * 0.5, self._color.vec4[3] * 0.5)
|
||||
glColor4f(*color)
|
||||
glBegin(GL_LINE_LOOP)
|
||||
for p in self.v:
|
||||
glVertex3f(p[0], p[1],0) # draw each vertex
|
||||
glEnd()
|
||||
|
||||
def make_circle(radius=10, res=30, filled=True):
|
||||
points = []
|
||||
for i in range(res):
|
||||
ang = 2*math.pi*i / res
|
||||
points.append((math.cos(ang)*radius, math.sin(ang)*radius))
|
||||
if filled:
|
||||
return FilledPolygon(points)
|
||||
else:
|
||||
return PolyLine(points, True)
|
||||
|
||||
def make_polygon(v, filled=True):
|
||||
if filled: return FilledPolygon(v)
|
||||
else: return PolyLine(v, True)
|
||||
|
||||
def make_polyline(v):
|
||||
return PolyLine(v, False)
|
||||
|
||||
def make_capsule(length, width):
|
||||
l, r, t, b = 0, length, width/2, -width/2
|
||||
box = make_polygon([(l,b), (l,t), (r,t), (r,b)])
|
||||
circ0 = make_circle(width/2)
|
||||
circ1 = make_circle(width/2)
|
||||
circ1.add_attr(Transform(translation=(length, 0)))
|
||||
geom = Compound([box, circ0, circ1])
|
||||
return geom
|
||||
|
||||
class Compound(Geom):
|
||||
def __init__(self, gs):
|
||||
Geom.__init__(self)
|
||||
self.gs = gs
|
||||
for g in self.gs:
|
||||
g.attrs = [a for a in g.attrs if not isinstance(a, Color)]
|
||||
def render1(self):
|
||||
for g in self.gs:
|
||||
g.render()
|
||||
|
||||
class PolyLine(Geom):
|
||||
def __init__(self, v, close):
|
||||
Geom.__init__(self)
|
||||
self.v = v
|
||||
self.close = close
|
||||
self.linewidth = LineWidth(1)
|
||||
self.add_attr(self.linewidth)
|
||||
def render1(self):
|
||||
glBegin(GL_LINE_LOOP if self.close else GL_LINE_STRIP)
|
||||
for p in self.v:
|
||||
glVertex3f(p[0], p[1],0) # draw each vertex
|
||||
glEnd()
|
||||
def set_linewidth(self, x):
|
||||
self.linewidth.stroke = x
|
||||
|
||||
class Line(Geom):
|
||||
def __init__(self, start=(0.0, 0.0), end=(0.0, 0.0)):
|
||||
Geom.__init__(self)
|
||||
self.start = start
|
||||
self.end = end
|
||||
self.linewidth = LineWidth(1)
|
||||
self.add_attr(self.linewidth)
|
||||
|
||||
def render1(self):
|
||||
glBegin(GL_LINES)
|
||||
glVertex2f(*self.start)
|
||||
glVertex2f(*self.end)
|
||||
glEnd()
|
||||
|
||||
class Image(Geom):
|
||||
def __init__(self, fname, width, height):
|
||||
Geom.__init__(self)
|
||||
self.width = width
|
||||
self.height = height
|
||||
img = pyglet.image.load(fname)
|
||||
self.img = img
|
||||
self.flip = False
|
||||
def render1(self):
|
||||
self.img.blit(-self.width/2, -self.height/2, width=self.width, height=self.height)
|
||||
|
||||
# ================================================================
|
||||
|
||||
class SimpleImageViewer(object):
|
||||
def __init__(self, display=None):
|
||||
self.window = None
|
||||
self.isopen = False
|
||||
self.display = display
|
||||
def imshow(self, arr):
|
||||
if self.window is None:
|
||||
height, width, channels = arr.shape
|
||||
self.window = pyglet.window.Window(width=width, height=height, display=self.display)
|
||||
self.width = width
|
||||
self.height = height
|
||||
self.isopen = True
|
||||
assert arr.shape == (self.height, self.width, 3), "You passed in an image with the wrong shape"
|
||||
image = pyglet.image.ImageData(self.width, self.height, 'RGB', arr.tobytes(), pitch=self.width * -3)
|
||||
self.window.clear()
|
||||
self.window.switch_to()
|
||||
self.window.dispatch_events()
|
||||
image.blit(0,0)
|
||||
self.window.flip()
|
||||
def close(self):
|
||||
if self.isopen:
|
||||
self.window.close()
|
||||
self.isopen = False
|
||||
def __del__(self):
|
||||
self.close()
|
|
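The Viewer above is a thin pyglet/OpenGL wrapper; a rough, illustrative sketch of drawing with it on a machine with a working display:

# Illustrative use of the Viewer and geometry helpers defined above.
viewer = Viewer(600, 400)
viewer.set_bounds(-1, 1, -1, 1)                # map world coordinates onto the window
circle = make_circle(radius=0.1)
circle.set_color(0.85, 0.35, 0.35)
circle.add_attr(Transform(translation=(0.5, 0.0)))
viewer.add_geom(circle)                        # persistent geom, redrawn each frame
frame = viewer.render(return_rgb_array=True)   # (height, width, 3) uint8 array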
@ -0,0 +1,10 @@
|
|||
import numpy as np
|
||||
|
||||
# defines scenario upon which the world is built
|
||||
class BaseScenario(object):
|
||||
# create elements of the world
|
||||
def make_world(self):
|
||||
raise NotImplementedError()
|
||||
# create initial conditions of the world
|
||||
def reset_world(self, world):
|
||||
raise NotImplementedError()
|
|
@ -0,0 +1,7 @@
|
|||
import imp
|
||||
import os.path as osp
|
||||
|
||||
|
||||
def load(name):
|
||||
pathname = osp.join(osp.dirname(__file__), name)
|
||||
return imp.load_source('', pathname)
|
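Typical usage of this loader, sketched here with the simple_adversary scenario from this commit (the MultiAgentEnv constructor call is an assumption about the accompanying environment module):

# Illustrative: load a scenario module by filename and build its world.
scenario = load("simple_adversary.py").Scenario()
world = scenario.make_world()
# The callbacks are then handed to the environment wrapper, e.g.
# env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation)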
|
@ -0,0 +1,139 @@
|
|||
import numpy as np
|
||||
from multiagent.core import World, Agent, Landmark
|
||||
from multiagent.scenario import BaseScenario
|
||||
|
||||
|
||||
class Scenario(BaseScenario):
|
||||
|
||||
def make_world(self):
|
||||
world = World()
|
||||
# set any world properties first
|
||||
world.dim_c = 2
|
||||
num_agents = 3
|
||||
world.num_agents = num_agents
|
||||
num_adversaries = 1
|
||||
num_landmarks = num_agents - 1
|
||||
# add agents
|
||||
world.agents = [Agent() for i in range(num_agents)]
|
||||
for i, agent in enumerate(world.agents):
|
||||
agent.name = 'agent %d' % i
|
||||
agent.collide = False
|
||||
agent.silent = True
|
||||
agent.adversary = True if i < num_adversaries else False
|
||||
agent.size = 0.15
|
||||
# add landmarks
|
||||
world.landmarks = [Landmark() for i in range(num_landmarks)]
|
||||
for i, landmark in enumerate(world.landmarks):
|
||||
landmark.name = 'landmark %d' % i
|
||||
landmark.collide = False
|
||||
landmark.movable = False
|
||||
landmark.size = 0.08
|
||||
# make initial conditions
|
||||
self.reset_world(world)
|
||||
return world
|
||||
|
||||
def reset_world(self, world):
|
||||
# random properties for agents
|
||||
world.agents[0].color = np.array([0.85, 0.35, 0.35])
|
||||
for i in range(1, world.num_agents):
|
||||
world.agents[i].color = np.array([0.35, 0.35, 0.85])
|
||||
# random properties for landmarks
|
||||
for i, landmark in enumerate(world.landmarks):
|
||||
landmark.color = np.array([0.15, 0.15, 0.15])
|
||||
# set goal landmark
|
||||
goal = np.random.choice(world.landmarks)
|
||||
goal.color = np.array([0.15, 0.65, 0.15])
|
||||
for agent in world.agents:
|
||||
agent.goal_a = goal
|
||||
# set random initial states
|
||||
for agent in world.agents:
|
||||
agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
|
||||
agent.state.p_vel = np.zeros(world.dim_p)
|
||||
agent.state.c = np.zeros(world.dim_c)
|
||||
for i, landmark in enumerate(world.landmarks):
|
||||
landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
|
||||
landmark.state.p_vel = np.zeros(world.dim_p)
|
||||
|
||||
def benchmark_data(self, agent, world):
|
||||
# returns data for benchmarking purposes
|
||||
if agent.adversary:
|
||||
return np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))
|
||||
else:
|
||||
dists = []
|
||||
for l in world.landmarks:
|
||||
dists.append(np.sum(np.square(agent.state.p_pos - l.state.p_pos)))
|
||||
dists.append(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos)))
|
||||
return tuple(dists)
|
||||
|
||||
# return all agents that are not adversaries
|
||||
def good_agents(self, world):
|
||||
return [agent for agent in world.agents if not agent.adversary]
|
||||
|
||||
# return all adversarial agents
|
||||
def adversaries(self, world):
|
||||
return [agent for agent in world.agents if agent.adversary]
|
||||
|
||||
def reward(self, agent, world):
|
||||
# reward depends on the agent's role: adversary reward vs. good-agent reward
|
||||
return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world)
|
||||
|
||||
def agent_reward(self, agent, world):
|
||||
# Rewarded based on how close any good agent is to the goal landmark, and how far the adversary is from it
|
||||
shaped_reward = True
|
||||
shaped_adv_reward = True
|
||||
|
||||
# Calculate negative reward for adversary
|
||||
adversary_agents = self.adversaries(world)
|
||||
if shaped_adv_reward: # distance-based adversary reward
|
||||
adv_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in adversary_agents])
|
||||
else: # proximity-based adversary reward (binary)
|
||||
adv_rew = 0
|
||||
for a in adversary_agents:
|
||||
if np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) < 2 * a.goal_a.size:
|
||||
adv_rew -= 5
|
||||
|
||||
# Calculate positive reward for agents
|
||||
good_agents = self.good_agents(world)
|
||||
if shaped_reward: # distance-based agent reward
|
||||
pos_rew = -min(
|
||||
[np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents])
|
||||
else: # proximity-based agent reward (binary)
|
||||
pos_rew = 0
|
||||
if min([np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents]) \
|
||||
< 2 * agent.goal_a.size:
|
||||
pos_rew += 5
|
||||
pos_rew -= min(
|
||||
[np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents])
|
||||
return pos_rew + adv_rew
|
||||
|
||||
def adversary_reward(self, agent, world):
|
||||
# Rewarded based on proximity to the goal landmark
|
||||
shaped_reward = True
|
||||
if shaped_reward: # distance-based reward
|
||||
return -np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))
|
||||
else: # proximity-based reward (binary)
|
||||
adv_rew = 0
|
||||
if np.sqrt(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))) < 2 * agent.goal_a.size:
|
||||
adv_rew += 5
|
||||
return adv_rew
|
||||
|
||||
|
||||
def observation(self, agent, world):
|
||||
# get positions of all entities in this agent's reference frame
|
||||
entity_pos = []
|
||||
for entity in world.landmarks:
|
||||
entity_pos.append(entity.state.p_pos - agent.state.p_pos)
|
||||
# entity colors
|
||||
entity_color = []
|
||||
for entity in world.landmarks:
|
||||
entity_color.append(entity.color)
|
||||
# positions of all other agents (communication is not observed in this scenario)
|
||||
other_pos = []
|
||||
for other in world.agents:
|
||||
if other is agent: continue
|
||||
other_pos.append(other.state.p_pos - agent.state.p_pos)
|
||||
|
||||
if not agent.adversary:
|
||||
return np.concatenate([agent.goal_a.state.p_pos - agent.state.p_pos] + entity_pos + other_pos)
|
||||
else:
|
||||
return np.concatenate(entity_pos + other_pos)
|
|
@ -0,0 +1,86 @@
|
|||
from tqdm import tqdm
|
||||
from agent import Agent
|
||||
from common.replay_buffer import Buffer
|
||||
import torch
|
||||
import os
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
class Runner:
|
||||
def __init__(self, args, env):
|
||||
self.args = args
|
||||
self.noise = args.noise_rate
|
||||
self.epsilon = args.epsilon
|
||||
self.episode_limit = args.max_episode_len
|
||||
self.env = env
|
||||
self.agents = self._init_agents()
|
||||
self.buffer = Buffer(args)
|
||||
self.save_path = self.args.save_dir + '/' + self.args.scenario_name
|
||||
if not os.path.exists(self.save_path):
|
||||
os.makedirs(self.save_path)
|
||||
|
||||
def _init_agents(self):
|
||||
agents = []
|
||||
for i in range(self.args.n_agents):
|
||||
agent = Agent(i, self.args)
|
||||
agents.append(agent)
|
||||
return agents
|
||||
|
||||
def run(self):
|
||||
returns = []
|
||||
for time_step in tqdm(range(self.args.time_steps)):
|
||||
# reset the environment
|
||||
if time_step % self.episode_limit == 0:
|
||||
s = self.env.reset()
|
||||
u = []
|
||||
actions = []
|
||||
with torch.no_grad():
|
||||
for agent_id, agent in enumerate(self.agents):
|
||||
action = agent.select_action(s[agent_id], self.noise, self.epsilon)
|
||||
u.append(action)
|
||||
actions.append(action)
|
||||
for i in range(self.args.n_agents, self.args.n_players):
|
||||
actions.append([0, np.random.rand() * 2 - 1, 0, np.random.rand() * 2 - 1, 0])
|
||||
s_next, r, done, info = self.env.step(actions)
|
||||
self.buffer.store_episode(s[:self.args.n_agents], u, r[:self.args.n_agents], s_next[:self.args.n_agents])
|
||||
s = s_next
|
||||
if self.buffer.current_size >= self.args.batch_size:
|
||||
transitions = self.buffer.sample(self.args.batch_size)
|
||||
for agent in self.agents:
|
||||
other_agents = self.agents.copy()
|
||||
other_agents.remove(agent)
|
||||
agent.learn(transitions, other_agents)
|
||||
if time_step > 0 and time_step % self.args.evaluate_rate == 0:
|
||||
returns.append(self.evaluate())
|
||||
plt.figure()
|
||||
plt.plot(range(len(returns)), returns)
|
||||
plt.xlabel('episode * ' + str(self.args.evaluate_rate / self.episode_limit))
|
||||
plt.ylabel('average returns')
|
||||
plt.savefig(self.save_path + '/plt.png', format='png')
|
||||
self.noise = max(0.05, self.noise - 0.0000005)
|
||||
self.epsilon = max(0.05, self.epsilon - 0.0000005)
|
||||
np.save(self.save_path + '/returns.pkl', returns)  # note: np.save appends '.npy', so this writes returns.pkl.npy
|
||||
|
||||
def evaluate(self):
|
||||
returns = []
|
||||
for episode in range(self.args.evaluate_episodes):
|
||||
# reset the environment
|
||||
s = self.env.reset()
|
||||
rewards = 0
|
||||
for time_step in range(self.args.evaluate_episode_len):
|
||||
# if (episode > self.args.evaluate_episode_len - 50):
|
||||
#self.env.render()
|
||||
actions = []
|
||||
with torch.no_grad():
|
||||
for agent_id, agent in enumerate(self.agents):
|
||||
action = agent.select_action(s[agent_id], 0, 0)
|
||||
actions.append(action)
|
||||
for i in range(self.args.n_agents, self.args.n_players):
|
||||
actions.append([0, np.random.rand() * 2 - 1, 0, np.random.rand() * 2 - 1, 0])
|
||||
s_next, r, done, info = self.env.step(actions)
|
||||
rewards += r[0]
|
||||
s = s_next
|
||||
returns.append(rewards)
|
||||
if (episode % 1000 == 0):
|
||||
print('Returns is', rewards)
|
||||
return sum(returns) / self.args.evaluate_episodes
|
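For completeness, a hedged sketch of an entry point that drives the Runner (the get_args/make_env helpers and their module paths are assumptions about this repo's common utilities, which are outside this section):

# Hypothetical main script; helper names are assumptions, not confirmed by this diff.
from common.arguments import get_args
from common.utils import make_env
from runner import Runner

if __name__ == '__main__':
    args = get_args()
    env, args = make_env(args)      # assumed to fill n_agents, n_players, obs/action shapes
    runner = Runner(args, env)
    runner.run()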