bmstu-marl/maddpg/actor_critic.py

import torch
import torch.nn as nn
import torch.nn.functional as F
# define the actor network
class Actor(nn.Module):
    def __init__(self, args, agent_id):
        super(Actor, self).__init__()
        self.max_action = args.high_action
        self.fc1 = nn.Linear(args.obs_shape[agent_id], 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 64)
        # fc3 outputs 64 features, so the action head takes 64 inputs
        self.action_out = nn.Linear(64, args.action_shape[agent_id])

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        # squash the output to [-max_action, max_action]
        actions = self.max_action * torch.tanh(self.action_out(x))
        return actions
# define the centralized critic network: scores the joint state-action of all agents
class Critic(nn.Module):
    def __init__(self, args):
        super(Critic, self).__init__()
        self.max_action = args.high_action
        # input is the concatenation of every agent's observation and action
        self.fc1 = nn.Linear(sum(args.obs_shape) + sum(args.action_shape), 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 64)
        self.q_out = nn.Linear(64, 1)

    def forward(self, state, action):
        state = torch.cat(state, dim=1)
        # normalize actions by the action bound without mutating the caller's tensors
        action = [a / self.max_action for a in action]
        action = torch.cat(action, dim=1)
        x = torch.cat([state, action], dim=1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        q_value = self.q_out(x)
        return q_value
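

# Minimal usage sketch, assuming `args` exposes the fields used above
# (obs_shape, action_shape, high_action). The concrete sizes below are
# placeholder values for an assumed 3-agent setup, not from the original file.
if __name__ == "__main__":
    from types import SimpleNamespace

    args = SimpleNamespace(
        obs_shape=[16, 16, 16],   # per-agent observation sizes (assumed)
        action_shape=[5, 5, 5],   # per-agent action sizes (assumed)
        high_action=1.0,          # action bound (assumed)
    )

    actor = Actor(args, agent_id=0)
    critic = Critic(args)

    obs = torch.randn(4, args.obs_shape[0])                    # batch of 4 local observations
    act = actor(obs)                                            # -> (4, 5), bounded by high_action
    states = [torch.randn(4, s) for s in args.obs_shape]        # all agents' observations
    actions = [torch.randn(4, a) for a in args.action_shape]    # all agents' actions
    q = critic(states, actions)                                 # -> (4, 1) Q-values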