Spaces:

Ahren09
/

AgentReview

Running

AgentReview / agentreview /environments /base.py

Yiqiao Jin

Initial Commit

bdafe83 about 1 year ago

5.52 kB

	from abc import abstractmethod
	from dataclasses import dataclass
	from typing import Dict, List

	from ..config import Configurable, EnvironmentConfig
	from ..message import Message
	from ..utils import AttributedDict


	@dataclass
	class TimeStep(AttributedDict):
	    """
	    Result bundle describing one step of the simulation.

	    Groups together what was observed, the per-player rewards, and the
	    end-of-episode flag for a single timestep.

	    Attributes:
	        observation (List[Message]): Messages that make up the observation at this timestep.
	        reward (Dict[str, float]): Reward for each player, keyed by player name.
	        terminal (bool): True when the state is terminal (the episode has ended).
	    """

	    # Field order defines the positional order of the generated __init__.
	    observation: List[Message]
	    reward: Dict[str, float]
	    terminal: bool


	class Environment(Configurable):
	    """
	    Base interface shared by every environment.

	    Concrete environments derive from this class and provide the methods
	    that are decorated with ``@abstractmethod`` below.

	    Inherits from:
	        Configurable: A custom class that provides methods to handle configuration settings.

	    Attributes:
	        type_name (str): Identifier of the environment type. When a subclass would
	            otherwise resolve this to ``None``, ``__init_subclass__`` assigns the
	            lower-cased class name.
	        phase_index (int): Class-level default of 0.
	        task: Class-level default of None.

	    Note:
	        NOTE(review): ``@abstractmethod`` only blocks instantiation when the
	        metaclass derives from ``ABCMeta``; confirm that Configurable supplies one.
	    """

	    type_name = None
	    # phase_index and task are not read or written anywhere in this class;
	    # presumably they are maintained by subclasses or callers -- TODO confirm.
	    phase_index = 0
	    task = None

	    @abstractmethod
	    def __init__(self, player_names: List[str], **kwargs):
	        """
	        Set up the environment for the given players.

	        Parameters:
	            player_names (List[str]): Names of the players in the environment.
	            **kwargs: Extra settings, forwarded unchanged to ``Configurable.__init__``.
	        """
	        # Registers the arguments with Configurable.
	        super().__init__(player_names=player_names, **kwargs)
	        self.player_names = player_names

	    def __init_subclass__(cls, **kwargs):
	        """
	        Hook that runs whenever a subclass of Environment is defined.

	        Fills in ``type_name`` with the lower-cased class name when the subclass
	        resolves it to ``None``.

	        Note:
	            The lookup follows normal attribute inheritance, so a subclass whose
	            parent already has a non-None ``type_name`` keeps the parent's value.
	        """
	        needs_default = any(getattr(cls, attr) is None for attr in ("type_name",))
	        if needs_default:
	            cls.type_name = cls.__name__.lower()

	        return super().__init_subclass__(**kwargs)

	    @abstractmethod
	    def reset(self):
	        """
	        Bring the environment back to its initial state.

	        Note:
	            Subclasses are expected to provide the implementation.
	        """

	    def to_config(self) -> EnvironmentConfig:
	        """
	        Build an EnvironmentConfig describing this environment.

	        Note:
	            The ``"env_type"`` entry is written into ``self._config_dict`` itself,
	            so the stored dictionary is updated as a side effect.
	            ``_config_dict`` is not created in this class; presumably
	            Configurable sets it up -- TODO confirm.

	        Returns:
	            EnvironmentConfig: Config built from the stored arguments plus ``env_type``.
	        """
	        stored_args = self._config_dict
	        stored_args["env_type"] = self.type_name
	        return EnvironmentConfig(**stored_args)

	    @property
	    def num_players(self) -> int:
	        """Number of entries in ``player_names``."""
	        return len(self.player_names)

	    @abstractmethod
	    def get_next_player(self) -> str:
	        """
	        Tell which player acts next.

	        Note:
	            Subclasses are expected to provide the implementation.

	        Returns:
	            str: The name of the next player.
	        """

	    @abstractmethod
	    def get_observation(self, player_name=None) -> List[Message]:
	        """
	        Produce the observation for a player.

	        Note:
	            Subclasses are expected to provide the implementation.

	        Parameters:
	            player_name (str, optional): The player whose observation is requested.

	        Returns:
	            List[Message]: The observation, expressed as a list of messages.
	        """

	    @abstractmethod
	    def print(self):
	        """Print the current state of the environment."""

	    @abstractmethod
	    def step(self, player_name: str, action: str) -> TimeStep:
	        """
	        Advance the environment by applying one player's action.

	        Note:
	            Subclasses are expected to provide the implementation.

	        Parameters:
	            player_name (str): The name of the acting player.
	            action (str): The action that the player wants to take.

	        Returns:
	            TimeStep: The resulting observation, reward, and terminal flag.
	        """

	    @abstractmethod
	    def check_action(self, action: str, player_name: str) -> bool:
	        """
	        Decide whether an action is valid for a player.

	        Note:
	            Subclasses are expected to provide the implementation. The base
	            version accepts every action.

	        Parameters:
	            action (str): The action to be checked.
	            player_name (str): The name of the player.

	        Returns:
	            bool: True if the action is valid, False otherwise.
	        """
	        return True

	    @abstractmethod
	    def is_terminal(self) -> bool:
	        """
	        Report whether the episode has ended.

	        Note:
	            Subclasses are expected to provide the implementation.

	        Returns:
	            bool: True if the environment is in a terminal state, False otherwise.
	        """

	    def get_zero_rewards(self) -> Dict[str, float]:
	        """
	        Build a reward mapping that gives every player 0.0.

	        Returns:
	            Dict[str, float]: Player names mapped to a reward of zero.
	        """
	        return dict.fromkeys(self.player_names, 0.0)

	    def get_one_rewards(self) -> Dict[str, float]:
	        """
	        Build a reward mapping that gives every player 1.0.

	        Returns:
	            Dict[str, float]: Player names mapped to a reward of one.
	        """
	        return dict.fromkeys(self.player_names, 1.0)