Source code for commonpower.control.safety_layer.penalties

"""
Collection of penalties.
"""


[docs] class BasePenalty: """ Base class for penalties. A penalty is computed if the action proposed by the agent needs to be changed by the safety layer. """
[docs] def get_correction_penalty(self, **kwargs) -> float: """ Computes the penalty, must be implememnted by subclasses. args: None returns: float: penalty """ raise NotImplementedError
[docs] class NoPenalty(BasePenalty): """ No additional penalty for action correction. """
[docs] def get_correction_penalty(self) -> float: """ No penalty for action correction, therefore returns 0 always. args: None returns: float: penalty_value """ return 0.0
[docs] class ConstantPenalty(BasePenalty): def __init__(self, penalty_constant: float = 1.0) -> None: """ Constant penalty. args: penalty_factor (float): factor for penalty returns: None """ super().__init__() self.penalty_constant = penalty_constant
[docs] def get_correction_penalty(self) -> float: """ Computes the penalty. args: None returns: float: penalty_value """ return self.penalty_constant
[docs] class DistanceDependingPenalty(BasePenalty): def __init__(self, penalty_factor: float = 0.1) -> None: """ Penalty depending on the distance between the proposed action and the corrected action. args: penalty_factor (float): factor for penalty returns: None """ super().__init__() self.penalty_factor = penalty_factor
[docs] def get_correction_penalty(self, safety_obj: float) -> float: """ Computes the penalty. args: safety_obj (float): The optimal value of the objective function of the safety shield returns: float: penalty_value """ return self.penalty_factor * safety_obj