diff --git a/exploration_strategies.py b/exploration_strategies.py new file mode 100644 index 0000000000000000000000000000000000000000..8935bc7cbcad7d785a87a79048cce7b9636fd7d9 --- /dev/null +++ b/exploration_strategies.py @@ -0,0 +1,22 @@ +import numpy as np + +class EpsilonGreedyStrategy: + def __init__(self, epsilon=0.1): + self.epsilon = epsilon + + def select_action(self, q_values): + if np.random.rand() < self.epsilon: + # Exploration: choose a random action + return np.random.choice(len(q_values)) + else: + # Exploitation: choose the action with the highest Q-value + return np.argmax(q_values) + +class SoftmaxStrategy: + def __init__(self, temperature=1.0): + self.temperature = temperature + + def select_action(self, q_values): + exp_q = np.exp(q_values / self.temperature) + probabilities = exp_q / np.sum(exp_q) + return np.random.choice(len(q_values), p=probabilities) \ No newline at end of file