Commit a8964e1c authored by Anmulwar, Sweta V (PG/R - Elec Electronic Eng)

change reward function

parent 225884e5
@@ -49,24 +49,29 @@ namespace QLearningApp.MachineLearning
     int stateIndex = initialStateIndex;
     int counter = 0;
     List<double> actions = new List<double>();
+    List<double> stateFPS = new List<double>();
+    double prev_state = result.InitialState;
     while (true)
     {
         result.Steps += 1;
         counter++;
         int actionIndex = _qTable[stateIndex].ToList().IndexOf(_qTable[stateIndex].Max());
         //state = action;
         double state = _qLearningProblem.SimulatedFPSValues(actionIndex, 4);
         double statefps_space = _qLearningProblem.fps_space.OrderBy(x => Math.Abs((long)x - state)).First();
         stateIndex = _qLearningProblem.fps_spaceIndex(statefps_space);
         double gamma_action = _qLearningProblem.gamma_space[actionIndex];
+        Console.WriteLine("Current state: " + prev_state + ", Action: " + gamma_action + ", Next state: " + state);
+        prev_state = state;
         actions.Add(gamma_action);
+        stateFPS.Add(state);
         /*if (_qLearningProblem.GoalStateIsReached(action))
         {
             result.EndState = action;
             break;
         }*/
-        if (counter == 100)
+        if (counter == 50)
         { break; }
     }
     result.Actions = actions.ToArray();
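
The rollout loop above is purely greedy: at each step it picks the action whose Q-value is largest in the current state's row, via ToList().IndexOf(...Max()). A minimal sketch of that selection step, with a made-up Q-row (only gamma_space is taken from the commit):

using System;
using System.Linq;

class GreedyActionDemo
{
    static void Main()
    {
        double[] gammaSpace = { 10, 20, 30, 40, 50, 100 };       // action space, copied from RLSync below
        double[] qRow = { 0.10, 0.70, 0.30, 0.65, 0.20, 0.05 };  // hypothetical Q-values for one state

        // Same selection as the rollout loop: index of the maximum Q-value in this row
        int actionIndex = qRow.ToList().IndexOf(qRow.Max());
        Console.WriteLine($"greedy action: gamma = {gammaSpace[actionIndex]}"); // prints: gamma = 20
    }
}

Array.IndexOf(qRow, qRow.Max()) would do the same job without the intermediate list allocation.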
@@ -82,8 +87,8 @@ namespace QLearningApp.MachineLearning
     {
         currentState = TakeAction(currentState); // currentState = FPS; return should be an FPS value
         counter++;
-        if (counter == 100)
-            break; // will run 100 times
+        if (counter == 500)
+            break; // will run 500 times
     }
 }
@@ -96,9 +101,11 @@ namespace QLearningApp.MachineLearning
     // double delta_action = _qLearningProblem.delta_space[randomIndexActionGamma];
     double delta_action = 20; // fixed for now, can be changed in the future
     int delta_actionIndex = 0;
+    double rho = 0.4;
     double saReward = _qLearningProblem.GetReward(currentState, gamma_action, delta_action);
-    double nextState = _qLearningProblem.SimulatedFPSValues(randomIndexActionGamma, delta_actionIndex); // FPS for time t+1; delta_actionIndex is set to 4 inside RLSync as it is hardcoded
+    // Use rho to obtain a slowly and smoothly changing FPS value after an action is taken
+    double nextState = rho * currentState + (1 - rho) * _qLearningProblem.SimulatedFPSValues(randomIndexActionGamma, delta_actionIndex); // FPS for time t+1; delta_actionIndex is set to 4 inside RLSync as it is hardcoded
     double nextStatefps_space = _qLearningProblem.fps_space.OrderBy(x => Math.Abs((long)x - nextState)).First(); // needs testing first; looking for the closest FPS value
     int nextStateIndex = _qLearningProblem.fps_spaceIndex(nextStatefps_space);
     double nsReward = _qTable[nextStateIndex].Max();
...
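
The new rho line in TakeAction is an exponential moving average: the next state keeps 40% of the current FPS and takes 60% from the fresh simulator sample, so the observed FPS drifts toward new values rather than jumping. A standalone sketch of the effect, with made-up samples (only rho = 0.4 comes from the commit):

using System;

class SmoothingDemo
{
    static void Main()
    {
        double rho = 0.4;   // weight kept on the previous state, as in TakeAction above
        double state = 10;  // hypothetical starting FPS
        double[] rawSamples = { 30, 30, 30, 30, 30 }; // hypothetical raw simulator outputs

        foreach (double raw in rawSamples)
        {
            // Exponential moving average: move only (1 - rho) of the way toward each new sample
            state = rho * state + (1 - rho) * raw;
            Console.WriteLine($"smoothed FPS: {state:F2}");
        }
        // Prints 22.00, 26.80, 28.72, 29.49, 29.80: the state approaches 30 gradually
    }
}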
@@ -9,14 +9,16 @@ namespace QLearningApp.MachineLearning
     public int Steps { get; set; }
     public double[] Actions { get; set; }
+    public double[] State { get; set; }
     public override string ToString()
     {
         StringBuilder sb = new StringBuilder();
         sb.AppendLine($"Agent needed {Steps} steps to find the solution");
         sb.AppendLine($"Agent Initial State: {InitialState}");
-        foreach (var action in Actions)
-            sb.AppendLine($"Action: {action}");
-        sb.AppendLine($"Agent arrived at the goal state: {EndState}");
+        /*foreach (var action in Actions)
+            sb.AppendLine($"Action: {action}");*/
+        //sb.AppendLine($"Agent arrived at the goal state: {EndState}");
         return sb.ToString();
     }
 }
...
 using QLearningApp.MachineLearning;
+using System;
 using System.Collections.Generic;
 namespace QLearningApp
 {
     public class RLSync
     {
+        private Random _random = new Random();
         // Actions
         public double[] gamma_space = { 10, 20, 30, 40, 50, 100 }; // the possible values of gamma
         // private double[] delta_space = { 10, 20, 30, 40, 50, 100 }; // possible values of delta
         public double[] delta_space = { 20 };
-        public double alpha = 0.5; // original value
-        public double beta = 0.5; // original value
+        public double alpha = 0.2; // original value: 0.5
+        public double beta = 0.2; // original value: 0.5
         // State space
-        public double[] fps_space = { 10, 15, 20, 25, 30 };
+        public double[] fps_space = { 10, 12, 15, 17, 20, 22, 25, 27, 30 }; // PE, APL 0-30; read from a file
         private double[][] rewards = new double[5][];
         public double target_fps = 30;
+        /*
+        target for the sync window: if it is 100 ms, the target should be given by the user, not hard-coded
+        initial state: 20 FPS, 100 ms latency
+        initial values 30 FPS, APL 100 ms --> target APL 20 ms, FPS >= 15
+        FPS, PE: 30 FPS, PE: 25 ms --> target
+        trade-off
+        */
         public int NumberOfStates => fps_space.Length;
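
Refining fps_space from 5 to 9 bins halves the worst-case quantization error, from 2.5 FPS (midway between neighbours 5 apart) to at most 1.5 FPS. A small sketch comparing how the two grids snap the same raw reading (the readings are made up):

using System;
using System.Linq;

class GridComparison
{
    static void Main()
    {
        double[] oldGrid = { 10, 15, 20, 25, 30 };                  // fps_space before this commit
        double[] newGrid = { 10, 12, 15, 17, 20, 22, 25, 27, 30 };  // fps_space after this commit

        foreach (double reading in new double[] { 13.0, 23.0 })
        {
            // Nearest-neighbour quantization, as used throughout the agent
            double oldNearest = oldGrid.OrderBy(x => Math.Abs(x - reading)).First();
            double newNearest = newGrid.OrderBy(x => Math.Abs(x - reading)).First();
            Console.WriteLine($"raw {reading}: old grid -> {oldNearest}, new grid -> {newNearest}");
        }
        // raw 13: old grid -> 15, new grid -> 12
        // raw 23: old grid -> 25, new grid -> 22
    }
}

One caveat worth checking: the rewards array is still declared as new double[5][]; if it is indexed by state, it presumably needs to grow to fps_space.Length (9) along with the grid.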
@@ -26,17 +35,21 @@ namespace QLearningApp
 public double GetReward(double currentState, double action_gamma, double action_delta)
 {
     //double reward = 1 / (System.Math.Abs(currentState - target_fps)) - alpha * action_gamma - beta * action_delta;
-    double reward = 1 / (1 + System.Math.Abs(currentState - target_fps)) - alpha * action_gamma / 100 - beta * action_delta / 100; // gamma and delta are normalized to [0, 1]
+    //double reward = 1 / (1 + System.Math.Abs(currentState - target_fps)) - alpha * action_gamma / 100 - beta * action_delta / 100; // gamma and delta are normalized to [0, 1]; does not perform well when FPS is very low
+    double reward = 1 / (1 + System.Math.Abs(currentState - target_fps)) * (1 - alpha * action_gamma / 100 - beta * action_delta / 100); // gamma and delta are normalized to [0, 1]
     return reward;
 }
 public double SimulatedFPSValues(int gammaIndex, int deltaIndex) // trial values; this is the simulated environment
 {
     // keeping deltaIndex at 4 so that the RL algorithm can reach the 30 FPS value; changed it to 2 to get better Q values
-    deltaIndex = 4;
+    deltaIndex = _random.Next(3, 5); // trying randomness
+    double random_fps_noise = 3 * (_random.NextDouble() - 0.5); // add uniform random noise to the observed FPS value
+    // deltaIndex = 3;
     double[,] FPS = new double[,] { { 0, 10, 10, 15, 15, 15 }, { 10, 12, 13, 15, 20, 22 }, { 10, 15, 15, 20, 25, 25 }, { 10, 20, 20, 25, 30, 30 }, { 10, 20, 25, 30, 30, 30 }, { 25, 25, 27, 30, 30, 30 } };
-    return FPS[gammaIndex, deltaIndex];
+    // Possible simulation improvement: with changing network delays the FPS matrix could change, allowing higher FPS at smaller values of gamma and delta
+    return FPS[gammaIndex, deltaIndex] + random_fps_noise;
 }
 public int fps_spaceIndex(double FPS)
...
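
The reward change that gives this commit its name swaps a subtractive action penalty for a multiplicative one. With the old form, far from the target the fixed cost terms alpha * gamma / 100 + beta * delta / 100 dominate the tiny closeness term, so the immediate reward mostly favours cheap actions regardless of their effect on FPS and can go negative for every action; the new form scales the cost by the closeness term instead. A quick comparison using the commit's own constants (the sample FPS values are made up):

using System;

class RewardComparison
{
    static void Main()
    {
        double alpha = 0.2, beta = 0.2, targetFps = 30;
        double gamma = 100, delta = 20; // the most expensive gamma action, and the fixed delta

        foreach (double fps in new double[] { 10, 30 })
        {
            double closeness = 1 / (1 + Math.Abs(fps - targetFps));
            double oldReward = closeness - alpha * gamma / 100 - beta * delta / 100;          // subtractive penalty
            double newReward = closeness * (1 - alpha * gamma / 100 - beta * delta / 100);    // multiplicative penalty
            Console.WriteLine($"fps={fps}: old={oldReward:F3}, new={newReward:F3}");
        }
        // fps=10: old=-0.192, new=0.036  (the subtractive form goes negative far from target)
        // fps=30: old=0.760,  new=0.760  (both agree at the target)
    }
}

Separately, the injected noise 3 * (NextDouble() - 0.5) is uniform on [-1.5, 1.5); since neighbouring fps_space entries are only 2-3 apart, a noisy reading near a bin boundary can occasionally snap to the adjacent state, which is presumably the intended exploration effect.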