diff --git a/.vs/QLearningApp/v16/.suo b/.vs/QLearningApp/v16/.suo
index 2c923d19ec055f7212ed0a292944aa1c237b41c9..4440aaa92ef0803fdda288649133ffcf000c393b 100644
Binary files a/.vs/QLearningApp/v16/.suo and b/.vs/QLearningApp/v16/.suo differ
diff --git a/QLearningApp/MachineLearning/Models/QLearning.cs b/QLearningApp/MachineLearning/Models/QLearning.cs
index 852a0c893cf1ca73ff06c9054f63b75251aa83c9..4734e96d441d60e24cffece6941177272453f1ce 100644
--- a/QLearningApp/MachineLearning/Models/QLearning.cs
+++ b/QLearningApp/MachineLearning/Models/QLearning.cs
@@ -12,23 +12,27 @@ namespace QLearningApp.MachineLearning
         private double _gamma;
         public double Gamma { get => _gamma; }
 
-        private double[][] _qTable;
-        public double[][] QTable { get => _qTable; }
+        private double[,,] _qTable;
+        public double[,,] QTable { get => _qTable; }
 
         private RLSync _qLearningProblem;
+        List<double> matrixList = new List<double>();
 
         public QLearning(double gamma, RLSync qLearningProblem, double TargetFPS)
         {
             _qLearningProblem = qLearningProblem;
             _gamma = gamma; // discount factor
-            _qTable = new double[qLearningProblem.NumberOfStates][];
+            _qTable = new double[qLearningProblem.NumberOfStates, qLearningProblem.gamma_length, qLearningProblem.delta_length]; /* Q-table is now a 3-D array: state x gamma x delta */
-            for (int i = 0; i < qLearningProblem.NumberOfStates; i++)
-                _qTable[i] = new double[qLearningProblem.NumberOfActions];
+            /*for (int i = 0; i < qLearningProblem.NumberOfStates; i++) //fps_space.Length;//9
+                _qTable[i] = new double[qLearningProblem.NumberOfActions];*/ //6*6=36
             for (int i = 0; i < qLearningProblem.NumberOfStates; i++)
             {
-                for (int j = 0; j < qLearningProblem.NumberOfActions; j++)
-                    _qTable[i][j] = 0;
+                for (int j = 0; j < qLearningProblem.gamma_length; j++)
+                {
+                    for (int k = 0; k < qLearningProblem.delta_length; k++)
+                        _qTable[i,j,k] = 0;
+                }
             }
             qLearningProblem.Target_fps = TargetFPS; /*I can add PE and APL here*/
@@ -48,28 +52,47 @@ namespace QLearningApp.MachineLearning
         public QLearningStats Run(int initialStateIndex)//returning an object
         {
             if (initialStateIndex < 0 || initialStateIndex > _qLearningProblem.NumberOfStates) throw new ArgumentException($"The initial state can be between [0-{_qLearningProblem.NumberOfStates}", nameof(initialStateIndex));
-
             var result = new QLearningStats();
             result.InitialState = _qLearningProblem.fps_space[initialStateIndex];
             int stateIndex = initialStateIndex;
-            int counter = 0;
-            List<double> actions = new List<double>();
+            int jmax = 0, kmax = 0;
+            List<double> gammaactions = new List<double>();
+            List<double> deltaactions = new List<double>();
             List<double> stateFPS = new List<double>();
             double prev_state = result.InitialState;
             while (true)
             {
                 result.Steps += 1;
-                counter++;
-                int actionIndex = _qTable[stateIndex].ToList().IndexOf(_qTable[stateIndex].Max());
+                //int actionIndex = _qTable[stateIndex].ToList().IndexOf(_qTable[stateIndex].Max());
                 //state = action;
-                double state = _qLearningProblem.SimulatedFPSValues(actionIndex, 4);
-
+                //int actionIndex = matrixList.IndexOf(matrixList.Max());
+
+                //int actionIndex = _qTable[0,0].ToList().IndexOf(_qTable[stateIndex,0,0].Max());//it would be nice if it works
+                double element = _qTable[stateIndex, 0, 0];
+                for (int j = 0; j < _qLearningProblem.gamma_length; j++)
+                    for (int k = 0; k < _qLearningProblem.delta_length; k++)
+                    {
+                        if (_qTable[stateIndex, j, k] >= element)
+                        {
+                            element = _qTable[stateIndex, j, k];
+                            jmax = j;//gamma index
+                            kmax = k;//delta index
+                        }
+                    }
+
+                double state = _qLearningProblem.SimulatedFPSValues(jmax, kmax);//FPS value
+
+                //Console.WriteLine("Next state FPS"+state);
                 double statefps_space = _qLearningProblem.fps_space.OrderBy(x => Math.Abs((long)x - state)).First();
                 stateIndex = _qLearningProblem.fps_spaceIndex(statefps_space);
-                double gamma_action = _qLearningProblem.gamma_space[actionIndex];
-                Console.WriteLine("Current FPS state: " + prev_state + ", Action: " + gamma_action + ", Next FPS state: " + state);
+                double gamma_action = _qLearningProblem.gamma_space[jmax];
+                double delta_action = _qLearningProblem.delta_space[kmax];
+
+                //Console.WriteLine("Current FPS state" + prev_state);
+                Console.WriteLine("Current FPS state: " + prev_state + ", Gamma Action: " + gamma_action + ", Delta Action: " + delta_action + ", Next FPS state: " + state);
                 prev_state = state;
-                actions.Add(gamma_action);
+                gammaactions.Add(gamma_action);
+                deltaactions.Add(delta_action);
                 stateFPS.Add(state);
                 /* Newly added code to make it finite loop */
                 if (_qLearningProblem.GoalStateIsReached(state))
@@ -81,7 +104,8 @@ namespace QLearningApp.MachineLearning
                 /*if (counter == 50)
                 { break; }*/
             }
-            result.Actions = actions.ToArray();
+            result.gammaActions = gammaactions.ToArray();
+            result.deltaActions = deltaactions.ToArray();
             return result;
         }
 
@@ -91,11 +115,11 @@ namespace QLearningApp.MachineLearning
             //Console.WriteLine("Enter targetted FPS value ={0}",targetFPS);
             double currentState = initialState;//FPS value
-            int counter = 0;
+            //int counter = 0;
             while (true)
             {
                 currentState = TakeAction(currentState);//currentState= FPS, return should be FPS value
-                counter++;
+                //counter++;
                 /*if (counter == 500)
                     break;//will run 500 times*/
                 if (_qLearningProblem.GoalStateIsReached(currentState))
@@ -106,23 +130,41 @@ namespace QLearningApp.MachineLearning
         private double TakeAction(double currentState)
        {
             //var validActions = _qLearningProblem.GetValidActions(currentState);
-            int randomIndexActionGamma = _random.Next(0, _qLearningProblem.NumberOfActions);
+            int randomIndexActionGamma = _random.Next(0, _qLearningProblem.gamma_length);
+
+            int randomIndexActionDelta = _random.Next(0, _qLearningProblem.delta_length);
+            //Console.WriteLine("randomIndexActionGamma ={0},randomIndexActionDelta ={1}", randomIndexActionGamma, randomIndexActionDelta);
             //int randomIndexActionDelta = _random.Next(0, _qLearningProblem.NumberOfActions);//choosing a random action from number of actions defined
             double gamma_action = _qLearningProblem.gamma_space[randomIndexActionGamma];//retrieving gamma value form randomIndexAction
-            // double delta_action = _qLearningProblem.delta_space[randomIndexActionGamma];
-            double delta_action = 20; //fixed for now, can be changed in the future
-            int delta_actionIndex = 0;
+            // double delta_action = _qLearningProblem.delta_space[randomIndexActionGamma];
+            double delta_action = _qLearningProblem.delta_space[randomIndexActionDelta]; //delta is now chosen randomly as well, no longer fixed
+            //int delta_actionIndex = 0;
             double rho = 0.4;
             double saReward = _qLearningProblem.GetReward(currentState, gamma_action, delta_action);
             // Use rho to have a slowly & smoothly changing FPS value after an action is taken
-            double nextState = rho * currentState + (1 - rho) * _qLearningProblem.SimulatedFPSValues(randomIndexActionGamma, delta_actionIndex); // FPS for time t+1, delta_actionIndex changes to 4 in the RLSync as it is hardcoded
+            double nextState = rho * currentState + (1 - rho) * _qLearningProblem.SimulatedFPSValues(randomIndexActionGamma, randomIndexActionDelta); // FPS for time t+1, with a randomly chosen delta index
             double nextStatefps_space = _qLearningProblem.fps_space.OrderBy(x => Math.Abs((long)x - nextState)).First();//need to test first, looking for the closet value of the FPS
-            int nextStateIndex = _qLearningProblem.fps_spaceIndex(nextStatefps_space);
-            double nsReward = _qTable[nextStateIndex].Max();
+            double nextStateIndex = _qLearningProblem.fps_spaceIndex(nextStatefps_space);
+            //double nsReward = _qTable[Convert.ToInt32(nextStateIndex), 0, 0];
+            /*
+             The next state index is fixed; find the highest Q value over that state's (gamma, delta) slice
+             */
+            double Max = _qTable[Convert.ToInt32(nextStateIndex), 0, 0];
+            for (int j = 0; j < _qLearningProblem.gamma_length; j++)
+                for (int k = 0; k < _qLearningProblem.delta_length; k++)
+                {
+                    if (_qTable[Convert.ToInt32(nextStateIndex), j, k] > Max)
+                    {
+                        Max = _qTable[Convert.ToInt32(nextStateIndex), j, k];
+                    }
+                }
+
+            double nsReward = Max;
+            //Console.WriteLine("value of nsReward=" + nsReward);
             double qCurrentState = saReward + (_gamma * nsReward);
             int currentStateIndex = _qLearningProblem.fps_spaceIndex(currentState);
-            _qTable[currentStateIndex][randomIndexActionGamma] = qCurrentState;
+            _qTable[currentStateIndex, randomIndexActionGamma, randomIndexActionDelta] = qCurrentState;
             return nextState;
         }
 
@@ -136,3 +178,14 @@ namespace QLearningApp.MachineLearning
         }
     }
 }
+
+
+/*
+ for (int i = 0; i < _qLearningProblem.NumberOfStates; i++)//check here
+     for (int j = 0; j < _qLearningProblem.delta_length; j++)
+         for (int k = 0; k < _qLearningProblem.gamma_length; k++)
+         {
+             matrixList.Add(_qTable[i, j, k]);
+         }
+
+ */
\ No newline at end of file
diff --git a/QLearningApp/MachineLearning/Models/QLearningStats.cs b/QLearningApp/MachineLearning/Models/QLearningStats.cs
index 3cbb2664daf35e29eba91ba9249b9a63193482a1..5daac3908d2b3490bbb954d7e28944720fedf8e2 100644
--- a/QLearningApp/MachineLearning/Models/QLearningStats.cs
+++ b/QLearningApp/MachineLearning/Models/QLearningStats.cs
@@ -7,7 +7,8 @@ namespace QLearningApp.MachineLearning
         public double InitialState { get; set; }
         public double EndState { get; set; }
         public int Steps { get; set; }
-        public double[] Actions { get; set; }
+        public double[] gammaActions { get; set; }
+        public double[] deltaActions { get; set; }
 
         public double[] State { get; set; }
 
@@ -16,9 +17,11 @@ namespace QLearningApp.MachineLearning
             StringBuilder sb = new StringBuilder();
             sb.AppendLine($"Agent needed {Steps} steps to find the solution");
             sb.AppendLine($"Agent Initial State: {InitialState}");
-            /*foreach (var action in Actions)
-                sb.AppendLine($"Action: {action}");*/
-            //sb.AppendLine($"Agent arrived at the goal state: {EndState}");
+            foreach (var action in gammaActions)
+                sb.AppendLine($"gammaAction: {action}");
+            foreach (var action in deltaActions)
+                sb.AppendLine($"deltaAction: {action}");
+            sb.AppendLine($"Agent arrived at the goal state: {EndState}");
             return sb.ToString();
         }
     }
diff --git a/QLearningApp/Program.cs b/QLearningApp/Program.cs
index 597ca94eda7587c7ce022fd53e0d1bcdbd78aad8..776be1c1a7f30f69590539113e146ab0d8c83e64 100644
--- a/QLearningApp/Program.cs
+++ b/QLearningApp/Program.cs
@@ -35,8 +35,8 @@ namespace QLearningApp
             {
                 var qLearningStats = qLearning.Run(initialStateIndex);
                 Console.WriteLine(qLearningStats.ToString());
-                var normalizedMatrix = qLearning.QTable.NormalizeMatrix();
-                Console.Write(normalizedMatrix.Print());
+                //var normalizedMatrix = qLearning.QTable.NormalizeMatrix();
+                //Console.Write(qLearning.QTable.ToString());
             }
             catch(Exception ex)
             {
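Note: the greedy scan over one state's (gamma, delta) slice of the 3-D Q-table now appears twice in QLearning.cs (once in Run(), once in TakeAction()). Below is a minimal sketch of how that scan could be pulled into a single helper; the QTableUtil and ArgMax names and the demo Main are illustrative assumptions, not code from this patch.

    using System;

    // Illustrative only: scans one state's slice of a [states, gammas, deltas]
    // Q-table and returns the best value plus the (gamma, delta) indices.
    public static class QTableUtil
    {
        public static (double value, int gammaIndex, int deltaIndex) ArgMax(double[,,] qTable, int stateIndex)
        {
            double best = qTable[stateIndex, 0, 0];
            int bestGamma = 0, bestDelta = 0;
            for (int j = 0; j < qTable.GetLength(1); j++)      // gamma dimension
                for (int k = 0; k < qTable.GetLength(2); k++)  // delta dimension
                    if (qTable[stateIndex, j, k] > best)
                    {
                        best = qTable[stateIndex, j, k];
                        bestGamma = j;
                        bestDelta = k;
                    }
            return (best, bestGamma, bestDelta);
        }

        public static void Main()
        {
            var q = new double[2, 3, 3];
            q[0, 1, 2] = 0.7;                       // plant a known maximum
            var (v, g, d) = ArgMax(q, 0);
            Console.WriteLine($"max={v} at gamma index {g}, delta index {d}"); // max=0.7 at 1, 2
        }
    }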
diff --git a/QLearningApp/RLsync.cs b/QLearningApp/RLsync.cs
index 5287ef0613e93ecb092bc4090543f9943d067d16..2d5082881725f1d77b9e10febcffaa87b03f71a6 100644
--- a/QLearningApp/RLsync.cs
+++ b/QLearningApp/RLsync.cs
@@ -24,19 +24,21 @@ namespace QLearningApp
         target for syn window if it is 100ms, target should be given by user not hard-coded
         ini satse: 20 FPS, 100ms latency
         initial values 30FPS, APL 100ms--> Target APL 20ms, FPS >=15
-        FPS, PE: 30FPS, PE: 25ms --> target
+        FPS, PE: 30FPS, PE: 25ms --> target trade-off
         */
 
-        public int NumberOfStates => fps_space.Length;
+        public int NumberOfStates => fps_space.Length;//9
 
-        public int NumberOfActions => gamma_space.Length * delta_space.Length;//6*6=36
+        //public int NumberOfActions => gamma_space.Length * delta_space.Length;//6*6=36
+        public int gamma_length => gamma_space.Length;//6
+        public int delta_length => delta_space.Length;//6
 
         public double GetReward(double currentState, double action_gamma, double action_delta)
         {
             //double reward = 1 / (System.Math.Abs(currentState - target_fps)) - alpha * action_gamma - beta * action_delta;
             //double reward = 1 / (1+System.Math.Abs(currentState - target_fps)) - alpha * action_gamma/100 - beta * action_delta/100; // gamma nand delta are normalized to [0, 1]; does not perform well when fps is very low
-            double reward = 1 / (1 + System.Math.Abs(currentState - Target_fps)) * (1 - alpha * action_gamma / 100 - beta * action_delta / 100); // gamma nand delta are normalized to [0, 1]
+            double reward = 1 / (1 + System.Math.Abs(currentState - Target_fps)) * (1 - alpha * System.Math.Abs(action_gamma / 100) - beta * System.Math.Abs(action_delta / 100)); // gamma and delta are normalized to [0, 1]
             return reward;
         }
 
@@ -44,15 +46,21 @@
         {
             //keeping deltaIndex as 4 so that RL algo could get the FPS 30 value, changed it to 2 to get the better Q values
-            deltaIndex = _random.Next(3,5);//trying randomness
+            //deltaIndex = _random.Next(3,5);//trying randomness
             double random_fps_noise = 3*(_random.NextDouble()-0.5); // adding uniform random noise to observed fps value
             // deltaIndex = 3;
-            double[,] FPS = new double[,] { { 0, 10, 10, 15, 15, 15 }, { 10, 12, 13, 15, 20, 22 }, { 10, 15, 15, 20, 25, 25 }, { 10, 20, 20, 25, 30, 30 }, { 10, 20, 25, 30, 30, 30 }, { 25, 25, 27, 30, 30, 30 } };
-            // Further improvements in the simulation: depending on changing network delays, the FPS matrix can change, allowing higher FPS at smaller values of gamma and delta
-            return FPS[gammaIndex,deltaIndex] + random_fps_noise;
+            double[,] FPS = new double[,] { { 0, 8, 10, 12, 13, 15 },
+                                            { 8, 10, 13, 15, 18, 20 },
+                                            { 10, 12, 15, 20, 22, 25 },
+                                            { 10, 15, 18, 20, 25, 28 },
+                                            { 30, 30, 30, 30, 30, 30 },
+                                            { 15, 20, 25, 27, 30, 30 } }; //{ 10, 15, 20, 22, 30, 30 }-second last row
+            // 6x6 matrix. Further improvements in the simulation: depending on changing network delays, the FPS matrix can change, allowing higher FPS at smaller values of gamma and delta
+            return System.Math.Abs(FPS[gammaIndex,deltaIndex] + random_fps_noise);
         }
 
         public int fps_spaceIndex(double FPS)
+        {
             var fps_Index=0;
             for (int i = 0; i<fps_space.Length; i++)
             {
                 if (FPS == fps_space[i])
                     fps_Index = i;
                 //break;
-            }
             return fps_Index;
         }
diff --git a/QLearningApp/bin/Debug/QLearningApp.exe b/QLearningApp/bin/Debug/QLearningApp.exe
index 0251b165b78624be1647a5e81810119edd4a5a8a..ea93ca9266419cad2c275deb93337a461f88c6f1 100644
Binary files a/QLearningApp/bin/Debug/QLearningApp.exe and b/QLearningApp/bin/Debug/QLearningApp.exe differ
diff --git a/QLearningApp/bin/Debug/QLearningApp.pdb b/QLearningApp/bin/Debug/QLearningApp.pdb
index ff74b3d8ed7901b7810459b8e1eab4d326b71edd..212d01b5aaf1be5750912b2a43e1e1ccbc55e85b 100644
Binary files a/QLearningApp/bin/Debug/QLearningApp.pdb and b/QLearningApp/bin/Debug/QLearningApp.pdb differ
diff --git a/QLearningApp/obj/Debug/QLearningApp.csprojAssemblyReference.cache b/QLearningApp/obj/Debug/QLearningApp.csprojAssemblyReference.cache
index b9971596f5ab326a4ea03dc9bc7679cfb48bb94b..c240c2684ac24abfea7de1369b85f99398d906b7 100644
Binary files a/QLearningApp/obj/Debug/QLearningApp.csprojAssemblyReference.cache and b/QLearningApp/obj/Debug/QLearningApp.csprojAssemblyReference.cache differ
diff --git a/QLearningApp/obj/Debug/QLearningApp.exe b/QLearningApp/obj/Debug/QLearningApp.exe
index 0251b165b78624be1647a5e81810119edd4a5a8a..ea93ca9266419cad2c275deb93337a461f88c6f1 100644
Binary files a/QLearningApp/obj/Debug/QLearningApp.exe and b/QLearningApp/obj/Debug/QLearningApp.exe differ
diff --git a/QLearningApp/obj/Debug/QLearningApp.pdb b/QLearningApp/obj/Debug/QLearningApp.pdb
index ff74b3d8ed7901b7810459b8e1eab4d326b71edd..212d01b5aaf1be5750912b2a43e1e1ccbc55e85b 100644
Binary files a/QLearningApp/obj/Debug/QLearningApp.pdb and b/QLearningApp/obj/Debug/QLearningApp.pdb differ
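For reference, below is a compact, self-contained sketch of the tabular update this patch implements in TakeAction(): pick a random (gamma, delta) pair, then write Q[s, g, d] = reward + discount * max over the next state's slice. The patch assigns the new value directly (an implicit learning rate of 1), and the sketch does the same. The TabularUpdate and Update names, the 9x6x6 sizing taken from the comments above, and the demo values are assumptions for illustration only.

    using System;

    // Illustrative sketch of the Q-table write performed in TakeAction().
    public static class TabularUpdate
    {
        static readonly Random Rng = new Random();

        public static void Update(double[,,] q, int state, int nextState, double reward, double discount,
                                  out int gammaIdx, out int deltaIdx)
        {
            gammaIdx = Rng.Next(q.GetLength(1));   // random exploration over gamma actions
            deltaIdx = Rng.Next(q.GetLength(2));   // random exploration over delta actions

            double nextBest = double.MinValue;     // max Q over the next state's (gamma, delta) slice
            for (int j = 0; j < q.GetLength(1); j++)
                for (int k = 0; k < q.GetLength(2); k++)
                    nextBest = Math.Max(nextBest, q[nextState, j, k]);

            // Direct assignment, mirroring qCurrentState = saReward + (_gamma * nsReward) in the patch
            q[state, gammaIdx, deltaIdx] = reward + discount * nextBest;
        }

        public static void Main()
        {
            var q = new double[9, 6, 6];           // 9 FPS states, 6 gamma x 6 delta actions
            Update(q, 3, 4, 0.5, 0.8, out int g, out int d);
            Console.WriteLine($"Q[3,{g},{d}] = {q[3, g, d]}"); // 0.5 when the table starts at zero
        }
    }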