diff --git a/.vs/QLearningApp/v16/.suo b/.vs/QLearningApp/v16/.suo
index 4440aaa92ef0803fdda288649133ffcf000c393b..103c2119432c3a8456e8ba30ad974c5e03404fac 100644
Binary files a/.vs/QLearningApp/v16/.suo and b/.vs/QLearningApp/v16/.suo differ
diff --git a/QLearningApp/MachineLearning/Models/QLearning.cs b/QLearningApp/MachineLearning/Models/QLearning.cs
index 4734e96d441d60e24cffece6941177272453f1ce..acd1ef5149cbb985f3b2b097fe7fa8ac5f7c5a32 100644
--- a/QLearningApp/MachineLearning/Models/QLearning.cs
+++ b/QLearningApp/MachineLearning/Models/QLearning.cs
@@ -115,15 +115,15 @@ namespace QLearningApp.MachineLearning
             //Console.WriteLine("Enter targetted FPS value ={0}",targetFPS);
             double currentState = initialState;//FPS value
-            //int counter = 0;
+            int counter = 0;
             while (true)
             {
                 currentState = TakeAction(currentState);//currentState= FPS, return should be FPS value
-                //counter++;
-                /*if (counter == 500)
+                counter++;
+                if (counter == 2000)
                     break;//will run 500 times*/
-                if (_qLearningProblem.GoalStateIsReached(currentState))
-                    break;
+                /*if (_qLearningProblem.GoalStateIsReached(currentState))
+                    break;*/
             }
         }
@@ -131,7 +131,6 @@ namespace QLearningApp.MachineLearning
         {
             //var validActions = _qLearningProblem.GetValidActions(currentState);
             int randomIndexActionGamma = _random.Next(0, _qLearningProblem.gamma_length);
-            int randomIndexActionDelta = _random.Next(0, _qLearningProblem.delta_length);
             //Console.WriteLine("randomIndexActionGamma ={0},randomIndexActionDelta ={1}", randomIndexActionGamma, randomIndexActionDelta);
             //int randomIndexActionDelta = _random.Next(0, _qLearningProblem.NumberOfActions);//choosing a random action from number of actions defined
@@ -139,7 +138,7 @@ namespace QLearningApp.MachineLearning
             // double delta_action = _qLearningProblem.delta_space[randomIndexActionGamma];
             double delta_action = _qLearningProblem.gamma_space[randomIndexActionDelta]; //fixed for now, can be changed in the future
             //int delta_actionIndex = 0;
-            double rho = 0.4;
+            double rho = 0.1;//0.4;
             double saReward = _qLearningProblem.GetReward(currentState, gamma_action, delta_action);
             // Use rho to have a slowly & smoothly changing FPS value after an action is taken
@@ -162,7 +161,7 @@ namespace QLearningApp.MachineLearning
             double nsReward = Max;
             //Console.WriteLine("value of nsReward=" + nsReward);
-            double qCurrentState = saReward + (_gamma * nsReward);
+            double qCurrentState = saReward + (_gamma * nsReward);//discount factor=0.8
             int currentStateIndex = _qLearningProblem.fps_spaceIndex(currentState);
             _qTable[currentStateIndex, randomIndexActionGamma, randomIndexActionDelta] = qCurrentState;
             return nextState;
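Note on the QLearning.cs hunks: with the goal-state test commented out, every call now runs a fixed 2000 steps, and rho and _gamma (0.8, passed in from Program.cs) are the two tuned constants. rho weights how quickly the FPS state follows a new simulated reading; the smoothing line itself sits outside these hunks, so the sketch below shows only its usual exponential form, under that assumption. simFps is a hypothetical stand-in, not a name from this diff; currentState, saReward, nsReward and _gamma mirror the diff.

    // Sketch under assumptions, not the verbatim hidden code.
    double simFps = 25.0;        // example simulated FPS reading
    double rho = 0.1;            // was 0.4; smaller => the FPS state drifts more slowly
    double nextState = rho * simFps + (1 - rho) * currentState;  // exponential smoothing
    double qCurrentState = saReward + _gamma * nsReward;         // backup: r + 0.8 * max Q(s',a')

Two observations from the hunks themselves: the table write (_qTable[...] = qCurrentState) overwrites the cell, i.e. an implicit learning rate of 1 rather than a blend of old and new estimates; and the second hunk deletes the declaration of randomIndexActionDelta while later lines still read it, so the variable must be defined elsewhere in the file for this to compile.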
diff --git a/QLearningApp/Program.cs b/QLearningApp/Program.cs
index 776be1c1a7f30f69590539113e146ab0d8c83e64..c9d9244429d65a60a1483ed8dc0cf914264374f9 100644
--- a/QLearningApp/Program.cs
+++ b/QLearningApp/Program.cs
@@ -18,7 +18,7 @@ namespace QLearningApp
             var qLearning = new QLearning(0.8, new RLSync(),targetFPS);
             Console.WriteLine("Training Agent...");
-            qLearning.TrainAgent(2000);
+            qLearning.TrainAgent(3000);
             Console.WriteLine("step 2");
             Console.WriteLine("Training is Done!");
@@ -27,6 +27,8 @@ namespace QLearningApp
             Console.ReadLine();
             do
             {
+                double[,] Qdisplay = new double[6, 6];
+                Console.WriteLine($"Enter initial state Index. Number between 0 and the number of states (5). Press 'q' to exit...");
                 int initialStateIndex = 0;
                 if (!int.TryParse(Console.ReadLine(), out initialStateIndex))
                     break;
@@ -35,15 +37,75 @@ namespace QLearningApp
             {
                 var qLearningStats = qLearning.Run(initialStateIndex);
                 Console.WriteLine(qLearningStats.ToString());
+                /* Display 3-dimensional Q matrix */
+
+                for (int k = 0; k < qLearning.QTable.GetLength(0); k++)
+                {
+                    for (int i = 0; i < qLearning.QTable.GetLength(1); i++)
+                    {
+                        for (int j = 0; j < qLearning.QTable.GetLength(2); j++)
+                        {
+                            Qdisplay[i, j] = qLearning.QTable[k, i, j];
+                            //Console.Write(Math.Round(Qdisplay[i, j] * 10) / 10 + "\t");
+                        }
+                    }
+                    /* To print normalized matrix */
+                    var normalizedMatrix = NormalizeMatrix(Qdisplay);
+                    for (int i = 0; i < Qdisplay.GetLength(0); i++)
+                    {
+                        for (int j = 0; j < Qdisplay.GetLength(1); j++)
+                        {
+                            Console.Write(Math.Round(normalizedMatrix[i, j] * 10) / 10 + "\t");
+                        }
+                        Console.WriteLine();
+                    }
+                    Console.WriteLine();
+                }
+
+                //Console.Write(Qdisplay.ToString());
+                //var normalizedMatrix = NormalizeMatrix(Qdisplay);
+                //Console.Write(qLearning.QTable.ToString());
                 //var normalizedMatrix = qLearning.QTable.NormalizeMatrix();
                 //Console.Write(qLearning.QTable.ToString());
             }
-            catch(Exception ex)
+            catch (Exception ex)
             {
                 Console.WriteLine($"Error: {ex.Message}");
             }
         } while (true);
     }
+
+        public static double[,] NormalizeMatrix(double[,] matrix)
+        {
+            /* To normalize matrix */
+            var maxElement = GetMaxElement(matrix);
+
+            var normalizedMatrix = new double[6, 6];
+            for (int i = 0; i < matrix.GetLength(0); i++)//0-rows, 1-cols
+            {
+                //normalizedMatrix[i] = new double[matrix.GetLength(0)];
+                for (int j = 0; j < matrix.GetLength(1); j++)
+                {
+                    if (matrix[i, j] != 0)
+                        normalizedMatrix[i, j] = Math.Round((matrix[i, j] * 100) / maxElement, 0);
+                }
+            }
+            return normalizedMatrix;
+        }
+
+        public static double GetMaxElement(double[,] matrix)
+        {
+            double maxElement = 0;
+            for (int i = 0; i < matrix.GetLength(0); i++)
+            {
+                for (int j = 0; j < matrix.GetLength(1); j++)
+                {
+                    if (matrix[i, j] > maxElement)
+                        maxElement = matrix[i, j];
+                }
+            }
+            return maxElement;
+        }
     }
 }
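Note on NormalizeMatrix in Program.cs: it rescales each 6x6 action slice of the Q-table so the largest entry maps to 100, making slices comparable at a glance. Two caveats: the output shape is hardcoded to 6x6 (a larger action grid would throw IndexOutOfRangeException), and the per-element != 0 test does the work of a single division-by-zero guard. A shape-agnostic sketch of the same scaling, offered as a drop-in alternative rather than the author's code:

    public static double[,] NormalizeMatrix(double[,] matrix)
    {
        double max = GetMaxElement(matrix);     // largest entry defines the 100 mark
        var normalized = new double[matrix.GetLength(0), matrix.GetLength(1)];
        if (max == 0) return normalized;        // untrained (all-zero) slice: nothing to scale
        for (int i = 0; i < matrix.GetLength(0); i++)
            for (int j = 0; j < matrix.GetLength(1); j++)
                normalized[i, j] = Math.Round(matrix[i, j] * 100 / max, 0);
        return normalized;
    }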
diff --git a/QLearningApp/RLsync.cs b/QLearningApp/RLsync.cs
index 2d5082881725f1d77b9e10febcffaa87b03f71a6..344e77cb02ca13103a1b85bd03f540415ed2d331 100644
--- a/QLearningApp/RLsync.cs
+++ b/QLearningApp/RLsync.cs
@@ -12,8 +12,8 @@ namespace QLearningApp
         public double[] delta_space = { 10, 20, 30, 40, 50, 100 }; // possible values of delta- 6 values
         //public double[] delta_space = { 20 };
-        public double alpha = 0.2; //original value
-        public double beta = 0.2;//original value
+        public double alpha = 0.2; //original value 0.4
+        public double beta = 0.2;//original value 0.4
         // State space
         public double[] fps_space = { 10, 12, 15, 17, 20, 22, 25, 27, 30 };//PE, APL 0-30 read it form file-9 states
@@ -21,10 +21,10 @@ namespace QLearningApp
         private double[][] rewards = new double[5][];
         public double Target_fps { get; set;}//made it a proerty
         /*
-         target for syn window if it is 100ms, target should be given by user not hard-coded
+         target for sync window if it is 100ms, target should be given by user not hard-coded - done
          ini satse: 20 FPS, 100ms latency initial values 30FPS, APL 100ms--> Target APL 20ms, FPS >=15
-         FPS, PE: 30FPS, PE: 25ms --> target
+         FPS, PE: 30FPS, PE: 25ms --> target trade-off
         */
@@ -38,7 +38,7 @@ namespace QLearningApp
         {
             //double reward = 1 / (System.Math.Abs(currentState - target_fps)) - alpha * action_gamma - beta * action_delta;
             //double reward = 1 / (1+System.Math.Abs(currentState - target_fps)) - alpha * action_gamma/100 - beta * action_delta/100; // gamma nand delta are normalized to [0, 1]; does not perform well when fps is very low
-            double reward = 1 / (1 + System.Math.Abs(currentState - Target_fps)) * (1 - alpha * System.Math.Abs(action_gamma / 100) - beta * System.Math.Abs(action_delta / 100)); // gamma nand delta are normalized to [0, 1]
+            double reward = 1 / (1 + System.Math.Abs(currentState - Target_fps)) * (1 - alpha * action_gamma / 100 - beta * action_delta / 100); // gamma and delta are normalized to [0, 1], so that there is always a positive reward
             return reward;
         }
@@ -53,7 +53,7 @@ namespace QLearningApp
             { 8, 10, 13, 15, 18, 20 },
             { 10, 12, 15, 20, 22, 25 },
             { 10, 15, 18, 20, 25, 28 },
-            { 30, 30, 30, 30, 30, 30 },
+            { 10, 15, 20, 22, 30, 30 },
             { 15, 20, 25, 27, 30, 30 }
         }; //{ 10, 15, 20, 22, 30, 30 }-second last row
         // 6*6 matrix Further improvements in the simulation: depending on changing network delays, the FPS matrix can change, allowing higher FPS at smaller values of gamma and delta
         return System.Math.Abs(FPS[gammaIndex,deltaIndex] + random_fps_noise);
diff --git a/QLearningApp/bin/Debug/QLearningApp.exe b/QLearningApp/bin/Debug/QLearningApp.exe
index ea93ca9266419cad2c275deb93337a461f88c6f1..69860436b49a7b74247d4dd3d9d92820600b4015 100644
Binary files a/QLearningApp/bin/Debug/QLearningApp.exe and b/QLearningApp/bin/Debug/QLearningApp.exe differ
diff --git a/QLearningApp/bin/Debug/QLearningApp.pdb b/QLearningApp/bin/Debug/QLearningApp.pdb
index 212d01b5aaf1be5750912b2a43e1e1ccbc55e85b..64633aa518b875e67a87556f189d8bc4ade484fb 100644
Binary files a/QLearningApp/bin/Debug/QLearningApp.pdb and b/QLearningApp/bin/Debug/QLearningApp.pdb differ
diff --git a/QLearningApp/obj/Debug/QLearningApp.csprojAssemblyReference.cache b/QLearningApp/obj/Debug/QLearningApp.csprojAssemblyReference.cache
index c240c2684ac24abfea7de1369b85f99398d906b7..81441d12ea59e70c6bb50a099f24c3d712ed2b57 100644
Binary files a/QLearningApp/obj/Debug/QLearningApp.csprojAssemblyReference.cache and b/QLearningApp/obj/Debug/QLearningApp.csprojAssemblyReference.cache differ
diff --git a/QLearningApp/obj/Debug/QLearningApp.exe b/QLearningApp/obj/Debug/QLearningApp.exe
index ea93ca9266419cad2c275deb93337a461f88c6f1..69860436b49a7b74247d4dd3d9d92820600b4015 100644
Binary files a/QLearningApp/obj/Debug/QLearningApp.exe and b/QLearningApp/obj/Debug/QLearningApp.exe differ
diff --git a/QLearningApp/obj/Debug/QLearningApp.pdb b/QLearningApp/obj/Debug/QLearningApp.pdb
index 212d01b5aaf1be5750912b2a43e1e1ccbc55e85b..64633aa518b875e67a87556f189d8bc4ade484fb 100644
Binary files a/QLearningApp/obj/Debug/QLearningApp.pdb and b/QLearningApp/obj/Debug/QLearningApp.pdb differ
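Note on the revised reward in RLsync.cs: with alpha = beta = 0.2 and the actions divided by 100, the action-cost factor is bounded below by 1 - 0.2 - 0.2 = 0.6 (assuming gamma_space, like delta_space, tops out at 100), so the product stays positive as the new comment claims; the dropped System.Math.Abs calls were redundant for non-negative action values. A quick worked check as a free-standing sketch (Reward is a hypothetical stand-in for RLSync.GetReward; constants copied from the diff):

    static double Reward(double fps, double targetFps, double gamma, double delta)
    {
        const double alpha = 0.2, beta = 0.2;   // action-cost weights from RLsync.cs
        return 1 / (1 + System.Math.Abs(fps - targetFps))
             * (1 - alpha * gamma / 100 - beta * delta / 100);
    }
    // Reward(20, 20, 50, 100) = 1.0  * (1 - 0.10 - 0.20) = 0.70    on-target, expensive actions
    // Reward(10, 20, 10, 10)  = 1/11 * (1 - 0.02 - 0.02) ~= 0.087  far off target, cheap actions

The replacement row in the simulated FPS matrix ({ 10, 15, 20, 22, 30, 30 } instead of a constant 30 row) restores the monotone trend of the neighbouring rows, so higher gamma/delta indices again simulate higher achievable FPS, in line with the in-code note about changing network delays.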