diff --git a/.vs/QLearningApp/v16/.suo b/.vs/QLearningApp/v16/.suo index c11f9588c5155375b9b6326e09a8499ec865b4e8..2c923d19ec055f7212ed0a292944aa1c237b41c9 100644 Binary files a/.vs/QLearningApp/v16/.suo and b/.vs/QLearningApp/v16/.suo differ diff --git a/QLearningApp/MachineLearning/Models/QLearning.cs b/QLearningApp/MachineLearning/Models/QLearning.cs index 29399190aa5aebab7c272689459ef40288274b4e..852a0c893cf1ca73ff06c9054f63b75251aa83c9 100644 --- a/QLearningApp/MachineLearning/Models/QLearning.cs +++ b/QLearningApp/MachineLearning/Models/QLearning.cs @@ -17,18 +17,23 @@ namespace QLearningApp.MachineLearning private RLSync _qLearningProblem; - public QLearning(double gamma, RLSync qLearningProblem) + public QLearning(double gamma, RLSync qLearningProblem, double TargetFPS) { _qLearningProblem = qLearningProblem; _gamma = gamma; // discount factor _qTable = new double[qLearningProblem.NumberOfStates][]; + /* Need to convert it in the 3-D array */ for (int i = 0; i < qLearningProblem.NumberOfStates; i++) _qTable[i] = new double[qLearningProblem.NumberOfActions]; for (int i = 0; i < qLearningProblem.NumberOfStates; i++) { for (int j = 0; j < qLearningProblem.NumberOfActions; j++) - _qTable[i][j] = 0; + _qTable[i][j] = 0; } + qLearningProblem.Target_fps = TargetFPS; + /*I can add PE and APL here*/ + Console.WriteLine("In QLearning, qLearningProblem.Target_fps = TargetFPS"+TargetFPS); + } public void TrainAgent(int numberOfIterations) @@ -40,7 +45,7 @@ namespace QLearningApp.MachineLearning } } - public QLearningStats Run(int initialStateIndex) + public QLearningStats Run(int initialStateIndex)//returning an object { if (initialStateIndex < 0 || initialStateIndex > _qLearningProblem.NumberOfStates) throw new ArgumentException($"The initial state can be between [0-{_qLearningProblem.NumberOfStates}", nameof(initialStateIndex)); @@ -58,21 +63,23 @@ namespace QLearningApp.MachineLearning int actionIndex = _qTable[stateIndex].ToList().IndexOf(_qTable[stateIndex].Max()); //state = action; double state = _qLearningProblem.SimulatedFPSValues(actionIndex, 4); - + double statefps_space = _qLearningProblem.fps_space.OrderBy(x => Math.Abs((long)x - state)).First(); stateIndex = _qLearningProblem.fps_spaceIndex(statefps_space); double gamma_action = _qLearningProblem.gamma_space[actionIndex]; - Console.WriteLine("Current state: " + prev_state + ", Action: " + gamma_action + ", Next state: " + state); + Console.WriteLine("Current FPS state: " + prev_state + ", Action: " + gamma_action + ", Next FPS state: " + state); prev_state = state; actions.Add(gamma_action); stateFPS.Add(state); - /*if (_qLearningProblem.GoalStateIsReached(action)) + /* Newly added code to make it finite loop */ + if (_qLearningProblem.GoalStateIsReached(state)) { - result.EndState = action; + result.EndState = state; + //need to write code here to end a timer break; - }*/ - if (counter == 50) - { break; } + } + /*if (counter == 50) + { break; }*/ } result.Actions = actions.ToArray(); return result; @@ -81,14 +88,18 @@ namespace QLearningApp.MachineLearning private void InitializeEpisode(double initialState) { //int currentStateIndex = _qLearningProblem.fps_spaceIndex(initialState); + + //Console.WriteLine("Enter targetted FPS value ={0}",targetFPS); double currentState = initialState;//FPS value int counter = 0; while (true) { currentState = TakeAction(currentState);//currentState= FPS, return should be FPS value counter++; - if (counter == 500) - break;//will run 500 times + /*if (counter == 500) + break;//will run 500 times*/ + if (_qLearningProblem.GoalStateIsReached(currentState)) + break; } } @@ -98,14 +109,14 @@ namespace QLearningApp.MachineLearning int randomIndexActionGamma = _random.Next(0, _qLearningProblem.NumberOfActions); //int randomIndexActionDelta = _random.Next(0, _qLearningProblem.NumberOfActions);//choosing a random action from number of actions defined double gamma_action = _qLearningProblem.gamma_space[randomIndexActionGamma];//retrieving gamma value form randomIndexAction - // double delta_action = _qLearningProblem.delta_space[randomIndexActionGamma]; + // double delta_action = _qLearningProblem.delta_space[randomIndexActionGamma]; double delta_action = 20; //fixed for now, can be changed in the future int delta_actionIndex = 0; double rho = 0.4; double saReward = _qLearningProblem.GetReward(currentState, gamma_action, delta_action); // Use rho to have a slowly & smoothly changing FPS value after an action is taken - double nextState = rho * currentState + (1-rho) * _qLearningProblem.SimulatedFPSValues(randomIndexActionGamma, delta_actionIndex); // FPS for time t+1, delta_actionIndex changes to 4 in the RLSync as it is hardcoded + double nextState = rho * currentState + (1 - rho) * _qLearningProblem.SimulatedFPSValues(randomIndexActionGamma, delta_actionIndex); // FPS for time t+1, delta_actionIndex changes to 4 in the RLSync as it is hardcoded double nextStatefps_space = _qLearningProblem.fps_space.OrderBy(x => Math.Abs((long)x - nextState)).First();//need to test first, looking for the closet value of the FPS int nextStateIndex = _qLearningProblem.fps_spaceIndex(nextStatefps_space); double nsReward = _qTable[nextStateIndex].Max(); @@ -122,5 +133,6 @@ namespace QLearningApp.MachineLearning return initialState; } + } } diff --git a/QLearningApp/Program.cs b/QLearningApp/Program.cs index 4f2299bb7fd2662aefc73170e8df11b076f89c36..597ca94eda7587c7ce022fd53e0d1bcdbd78aad8 100644 --- a/QLearningApp/Program.cs +++ b/QLearningApp/Program.cs @@ -10,17 +10,20 @@ namespace QLearningApp static void Main() { Console.WriteLine("step 1"); - var qLearning = new QLearning(0.8, new RLSync()); + /* Setting the parameters - To set the target FPS */ + double targetFPS = 0; + Console.WriteLine("Enter target FPS value"); + targetFPS= Convert.ToDouble(Console.ReadLine()); + Console.WriteLine("targetFPS from main="+targetFPS); + + var qLearning = new QLearning(0.8, new RLSync(),targetFPS); Console.WriteLine("Training Agent..."); qLearning.TrainAgent(2000); Console.WriteLine("step 2"); - Console.WriteLine("Training is Done!"); Console.WriteLine("Press any key to continue..."); - Console.WriteLine("step 3"); - Console.ReadLine(); do { diff --git a/QLearningApp/RLsync.cs b/QLearningApp/RLsync.cs index 53bd0f89c300eb6c5dab73e3d33d9d406eeeb23e..5287ef0613e93ecb092bc4090543f9943d067d16 100644 --- a/QLearningApp/RLsync.cs +++ b/QLearningApp/RLsync.cs @@ -8,18 +8,18 @@ namespace QLearningApp { private Random _random = new Random(); // Actions - public double[] gamma_space = { 10, 20, 30, 40, 50, 100 }; // the possible values of gamma - // private double[] delta_space = { 10, 20, 30, 40, 50, 100 }; // possible values of delta - public double[] delta_space = { 20 }; + public double[] gamma_space = { 10, 20, 30, 40, 50, 100 };//{ 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30};// the possible values of gamma + public double[] delta_space = { 10, 20, 30, 40, 50, 100 }; // possible values of delta- 6 values + //public double[] delta_space = { 20 }; public double alpha = 0.2; //original value public double beta = 0.2;//original value // State space - public double[] fps_space = { 10, 12, 15, 17, 20, 22, 25, 27, 30 };//PE, APL 0-30 read it form file + public double[] fps_space = { 10, 12, 15, 17, 20, 22, 25, 27, 30 };//PE, APL 0-30 read it form file-9 states private double[][] rewards = new double[5][]; - public double target_fps = 30; + public double Target_fps { get; set;}//made it a proerty /* target for syn window if it is 100ms, target should be given by user not hard-coded ini satse: 20 FPS, 100ms latency @@ -30,13 +30,13 @@ namespace QLearningApp public int NumberOfStates => fps_space.Length; - public int NumberOfActions => gamma_space.Length * delta_space.Length; + public int NumberOfActions => gamma_space.Length * delta_space.Length;//6*6=36 public double GetReward(double currentState, double action_gamma, double action_delta) { //double reward = 1 / (System.Math.Abs(currentState - target_fps)) - alpha * action_gamma - beta * action_delta; //double reward = 1 / (1+System.Math.Abs(currentState - target_fps)) - alpha * action_gamma/100 - beta * action_delta/100; // gamma nand delta are normalized to [0, 1]; does not perform well when fps is very low - double reward = 1 / (1 + System.Math.Abs(currentState - target_fps)) * (1 - alpha * action_gamma / 100 - beta * action_delta / 100); // gamma nand delta are normalized to [0, 1] + double reward = 1 / (1 + System.Math.Abs(currentState - Target_fps)) * (1 - alpha * action_gamma / 100 - beta * action_delta / 100); // gamma nand delta are normalized to [0, 1] return reward; } @@ -64,7 +64,14 @@ namespace QLearningApp } return fps_Index; } + + public bool GoalStateIsReached(double CurrentState) + { + // Console.WriteLine("target FPS="+target_fps); + return CurrentState >= Target_fps; + } + } diff --git a/QLearningApp/bin/Debug/QLearningApp.exe b/QLearningApp/bin/Debug/QLearningApp.exe index e4f4aa93e341dc975f26aaa27d9d8eea57239c6a..0251b165b78624be1647a5e81810119edd4a5a8a 100644 Binary files a/QLearningApp/bin/Debug/QLearningApp.exe and b/QLearningApp/bin/Debug/QLearningApp.exe differ diff --git a/QLearningApp/bin/Debug/QLearningApp.pdb b/QLearningApp/bin/Debug/QLearningApp.pdb index 9d44aaa90b958891a04c1091fe1a6b715ea4889a..ff74b3d8ed7901b7810459b8e1eab4d326b71edd 100644 Binary files a/QLearningApp/bin/Debug/QLearningApp.pdb and b/QLearningApp/bin/Debug/QLearningApp.pdb differ diff --git a/QLearningApp/obj/Debug/QLearningApp.csprojAssemblyReference.cache b/QLearningApp/obj/Debug/QLearningApp.csprojAssemblyReference.cache index 61c22a8f31600f3266c789c4539e43982ba04598..b9971596f5ab326a4ea03dc9bc7679cfb48bb94b 100644 Binary files a/QLearningApp/obj/Debug/QLearningApp.csprojAssemblyReference.cache and b/QLearningApp/obj/Debug/QLearningApp.csprojAssemblyReference.cache differ diff --git a/QLearningApp/obj/Debug/QLearningApp.exe b/QLearningApp/obj/Debug/QLearningApp.exe index e4f4aa93e341dc975f26aaa27d9d8eea57239c6a..0251b165b78624be1647a5e81810119edd4a5a8a 100644 Binary files a/QLearningApp/obj/Debug/QLearningApp.exe and b/QLearningApp/obj/Debug/QLearningApp.exe differ diff --git a/QLearningApp/obj/Debug/QLearningApp.pdb b/QLearningApp/obj/Debug/QLearningApp.pdb index 9d44aaa90b958891a04c1091fe1a6b715ea4889a..ff74b3d8ed7901b7810459b8e1eab4d326b71edd 100644 Binary files a/QLearningApp/obj/Debug/QLearningApp.pdb and b/QLearningApp/obj/Debug/QLearningApp.pdb differ