Commit a8964e1c authored by Anmulwar, Sweta V (PG/R - Elec Electronic Eng)

change reward function

parent 225884e5
@@ -49,24 +49,29 @@ namespace QLearningApp.MachineLearning
     int stateIndex = initialStateIndex;
     int counter = 0;
     List<double> actions = new List<double>();
+    List<double> stateFPS = new List<double>();
+    double prev_state = result.InitialState;
     while (true)
     {
         result.Steps += 1;
         counter++;
         int actionIndex = _qTable[stateIndex].ToList().IndexOf(_qTable[stateIndex].Max());
         //state = action;
         double state = _qLearningProblem.SimulatedFPSValues(actionIndex, 4);
         double statefps_space = _qLearningProblem.fps_space.OrderBy(x => Math.Abs((long)x - state)).First();
         stateIndex = _qLearningProblem.fps_spaceIndex(statefps_space);
         double gamma_action = _qLearningProblem.gamma_space[actionIndex];
+        Console.WriteLine("Current state: " + prev_state + ", Action: " + gamma_action + ", Next state: " + state);
+        prev_state = state;
         actions.Add(gamma_action);
+        stateFPS.Add(state);
         /*if (_qLearningProblem.GoalStateIsReached(action))
         {
             result.EndState = action;
             break;
         }*/
-        if (counter == 100)
+        if (counter == 50)
         { break; }
     }
     result.Actions = actions.ToArray();
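
The rollout loop above is purely greedy: at each step it picks the action whose Q-value is largest in the current state's row, via ToList().IndexOf(...Max()). A minimal sketch of that selection step, with a made-up Q-row (only gamma_space is taken from the commit):

using System;
using System.Linq;

class GreedyActionDemo
{
    static void Main()
    {
        double[] gammaSpace = { 10, 20, 30, 40, 50, 100 };       // action space, copied from RLSync below
        double[] qRow = { 0.10, 0.70, 0.30, 0.65, 0.20, 0.05 };  // hypothetical Q-values for one state

        // Same selection as the rollout loop: index of the maximum Q-value in this row
        int actionIndex = qRow.ToList().IndexOf(qRow.Max());
        Console.WriteLine($"greedy action: gamma = {gammaSpace[actionIndex]}"); // prints: gamma = 20
    }
}

Array.IndexOf(qRow, qRow.Max()) would do the same job without the intermediate list allocation.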
@@ -82,8 +87,8 @@ namespace QLearningApp.MachineLearning
     {
         currentState = TakeAction(currentState); // currentState = FPS; return should be an FPS value
         counter++;
-        if (counter == 100)
-            break; // will run 100 times
+        if (counter == 500)
+            break; // will run 500 times
     }
 }
@@ -96,9 +101,11 @@ namespace QLearningApp.MachineLearning
     // double delta_action = _qLearningProblem.delta_space[randomIndexActionGamma];
     double delta_action = 20; // fixed for now, can be changed in the future
     int delta_actionIndex = 0;
+    double rho = 0.4;
     double saReward = _qLearningProblem.GetReward(currentState, gamma_action, delta_action);
-    double nextState = _qLearningProblem.SimulatedFPSValues(randomIndexActionGamma, delta_actionIndex); // FPS for time t+1; delta_actionIndex is set to 4 inside RLSync as it is hardcoded
+    // Use rho to obtain a slowly and smoothly changing FPS value after an action is taken
+    double nextState = rho * currentState + (1 - rho) * _qLearningProblem.SimulatedFPSValues(randomIndexActionGamma, delta_actionIndex); // FPS for time t+1; delta_actionIndex is set to 4 inside RLSync as it is hardcoded
     double nextStatefps_space = _qLearningProblem.fps_space.OrderBy(x => Math.Abs((long)x - nextState)).First(); // needs testing first; looking for the closest FPS value
     int nextStateIndex = _qLearningProblem.fps_spaceIndex(nextStatefps_space);
     double nsReward = _qTable[nextStateIndex].Max();
...
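
The new rho line in TakeAction is an exponential moving average: the next state keeps 40% of the current FPS and takes 60% from the fresh simulator sample, so the observed FPS drifts toward new values rather than jumping. A standalone sketch of the effect, with made-up samples (only rho = 0.4 comes from the commit):

using System;

class SmoothingDemo
{
    static void Main()
    {
        double rho = 0.4;   // weight kept on the previous state, as in TakeAction above
        double state = 10;  // hypothetical starting FPS
        double[] rawSamples = { 30, 30, 30, 30, 30 }; // hypothetical raw simulator outputs

        foreach (double raw in rawSamples)
        {
            // Exponential moving average: move only (1 - rho) of the way toward each new sample
            state = rho * state + (1 - rho) * raw;
            Console.WriteLine($"smoothed FPS: {state:F2}");
        }
        // Prints 22.00, 26.80, 28.72, 29.49, 29.80: the state approaches 30 gradually
    }
}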
@@ -9,14 +9,16 @@ namespace QLearningApp.MachineLearning
     public int Steps { get; set; }
     public double[] Actions { get; set; }
+    public double[] State { get; set; }
     public override string ToString()
     {
         StringBuilder sb = new StringBuilder();
         sb.AppendLine($"Agent needed {Steps} steps to find the solution");
         sb.AppendLine($"Agent Initial State: {InitialState}");
-        foreach (var action in Actions)
-            sb.AppendLine($"Action: {action}");
-        sb.AppendLine($"Agent arrived at the goal state: {EndState}");
+        /*foreach (var action in Actions)
+            sb.AppendLine($"Action: {action}");*/
+        //sb.AppendLine($"Agent arrived at the goal state: {EndState}");
         return sb.ToString();
     }
 }
...
 using QLearningApp.MachineLearning;
+using System;
 using System.Collections.Generic;
 namespace QLearningApp
 {
     public class RLSync
     {
+        private Random _random = new Random();
         // Actions
         public double[] gamma_space = { 10, 20, 30, 40, 50, 100 }; // the possible values of gamma
         // private double[] delta_space = { 10, 20, 30, 40, 50, 100 }; // possible values of delta
         public double[] delta_space = { 20 };
-        public double alpha = 0.5; // original value
-        public double beta = 0.5; // original value
+        public double alpha = 0.2; // original value: 0.5
+        public double beta = 0.2; // original value: 0.5
         // State space
-        public double[] fps_space = { 10, 15, 20, 25, 30 };
+        public double[] fps_space = { 10, 12, 15, 17, 20, 22, 25, 27, 30 }; // PE, APL 0-30; read from a file
         private double[][] rewards = new double[5][];
         public double target_fps = 30;
+        /*
+        target for the sync window: if it is 100 ms, the target should be given by the user, not hard-coded
+        initial state: 20 FPS, 100 ms latency
+        initial values 30 FPS, APL 100 ms --> target APL 20 ms, FPS >= 15
+        FPS, PE: 30 FPS, PE: 25 ms --> target
+        trade-off
+        */
         public int NumberOfStates => fps_space.Length;
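
Refining fps_space from 5 to 9 bins halves the worst-case quantization error, from 2.5 FPS (midway between neighbours 5 apart) to at most 1.5 FPS. A small sketch comparing how the two grids snap the same raw reading (the readings are made up):

using System;
using System.Linq;

class GridComparison
{
    static void Main()
    {
        double[] oldGrid = { 10, 15, 20, 25, 30 };                  // fps_space before this commit
        double[] newGrid = { 10, 12, 15, 17, 20, 22, 25, 27, 30 };  // fps_space after this commit

        foreach (double reading in new double[] { 13.0, 23.0 })
        {
            // Nearest-neighbour quantization, as used throughout the agent
            double oldNearest = oldGrid.OrderBy(x => Math.Abs(x - reading)).First();
            double newNearest = newGrid.OrderBy(x => Math.Abs(x - reading)).First();
            Console.WriteLine($"raw {reading}: old grid -> {oldNearest}, new grid -> {newNearest}");
        }
        // raw 13: old grid -> 15, new grid -> 12
        // raw 23: old grid -> 25, new grid -> 22
    }
}

One caveat worth checking: the rewards array is still declared as new double[5][]; if it is indexed by state, it presumably needs to grow to fps_space.Length (9) along with the grid.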
@@ -26,17 +35,21 @@ namespace QLearningApp
 public double GetReward(double currentState, double action_gamma, double action_delta)
 {
     //double reward = 1 / (System.Math.Abs(currentState - target_fps)) - alpha * action_gamma - beta * action_delta;
-    double reward = 1 / (1 + System.Math.Abs(currentState - target_fps)) - alpha * action_gamma / 100 - beta * action_delta / 100; // gamma and delta are normalized to [0, 1]
+    //double reward = 1 / (1 + System.Math.Abs(currentState - target_fps)) - alpha * action_gamma / 100 - beta * action_delta / 100; // gamma and delta are normalized to [0, 1]; does not perform well when FPS is very low
+    double reward = 1 / (1 + System.Math.Abs(currentState - target_fps)) * (1 - alpha * action_gamma / 100 - beta * action_delta / 100); // gamma and delta are normalized to [0, 1]
     return reward;
 }
 public double SimulatedFPSValues(int gammaIndex, int deltaIndex) // trial values; this is the simulated environment
 {
     // keeping deltaIndex at 4 so that the RL algorithm can reach the 30 FPS value; changed it to 2 to get better Q values
-    deltaIndex = 4;
+    deltaIndex = _random.Next(3, 5); // trying randomness
+    double random_fps_noise = 3 * (_random.NextDouble() - 0.5); // add uniform random noise to the observed FPS value
+    // deltaIndex = 3;
     double[,] FPS = new double[,] { { 0, 10, 10, 15, 15, 15 }, { 10, 12, 13, 15, 20, 22 }, { 10, 15, 15, 20, 25, 25 }, { 10, 20, 20, 25, 30, 30 }, { 10, 20, 25, 30, 30, 30 }, { 25, 25, 27, 30, 30, 30 } };
-    return FPS[gammaIndex, deltaIndex];
+    // Possible simulation improvement: with changing network delays the FPS matrix could change, allowing higher FPS at smaller values of gamma and delta
+    return FPS[gammaIndex, deltaIndex] + random_fps_noise;
 }
 public int fps_spaceIndex(double FPS)
...
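
The reward change that gives this commit its name swaps a subtractive action penalty for a multiplicative one. With the old form, far from the target the fixed cost terms alpha * gamma / 100 + beta * delta / 100 dominate the tiny closeness term, so the immediate reward mostly favours cheap actions regardless of their effect on FPS and can go negative for every action; the new form scales the cost by the closeness term instead. A quick comparison using the commit's own constants (the sample FPS values are made up):

using System;

class RewardComparison
{
    static void Main()
    {
        double alpha = 0.2, beta = 0.2, targetFps = 30;
        double gamma = 100, delta = 20; // the most expensive gamma action, and the fixed delta

        foreach (double fps in new double[] { 10, 30 })
        {
            double closeness = 1 / (1 + Math.Abs(fps - targetFps));
            double oldReward = closeness - alpha * gamma / 100 - beta * delta / 100;          // subtractive penalty
            double newReward = closeness * (1 - alpha * gamma / 100 - beta * delta / 100);    // multiplicative penalty
            Console.WriteLine($"fps={fps}: old={oldReward:F3}, new={newReward:F3}");
        }
        // fps=10: old=-0.192, new=0.036  (the subtractive form goes negative far from target)
        // fps=30: old=0.760,  new=0.760  (both agree at the target)
    }
}

Separately, the injected noise 3 * (NextDouble() - 0.5) is uniform on [-1.5, 1.5); since neighbouring fps_space entries are only 2-3 apart, a noisy reading near a bin boundary can occasionally snap to the adjacent state, which is presumably the intended exploration effect.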