diff --git a/.vs/QLearningApp/v16/.suo b/.vs/QLearningApp/v16/.suo
index 2c923d19ec055f7212ed0a292944aa1c237b41c9..4440aaa92ef0803fdda288649133ffcf000c393b 100644
Binary files a/.vs/QLearningApp/v16/.suo and b/.vs/QLearningApp/v16/.suo differ
diff --git a/QLearningApp/MachineLearning/Models/QLearning.cs b/QLearningApp/MachineLearning/Models/QLearning.cs
index 852a0c893cf1ca73ff06c9054f63b75251aa83c9..4734e96d441d60e24cffece6941177272453f1ce 100644
--- a/QLearningApp/MachineLearning/Models/QLearning.cs
+++ b/QLearningApp/MachineLearning/Models/QLearning.cs
@@ -12,23 +12,27 @@ namespace QLearningApp.MachineLearning
         private double _gamma;
         public double Gamma { get => _gamma; }
 
-        private double[][] _qTable;
-        public double[][] QTable { get => _qTable; }
+        private double[,,] _qTable;
+        public double[,,] QTable { get => _qTable; }
 
         private RLSync _qLearningProblem;
+        List<double> matrixList = new List<double>();
 
         public QLearning(double gamma, RLSync qLearningProblem, double TargetFPS)
         {
             _qLearningProblem = qLearningProblem;
             _gamma = gamma; // discount factor
-            _qTable = new double[qLearningProblem.NumberOfStates][];
+            _qTable = new double[qLearningProblem.NumberOfStates, qLearningProblem.gamma_length, qLearningProblem.delta_length]; /* Q-table is now a 3-D array: state x gamma x delta */
-            for (int i = 0; i < qLearningProblem.NumberOfStates; i++)
-                _qTable[i] = new double[qLearningProblem.NumberOfActions];
+            /*for (int i = 0; i < qLearningProblem.NumberOfStates; i++) //fps_space.Length;//9
+                _qTable[i] = new double[qLearningProblem.NumberOfActions];*/ //6*6=36
             for (int i = 0; i < qLearningProblem.NumberOfStates; i++)
             {
-                for (int j = 0; j < qLearningProblem.NumberOfActions; j++)
-                    _qTable[i][j] = 0;
+                for (int j = 0; j < qLearningProblem.gamma_length; j++)
+                {
+                    for (int k = 0; k < qLearningProblem.delta_length; k++)
+                        _qTable[i,j,k] = 0;
+                }
             }
             qLearningProblem.Target_fps = TargetFPS; /*I can add PE and APL here*/
@@ -48,28 +52,47 @@ namespace QLearningApp.MachineLearning
         public QLearningStats Run(int initialStateIndex)//returning an object
         {
             if (initialStateIndex < 0 || initialStateIndex > _qLearningProblem.NumberOfStates) throw new ArgumentException($"The initial state can be between [0-{_qLearningProblem.NumberOfStates}", nameof(initialStateIndex));
-
             var result = new QLearningStats();
             result.InitialState = _qLearningProblem.fps_space[initialStateIndex];
             int stateIndex = initialStateIndex;
-            int counter = 0;
-            List<double> actions = new List<double>();
+            int jmax = 0, kmax = 0;
+            List<double> gammaactions = new List<double>();
+            List<double> deltaactions = new List<double>();
             List<double> stateFPS = new List<double>();
             double prev_state = result.InitialState;
             while (true)
             {
                 result.Steps += 1;
-                counter++;
-                int actionIndex = _qTable[stateIndex].ToList().IndexOf(_qTable[stateIndex].Max());
+                //int actionIndex = _qTable[stateIndex].ToList().IndexOf(_qTable[stateIndex].Max());
                 //state = action;
-                double state = _qLearningProblem.SimulatedFPSValues(actionIndex, 4);
-
+                //int actionIndex = matrixList.IndexOf(matrixList.Max());
+
+                //int actionIndex = _qTable[0,0].ToList().IndexOf(_qTable[stateIndex,0,0].Max());//it would be nice if it works
+                double element = _qTable[stateIndex, 0, 0];
+                for (int j = 0; j < _qLearningProblem.gamma_length; j++)
+                    for (int k = 0; k < _qLearningProblem.delta_length; k++)
+                    {
+                        if (_qTable[stateIndex, j, k] >= element)
+                        {
+                            element = _qTable[stateIndex, j, k];
+                            jmax = j;//gamma index
+                            kmax = k;//delta index
+                        }
+                    }
+
+                double state = _qLearningProblem.SimulatedFPSValues(jmax, kmax);//FPS value
+
+                //Console.WriteLine("Next state FPS"+state);
                 double statefps_space = _qLearningProblem.fps_space.OrderBy(x => Math.Abs((long)x - state)).First();
                 stateIndex = _qLearningProblem.fps_spaceIndex(statefps_space);
-                double gamma_action = _qLearningProblem.gamma_space[actionIndex];
-                Console.WriteLine("Current FPS state: " + prev_state + ", Action: " + gamma_action + ", Next FPS state: " + state);
+                double gamma_action = _qLearningProblem.gamma_space[jmax];
+                double delta_action = _qLearningProblem.delta_space[kmax];
+
+                //Console.WriteLine("Current FPS state" + prev_state);
+                Console.WriteLine("Current FPS state: " + prev_state + ", Gamma Action: " + gamma_action + ", Delta Action: " + delta_action + ", Next FPS state: " + state);
                 prev_state = state;
-                actions.Add(gamma_action);
+                gammaactions.Add(gamma_action);
+                deltaactions.Add(delta_action);
                 stateFPS.Add(state);
                 /* Newly added code to make it finite loop */
                 if (_qLearningProblem.GoalStateIsReached(state))
@@ -81,7 +104,8 @@ namespace QLearningApp.MachineLearning
                 /*if (counter == 50)
                 { break; }*/
             }
-            result.Actions = actions.ToArray();
+            result.gammaActions = gammaactions.ToArray();
+            result.deltaActions = deltaactions.ToArray();
             return result;
         }
 
@@ -91,11 +115,11 @@ namespace QLearningApp.MachineLearning
             //Console.WriteLine("Enter targetted FPS value ={0}",targetFPS);
             double currentState = initialState;//FPS value
-            int counter = 0;
+            //int counter = 0;
             while (true)
             {
                 currentState = TakeAction(currentState);//currentState= FPS, return should be FPS value
-                counter++;
+                //counter++;
                 /*if (counter == 500)
                     break;//will run 500 times*/
                 if (_qLearningProblem.GoalStateIsReached(currentState))
@@ -106,23 +130,41 @@ namespace QLearningApp.MachineLearning
         private double TakeAction(double currentState)
        {
             //var validActions = _qLearningProblem.GetValidActions(currentState);
-            int randomIndexActionGamma = _random.Next(0, _qLearningProblem.NumberOfActions);
+            int randomIndexActionGamma = _random.Next(0, _qLearningProblem.gamma_length);
+
+            int randomIndexActionDelta = _random.Next(0, _qLearningProblem.delta_length);
+            //Console.WriteLine("randomIndexActionGamma ={0},randomIndexActionDelta ={1}", randomIndexActionGamma, randomIndexActionDelta);
             //int randomIndexActionDelta = _random.Next(0, _qLearningProblem.NumberOfActions);//choosing a random action from number of actions defined
             double gamma_action = _qLearningProblem.gamma_space[randomIndexActionGamma];//retrieving gamma value form randomIndexAction
-            // double delta_action = _qLearningProblem.delta_space[randomIndexActionGamma];
-            double delta_action = 20; //fixed for now, can be changed in the future
-            int delta_actionIndex = 0;
+            // double delta_action = _qLearningProblem.delta_space[randomIndexActionGamma];
+            double delta_action = _qLearningProblem.delta_space[randomIndexActionDelta]; //delta is now chosen randomly as well, no longer fixed
+            //int delta_actionIndex = 0;
             double rho = 0.4;
             double saReward = _qLearningProblem.GetReward(currentState, gamma_action, delta_action);
             // Use rho to have a slowly & smoothly changing FPS value after an action is taken
-            double nextState = rho * currentState + (1 - rho) * _qLearningProblem.SimulatedFPSValues(randomIndexActionGamma, delta_actionIndex); // FPS for time t+1, delta_actionIndex changes to 4 in the RLSync as it is hardcoded
+            double nextState = rho * currentState + (1 - rho) * _qLearningProblem.SimulatedFPSValues(randomIndexActionGamma, randomIndexActionDelta); // FPS for time t+1, with a randomly chosen delta index
             double nextStatefps_space = _qLearningProblem.fps_space.OrderBy(x => Math.Abs((long)x - nextState)).First();//need to test first, looking for the closet value of the FPS
-            int nextStateIndex = _qLearningProblem.fps_spaceIndex(nextStatefps_space);
-            double nsReward = _qTable[nextStateIndex].Max();
+            double nextStateIndex = _qLearningProblem.fps_spaceIndex(nextStatefps_space);
+            //double nsReward = _qTable[Convert.ToInt32(nextStateIndex), 0, 0];
+            /*
+             The next state index is fixed; find the highest Q value over that state's (gamma, delta) slice
+             */
+            double Max = _qTable[Convert.ToInt32(nextStateIndex), 0, 0];
+            for (int j = 0; j < _qLearningProblem.gamma_length; j++)
+                for (int k = 0; k < _qLearningProblem.delta_length; k++)
+                {
+                    if (_qTable[Convert.ToInt32(nextStateIndex), j, k] > Max)
+                    {
+                        Max = _qTable[Convert.ToInt32(nextStateIndex), j, k];
+                    }
+                }
+
+            double nsReward = Max;
+            //Console.WriteLine("value of nsReward=" + nsReward);
             double qCurrentState = saReward + (_gamma * nsReward);
             int currentStateIndex = _qLearningProblem.fps_spaceIndex(currentState);
-            _qTable[currentStateIndex][randomIndexActionGamma] = qCurrentState;
+            _qTable[currentStateIndex, randomIndexActionGamma, randomIndexActionDelta] = qCurrentState;
             return nextState;
         }
 
@@ -136,3 +178,14 @@ namespace QLearningApp.MachineLearning
         }
     }
 }
+
+
+/*
+ for (int i = 0; i < _qLearningProblem.NumberOfStates; i++)//check here
+     for (int j = 0; j < _qLearningProblem.delta_length; j++)
+         for (int k = 0; k < _qLearningProblem.gamma_length; k++)
+         {
+             matrixList.Add(_qTable[i, j, k]);
+         }
+
+ */
\ No newline at end of file
diff --git a/QLearningApp/MachineLearning/Models/QLearningStats.cs b/QLearningApp/MachineLearning/Models/QLearningStats.cs
index 3cbb2664daf35e29eba91ba9249b9a63193482a1..5daac3908d2b3490bbb954d7e28944720fedf8e2 100644
--- a/QLearningApp/MachineLearning/Models/QLearningStats.cs
+++ b/QLearningApp/MachineLearning/Models/QLearningStats.cs
@@ -7,7 +7,8 @@ namespace QLearningApp.MachineLearning
         public double InitialState { get; set; }
         public double EndState { get; set; }
         public int Steps { get; set; }
-        public double[] Actions { get; set; }
+        public double[] gammaActions { get; set; }
+        public double[] deltaActions { get; set; }
 
         public double[] State { get; set; }
 
@@ -16,9 +17,11 @@ namespace QLearningApp.MachineLearning
             StringBuilder sb = new StringBuilder();
             sb.AppendLine($"Agent needed {Steps} steps to find the solution");
             sb.AppendLine($"Agent Initial State: {InitialState}");
-            /*foreach (var action in Actions)
-                sb.AppendLine($"Action: {action}");*/
-            //sb.AppendLine($"Agent arrived at the goal state: {EndState}");
+            foreach (var action in gammaActions)
+                sb.AppendLine($"gammaAction: {action}");
+            foreach (var action in deltaActions)
+                sb.AppendLine($"deltaAction: {action}");
+            sb.AppendLine($"Agent arrived at the goal state: {EndState}");
             return sb.ToString();
         }
     }
diff --git a/QLearningApp/Program.cs b/QLearningApp/Program.cs
index 597ca94eda7587c7ce022fd53e0d1bcdbd78aad8..776be1c1a7f30f69590539113e146ab0d8c83e64 100644
--- a/QLearningApp/Program.cs
+++ b/QLearningApp/Program.cs
@@ -35,8 +35,8 @@ namespace QLearningApp
             {
                 var qLearningStats = qLearning.Run(initialStateIndex);
                 Console.WriteLine(qLearningStats.ToString());
-                var normalizedMatrix = qLearning.QTable.NormalizeMatrix();
-                Console.Write(normalizedMatrix.Print());
+                //var normalizedMatrix = qLearning.QTable.NormalizeMatrix();
+                //Console.Write(qLearning.QTable.ToString());
             }
             catch(Exception ex)
             {
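Note: the greedy scan over one state's (gamma, delta) slice of the 3-D Q-table now appears twice in QLearning.cs (once in Run(), once in TakeAction()). Below is a minimal sketch of how that scan could be pulled into a single helper; the QTableUtil and ArgMax names and the demo Main are illustrative assumptions, not code from this patch.

    using System;

    // Illustrative only: scans one state's slice of a [states, gammas, deltas]
    // Q-table and returns the best value plus the (gamma, delta) indices.
    public static class QTableUtil
    {
        public static (double value, int gammaIndex, int deltaIndex) ArgMax(double[,,] qTable, int stateIndex)
        {
            double best = qTable[stateIndex, 0, 0];
            int bestGamma = 0, bestDelta = 0;
            for (int j = 0; j < qTable.GetLength(1); j++)      // gamma dimension
                for (int k = 0; k < qTable.GetLength(2); k++)  // delta dimension
                    if (qTable[stateIndex, j, k] > best)
                    {
                        best = qTable[stateIndex, j, k];
                        bestGamma = j;
                        bestDelta = k;
                    }
            return (best, bestGamma, bestDelta);
        }

        public static void Main()
        {
            var q = new double[2, 3, 3];
            q[0, 1, 2] = 0.7;                       // plant a known maximum
            var (v, g, d) = ArgMax(q, 0);
            Console.WriteLine($"max={v} at gamma index {g}, delta index {d}"); // max=0.7 at 1, 2
        }
    }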
diff --git a/QLearningApp/RLsync.cs b/QLearningApp/RLsync.cs
index 5287ef0613e93ecb092bc4090543f9943d067d16..2d5082881725f1d77b9e10febcffaa87b03f71a6 100644
--- a/QLearningApp/RLsync.cs
+++ b/QLearningApp/RLsync.cs
@@ -24,19 +24,21 @@ namespace QLearningApp
         target for syn window if it is 100ms, target should be given by user not hard-coded
         ini satse: 20 FPS, 100ms latency
         initial values 30FPS, APL 100ms--> Target APL 20ms, FPS >=15
-        FPS, PE: 30FPS, PE: 25ms --> target
+        FPS, PE: 30FPS, PE: 25ms --> target trade-off
         */
 
-        public int NumberOfStates => fps_space.Length;
+        public int NumberOfStates => fps_space.Length;//9
 
-        public int NumberOfActions => gamma_space.Length * delta_space.Length;//6*6=36
+        //public int NumberOfActions => gamma_space.Length * delta_space.Length;//6*6=36
+        public int gamma_length => gamma_space.Length;//6
+        public int delta_length => delta_space.Length;//6
 
         public double GetReward(double currentState, double action_gamma, double action_delta)
         {
             //double reward = 1 / (System.Math.Abs(currentState - target_fps)) - alpha * action_gamma - beta * action_delta;
             //double reward = 1 / (1+System.Math.Abs(currentState - target_fps)) - alpha * action_gamma/100 - beta * action_delta/100; // gamma nand delta are normalized to [0, 1]; does not perform well when fps is very low
-            double reward = 1 / (1 + System.Math.Abs(currentState - Target_fps)) * (1 - alpha * action_gamma / 100 - beta * action_delta / 100); // gamma nand delta are normalized to [0, 1]
+            double reward = 1 / (1 + System.Math.Abs(currentState - Target_fps)) * (1 - alpha * System.Math.Abs(action_gamma / 100) - beta * System.Math.Abs(action_delta / 100)); // gamma and delta are normalized to [0, 1]
             return reward;
         }
 
@@ -44,15 +46,21 @@
         {
             //keeping deltaIndex as 4 so that RL algo could get the FPS 30 value, changed it to 2 to get the better Q values
-            deltaIndex = _random.Next(3,5);//trying randomness
+            //deltaIndex = _random.Next(3,5);//trying randomness
             double random_fps_noise = 3*(_random.NextDouble()-0.5); // adding uniform random noise to observed fps value
             // deltaIndex = 3;
-            double[,] FPS = new double[,] { { 0, 10, 10, 15, 15, 15 }, { 10, 12, 13, 15, 20, 22 }, { 10, 15, 15, 20, 25, 25 }, { 10, 20, 20, 25, 30, 30 }, { 10, 20, 25, 30, 30, 30 }, { 25, 25, 27, 30, 30, 30 } };
-            // Further improvements in the simulation: depending on changing network delays, the FPS matrix can change, allowing higher FPS at smaller values of gamma and delta
-            return FPS[gammaIndex,deltaIndex] + random_fps_noise;
+            double[,] FPS = new double[,] { { 0, 8, 10, 12, 13, 15 },
+                                            { 8, 10, 13, 15, 18, 20 },
+                                            { 10, 12, 15, 20, 22, 25 },
+                                            { 10, 15, 18, 20, 25, 28 },
+                                            { 30, 30, 30, 30, 30, 30 },
+                                            { 15, 20, 25, 27, 30, 30 } }; //{ 10, 15, 20, 22, 30, 30 }-second last row
+            // 6x6 matrix. Further improvements in the simulation: depending on changing network delays, the FPS matrix can change, allowing higher FPS at smaller values of gamma and delta
+            return System.Math.Abs(FPS[gammaIndex,deltaIndex] + random_fps_noise);
         }
 
         public int fps_spaceIndex(double FPS)
+        {
             var fps_Index=0;
             for (int i = 0; i<fps_space.Length; i++)
             {
                 if (FPS == fps_space[i])
                     fps_Index = i;
                 //break;
-            }
             return fps_Index;
         }
diff --git a/QLearningApp/bin/Debug/QLearningApp.exe b/QLearningApp/bin/Debug/QLearningApp.exe
index 0251b165b78624be1647a5e81810119edd4a5a8a..ea93ca9266419cad2c275deb93337a461f88c6f1 100644
Binary files a/QLearningApp/bin/Debug/QLearningApp.exe and b/QLearningApp/bin/Debug/QLearningApp.exe differ
diff --git a/QLearningApp/bin/Debug/QLearningApp.pdb b/QLearningApp/bin/Debug/QLearningApp.pdb
index ff74b3d8ed7901b7810459b8e1eab4d326b71edd..212d01b5aaf1be5750912b2a43e1e1ccbc55e85b 100644
Binary files a/QLearningApp/bin/Debug/QLearningApp.pdb and b/QLearningApp/bin/Debug/QLearningApp.pdb differ
diff --git a/QLearningApp/obj/Debug/QLearningApp.csprojAssemblyReference.cache b/QLearningApp/obj/Debug/QLearningApp.csprojAssemblyReference.cache
index b9971596f5ab326a4ea03dc9bc7679cfb48bb94b..c240c2684ac24abfea7de1369b85f99398d906b7 100644
Binary files a/QLearningApp/obj/Debug/QLearningApp.csprojAssemblyReference.cache and b/QLearningApp/obj/Debug/QLearningApp.csprojAssemblyReference.cache differ
diff --git a/QLearningApp/obj/Debug/QLearningApp.exe b/QLearningApp/obj/Debug/QLearningApp.exe
index 0251b165b78624be1647a5e81810119edd4a5a8a..ea93ca9266419cad2c275deb93337a461f88c6f1 100644
Binary files a/QLearningApp/obj/Debug/QLearningApp.exe and b/QLearningApp/obj/Debug/QLearningApp.exe differ
diff --git a/QLearningApp/obj/Debug/QLearningApp.pdb b/QLearningApp/obj/Debug/QLearningApp.pdb
index ff74b3d8ed7901b7810459b8e1eab4d326b71edd..212d01b5aaf1be5750912b2a43e1e1ccbc55e85b 100644
Binary files a/QLearningApp/obj/Debug/QLearningApp.pdb and b/QLearningApp/obj/Debug/QLearningApp.pdb differ
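For reference, below is a compact, self-contained sketch of the tabular update this patch implements in TakeAction(): pick a random (gamma, delta) pair, then write Q[s, g, d] = reward + discount * max over the next state's slice. The patch assigns the new value directly (an implicit learning rate of 1), and the sketch does the same. The TabularUpdate and Update names, the 9x6x6 sizing taken from the comments above, and the demo values are assumptions for illustration only.

    using System;

    // Illustrative sketch of the Q-table write performed in TakeAction().
    public static class TabularUpdate
    {
        static readonly Random Rng = new Random();

        public static void Update(double[,,] q, int state, int nextState, double reward, double discount,
                                  out int gammaIdx, out int deltaIdx)
        {
            gammaIdx = Rng.Next(q.GetLength(1));   // random exploration over gamma actions
            deltaIdx = Rng.Next(q.GetLength(2));   // random exploration over delta actions

            double nextBest = double.MinValue;     // max Q over the next state's (gamma, delta) slice
            for (int j = 0; j < q.GetLength(1); j++)
                for (int k = 0; k < q.GetLength(2); k++)
                    nextBest = Math.Max(nextBest, q[nextState, j, k]);

            // Direct assignment, mirroring qCurrentState = saReward + (_gamma * nsReward) in the patch
            q[state, gammaIdx, deltaIdx] = reward + discount * nextBest;
        }

        public static void Main()
        {
            var q = new double[9, 6, 6];           // 9 FPS states, 6 gamma x 6 delta actions
            Update(q, 3, 4, 0.5, 0.8, out int g, out int d);
            Console.WriteLine($"Q[3,{g},{d}] = {q[3, g, d]}"); // 0.5 when the table starts at zero
        }
    }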