diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000000000000000000000000000000000000..94a25f7f4cb416c083d265558da75d457237d671
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
index c557f00ec4f3084715eb411459a61bf94bcb92e4..bb822320b0b5db0d1d65fe3f974e2fb14eb61126 100644
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
@@ -8,8 +8,12 @@
     <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
     <option name="LAST_RESOLUTION" value="IGNORE" />
   </component>
+  <component name="Git.Settings">
+    <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
+  </component>
   <component name="ProjectId" id="1UWaMsw6zBMWl6bWHnCTXed0kq5" />
   <component name="PropertiesComponent">
+    <property name="SHARE_PROJECT_CONFIGURATION_FILES" value="true" />
     <property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
   </component>
   <component name="RunDashboard">
@@ -47,6 +51,9 @@
       <module name="Task_likelihood" />
       <option name="INTERPRETER_OPTIONS" value="" />
       <option name="PARENT_ENVS" value="true" />
+      <envs>
+        <env name="PYTHONUNBUFFERED" value="1" />
+      </envs>
       <option name="SDK_HOME" value="" />
       <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
       <option name="IS_MODULE_SDK" value="true" />
diff --git a/__pycache__/configuration.cpython-36.pyc b/__pycache__/configuration.cpython-36.pyc
index 19216a8cfb866d94afa95a5ae329a4cf9ee47f4d..83e4f68e8ee08c9310c661f96b30697eebb17b31 100644
Binary files a/__pycache__/configuration.cpython-36.pyc and b/__pycache__/configuration.cpython-36.pyc differ
diff --git a/config/__pycache__/__init__.cpython-36.pyc b/config/__pycache__/__init__.cpython-36.pyc
index 28b47b6d3af461c0305ac294cb553cdfc58de179..f948b57987e77dbdac373c3ee2fff24229f493d2 100644
Binary files a/config/__pycache__/__init__.cpython-36.pyc and b/config/__pycache__/__init__.cpython-36.pyc differ
diff --git a/config/__pycache__/config.cpython-36.pyc b/config/__pycache__/config.cpython-36.pyc
index 2d7d4051f78e15cfb1f9dbe8d4b3efa5d00616b5..690e69f069ede00651f0b3c6eaae4b77b2dd567b 100644
Binary files a/config/__pycache__/config.cpython-36.pyc and b/config/__pycache__/config.cpython-36.pyc differ
diff --git a/learn_consistent.py b/learn_consistent.py
deleted file mode 100644
index bd108f10239a1544c921f1df9115f08cd5c5fa47..0000000000000000000000000000000000000000
--- a/learn_consistent.py
+++ /dev/null
@@ -1,107 +0,0 @@
-from keras.models import Model
-from keras.layers import Dense, Input, Conv2D, Flatten, MaxPooling2D
-from configuration import conf
-from utils.dataloader import Sequential_loader
-import numpy as np
-from utils.model_utils import mask_layer_by_task
-from utils.layers import Probability_CLF_Mul_by_task
-from utils.train_utils import train_with_task
-from utils.predict_utils import get_task_likelihood, get_test_acc
-
-PATH = './results/%s/' % conf.dataset_name
-
-epochs = 50
-latent_dim = 250
-output_dim = 10
-verbose = 0
-
-data_loader = Sequential_loader()
-
-inputs = Input(shape=(784,))
-task_input = Input(shape=(5,))
-archi = Dense(1000, activation='relu')(inputs)
-archi = mask_layer_by_task(task_input, archi)
-archi = Dense(1000, activation='relu')(archi)
-archi = mask_layer_by_task(task_input, archi)
-
-task_output = Probability_CLF_Mul_by_task(conf.num_tasks, num_centers=output_dim // conf.num_tasks)(
-    [task_input, archi])
-task_output = mask_layer_by_task(task_input, task_output, 'task_out')
-clf = Dense(output_dim, activation='softmax')(archi)
-clf = mask_layer_by_task(task_input, clf, 'clf_out')
-model = Model(inputs=[inputs, task_input], outputs=[clf, task_output])
-model.compile(loss=['categorical_crossentropy', 'mse'], optimizer='adam', metrics=['accuracy', 'mse'],
-              loss_weights=[1, 4])
-
-tlh = [] # Task Likelihood
-tlh_std = [] # Standard Deviation of Task Likelihood
-test_acc = []
-for task_idx in range(conf.num_tasks):
-    # Learn a new task
-    train_with_task(model, task_idx=task_idx, data_loader=data_loader)
-    # Get the likelihood of the current task
-    mean, std = get_task_likelihood(model, learned_task=task_idx, test_task=task_idx, data_loader=data_loader)
-    tlh.append(mean)
-    tlh_std.append(std)
-    # Get the likelihood of the next task
-    if task_idx < conf.num_tasks - 1:
-        mean, std = get_task_likelihood(model, learned_task=task_idx, test_task=task_idx+1, data_loader=data_loader)
-        tlh.append(mean)
-        tlh_std.append(std)
-    # Run 200 times to get the test accuracy (for drawing the figure)
-    for _ in range(conf.num_runs):
-        test_acc.append(get_test_acc(model,data_loader,test_on_whole_set=False))
-    # Print the average test accuracy across all the tasks
-    print('Learned %dth Task, Average test accuracy on all the task : %.3f'%(task_idx,get_test_acc(model, data_loader, test_on_whole_set=True)))
-
-
-def paa(sample, w=None):
-    w = sample.shape[0] // 20 if w is None else w
-    l = len(sample)
-    stepfloat = l / w
-    step = int(np.ceil(stepfloat))
-    start = 0
-    j = 1
-    paa = []
-    while start <= (l - step):
-        section = sample[start:start + step]
-        paa.append(np.mean(section))
-        start = int(j * stepfloat)
-        j += 1
-    return paa
-
-
-tlh_s = []
-for i in tlh:
-    tlh_s += i.tolist()
-tlh_s = np.array(tlh_s)
-
-tlh_std_s = []
-for i in tlh_std:
-    tlh_std_s += i.tolist()
-tlh_std_s = np.array(tlh_std_s)
-
-test_acc_s = np.array(test_acc).reshape(-1)
-
-import matplotlib.pyplot as plt
-import seaborn as sns
-
-sns.set()
-
-tlh = np.array(paa(tlh_s))
-tlh_std = np.array(paa(tlh_std_s))
-test_acc = np.array(paa(test_acc_s, tlh.shape[0]))
-
-fig = sns.lineplot(np.arange(len(tlh)), tlh, label='Task Likelihood')
-fig.fill_between(np.arange(len(tlh)), tlh - tlh_std, tlh + tlh_std, alpha=0.3)
-fig = sns.lineplot(np.arange(len(tlh)), test_acc, label='Test Accuracy')
-a = [10, 30, 50, 70]
-for i in a:
-    fig.fill_between(np.arange(i, i + 10 + 1), 0, 0.1, alpha=0.1, color='red')
-    fig.fill_between(np.arange(i - 10, i + 1), 0, 0.1, alpha=0.1, color='green')
-fig.fill_between(np.arange(90 - 10, 90), 0, 0.1, alpha=0.1, color='green')
-# a = fig.get_xticklabels()
-fig.set_xticklabels(['', 'Task 1', 'Task 2', 'Task 3', 'Task 4', 'Task 5'])
-plt.legend(loc='center right')
-plt.savefig(PATH + 'result')
-plt.show()
diff --git a/split_mnit.py b/split_mnit.py
deleted file mode 100644
index 3da899707c894fb003263743d68eaea2b0f986c3..0000000000000000000000000000000000000000
--- a/split_mnit.py
+++ /dev/null
@@ -1,101 +0,0 @@
-import tensorflow as tf
-from keras.models import Model
-from keras.layers import Dense, Input, Lambda, Dropout
-from configuration import conf
-from utils.dataloader import Sequential_loader
-import numpy as np
-from utils.layers import Probability_CLF_Mul_by_task
-
-
-def mask_layer_by_task(task_input, input_tensor, name=None, return_mask=False):
-    mask = tf.expand_dims(task_input, axis=-1)
-    mask = tf.tile(mask, multiples=[1, 1, input_tensor.shape[1] // conf.num_tasks])
-    mask = tf.keras.layers.Flatten()(mask)
-    if name is None:
-        out = Lambda(lambda x: x * mask)(input_tensor)
-    else:
-        out = Lambda(lambda x: x * mask, name=name)(input_tensor)
-    if return_mask:
-        return out, mask
-    else:
-        return out
-
-
-def get_model_keras_mask(output_dim, label=None):
-    inputs = Input(shape=(784,))
-    task_input = Input(shape=(5,))
-    archi = Dense(1000, activation='relu')(inputs)
-    archi = mask_layer_by_task(task_input, archi)
-    archi = Dense(1000, activation='relu')(archi)
-    archi = mask_layer_by_task(task_input, archi)
-
-    task_output = Probability_CLF_Mul_by_task(conf.num_tasks, num_centers=output_dim // conf.num_tasks)(
-        [task_input, archi])
-    task_output = mask_layer_by_task(task_input, task_output, 'task_out')
-    clf = Dense(output_dim, activation='softmax')(archi)
-    clf = mask_layer_by_task(task_input, clf, 'clf_out')
-    model = Model(inputs=[inputs, task_input], outputs=[clf, task_output])
-    model_latent = Model(inputs=inputs, outputs=archi)
-    model.compile(loss=['categorical_crossentropy', 'mse'], optimizer='adam', metrics=['accuracy', 'mse'],
-                  loss_weights=[1, 4])
-
-    return model, model_latent
-
-
-data_loader = Sequential_loader()
-
-model, model_latent = get_model_keras_mask(10)
-for task_idx in range(conf.num_tasks):
-    x, y = data_loader.sample(task_idx=task_idx, whole_set=True)
-    task_input = np.zeros([y.shape[0], conf.num_tasks])
-    task_input[:, task_idx] = 1
-    model.fit([x, task_input], [y, task_input], epochs=10, batch_size=conf.batch_size, verbose=0)
-    if task_idx == 0:
-        model.layers[1].trainable = False
-        model.compile(loss=['categorical_crossentropy', 'mse'], optimizer='adam', metrics=['accuracy', 'mse'],
-                  loss_weights=[1, 4])
-
-for task_idx in range(conf.num_tasks):
-    x, y = data_loader.sample(task_idx, whole_set=True, dataset='test')
-    for test_idx in range(conf.num_tasks):
-        task_input = np.zeros([y.shape[0], conf.num_tasks])
-        task_input[:, test_idx] = 1
-        res = np.max(model.predict([x, task_input])[1], axis=1)
-
-
-block_size = conf.test_batch_size
-
-
-def block_likelihood(res):
-    block_likelihood = []
-    for r in res:
-        extra_index = r.shape[0] % block_size
-        extra_values = r[-extra_index:]
-        resize_values = r[:-extra_index]
-        r = resize_values.reshape(-1, block_size)
-        r = np.mean(r, axis=1, keepdims=True)
-        r = np.repeat(r, block_size, axis=1).reshape(-1, )
-        extra = np.repeat(np.mean(extra_values), len(extra_values))
-        final = np.append(r, extra)
-        block_likelihood.append(final)
-    return block_likelihood
-
-
-test_acc = []
-for task_idx in range(conf.num_tasks):
-    x, y = data_loader.sample(task_idx, whole_set=True, dataset='test')
-    res = []
-    pred = []
-    for test_idx in range(conf.num_tasks):
-        task_input = np.zeros([y.shape[0], conf.num_tasks])
-        task_input[:, test_idx] = 1
-        prediction = model.predict([x, task_input])
-        res.append(np.max(prediction[1], axis=1))
-        pred.append(np.argmax(prediction[0], axis=1))
-
-    res = block_likelihood(res)
-    pred = np.array(pred)
-    acc = np.sum(pred[np.argmax(res, axis=0), np.arange(pred.shape[1])] == np.argmax(y, axis=1)) / y.shape[0]
-    print('Task %d, Accuracy %.3f' % (task_idx, acc))
-    test_acc.append(acc)
-print('Average of Test Accuracy : %.3f' % np.mean(test_acc))
diff --git a/utils/__pycache__/__init__.cpython-36.pyc b/utils/__pycache__/__init__.cpython-36.pyc
index bd32f26b03d4f9dd41ab1b726dfab36b7b7a3a55..f8cb1f3a870abe37f5fb1e543bd5a36f0ba9d4b1 100644
Binary files a/utils/__pycache__/__init__.cpython-36.pyc and b/utils/__pycache__/__init__.cpython-36.pyc differ
diff --git a/utils/__pycache__/dataloader.cpython-36.pyc b/utils/__pycache__/dataloader.cpython-36.pyc
index 3bf2dd290e23b85938261bc5933b7084594e979a..2aca37454c4aa6041ffe93a069f661c8d7527c6d 100644
Binary files a/utils/__pycache__/dataloader.cpython-36.pyc and b/utils/__pycache__/dataloader.cpython-36.pyc differ
diff --git a/utils/__pycache__/layers.cpython-36.pyc b/utils/__pycache__/layers.cpython-36.pyc
index e604adb6d884387df3294be2c16727cc5e03cb1d..85b587689f8305f245155ff96d4ceb60be1bff49 100644
Binary files a/utils/__pycache__/layers.cpython-36.pyc and b/utils/__pycache__/layers.cpython-36.pyc differ
diff --git a/utils/__pycache__/load_data.cpython-36.pyc b/utils/__pycache__/load_data.cpython-36.pyc
index 4861423c1a27201f4fc6b3ddc91befe2d3a0c442..b57fef0350905954af9d49233c9ba39c3dedc03d 100644
Binary files a/utils/__pycache__/load_data.cpython-36.pyc and b/utils/__pycache__/load_data.cpython-36.pyc differ
diff --git a/utils/__pycache__/model_utils.cpython-36.pyc b/utils/__pycache__/model_utils.cpython-36.pyc
index 9089c9c9b4b2623217da9d7546d1426a45df0879..370047321c1213a910244d1eb239915b34d432d4 100644
Binary files a/utils/__pycache__/model_utils.cpython-36.pyc and b/utils/__pycache__/model_utils.cpython-36.pyc differ
diff --git a/utils/__pycache__/predict_utils.cpython-36.pyc b/utils/__pycache__/predict_utils.cpython-36.pyc
index 8e9f42220f6e324d31f1f89cbdf450958905b75e..f37f7415c433357c3ef575205da3c3e5053e97e8 100644
Binary files a/utils/__pycache__/predict_utils.cpython-36.pyc and b/utils/__pycache__/predict_utils.cpython-36.pyc differ
diff --git a/utils/__pycache__/train_utils.cpython-36.pyc b/utils/__pycache__/train_utils.cpython-36.pyc
index e68cb6674023f972b9010a932f219a634f8d616b..537f2e57a4b42a0b7ff7a3c4ed5203b7b301ee2a 100644
Binary files a/utils/__pycache__/train_utils.cpython-36.pyc and b/utils/__pycache__/train_utils.cpython-36.pyc differ