diff --git a/Taheri2020NCAA-labelflipping_Sourcecode/GAN_Based_defense.py b/Taheri2020NCAA-labelflipping_Sourcecode/GAN_Based_defense.py
deleted file mode 100644
index a2e3885bf575e0ce4d0a321f07570a64a52c00c8..0000000000000000000000000000000000000000
--- a/Taheri2020NCAA-labelflipping_Sourcecode/GAN_Based_defense.py
+++ /dev/null
@@ -1,598 +0,0 @@
-# -*- coding: utf-8 -*-
- 
-"""
-Created on Fri May 25 12:03:10 2018
-
-@author: Rahim
-#this approch use the distribution of Benign data to poison thet test data
-"""
-from __future__ import print_function
-from sklearn.feature_selection import SelectFromModel
-from sklearn.feature_selection import SelectKBest, f_regression
-from sklearn.model_selection import KFold
-from sklearn.model_selection import cross_val_score
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.metrics import classification_report
-from sklearn.model_selection import train_test_split 
-from sklearn.metrics import confusion_matrix
-from sklearn import model_selection
-from sklearn.feature_selection import RFE
-from sklearn.linear_model import LogisticRegression
-from sklearn.ensemble import ExtraTreesClassifier
-from sklearn.ensemble import RandomForestRegressor
-from scipy.sparse import csr_matrix, vstack, hstack
-from scipy import sparse
-import pandas as pd
-import numpy as np
-import random
-import time
-import argparse
-import math
-from numpy import *
-import os.path as osp
-import scipy.sparse as sp
-import pickle
-from sklearn import metrics
-from sklearn.metrics import accuracy_score
-#******************************************************************************
-CLASS = 'class'
-CLASS_BEN = 'B'
-CLASS_MAL = 'M'
-DATA = 'data'
-#********************************************Functions that will be used in this program*******************************************************************************************
-def parse_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument('-i', '--input-tables', nargs='*', dest='input_tables')
-
-    args = parser.parse_args()
-
-    return args
-#******************************************************************************
-def read_table(table_file):
-    
-        table = dict()
-        
-        with open(table_file, 'rb') as handle:
-            while True:
-                   try:
-                           table = pickle.load(handle)
-                   except EOFError:
-                           break
-        
-        f_set=set()
-        
-        for k,v in table.items():
-             for feature in v[DATA]:
-                f_set.add(feature)
-               
-        return table , f_set
-#******************************************************************************
-def build_table(tables):
-    full_table = dict()
-
-    file_set = set()
-    
-    for table in tables:
-        file_set.update(table.keys())
-        for key, val in table.items():
-            full_table[key] = val
-              
-    files = list(file_set)
-    return full_table, files
-#******************************************************************************
-def convert_to_matrix(table, features, files):
-    mat = sp.lil.lil_matrix((len(files), len(features)), dtype=np.int8)
-
-    print("Input Data Size =  ", mat.get_shape())
-    # the response vector
-   
-    cl = [0]*len(files)
-    
-    for key, val in table.items():
-        k = files.index(key)
-    
-        if val[CLASS] is CLASS_BEN:
-            cl[k] = 1
-       
-        for v in val[DATA]:
-            try:
-                idx = features.index(v)
-                mat[k, idx] = 1
-            except Exception as e:
-                print(e)
-                pass              
-        
-    return mat, cl
-#******************************************************************************
-def delete_row_lil(mat, i):
-    if not isinstance(mat, sp.lil.lil_matrix):
-        raise ValueError("works only for LIL format -- use .tolil() first")
-    mat.rows = np.delete(mat.rows, i)
-    mat.data = np.delete(mat.data, i)
-    mat._shape = (mat._shape[0] - 1, mat._shape[1])
-#******************************************************************************
-def relevant_features(data, response_vector, features):
-    rel_features = list()
-    ranked_index=list()
-    
-    model =RandomForestRegressor()
-    rfe = RFE(model, 1)
-    fit = rfe.fit(data, response_vector)
-    old_features=features
-
-    for i in fit.ranking_:
-        if i<len(features):
-              rel_features.append(features[i])
-    ranked_index=[old_features.index(x) for x in rel_features if x in old_features]
-       
-    return rel_features ,ranked_index
-#*****************************************************************Main Function*********************************************************************************************************
-def main():
-    args = parse_args()
-
-    tables = []
-    f_set = set()
-    
-    #read the data
-    for t_files in args.input_tables:
-        table, features = read_table(t_files)
-        f_set = f_set.union(features)
-        tables.append(table)
-    print("                                                                                         ")
-    print("                                                                                         ")
-    print("*****************************************************************************************")
-    print("********Using Benign Distribution + Random Forest Classifier + GAN countermeasure********") 
-    print("*****************************************************************************************")
-
-    #*build table from data and convert to matrix 
-    full_table, files = build_table(tables)
-    files.sort()
-    features = list(f_set)
-    features.sort()
-    mat, cl = convert_to_matrix(full_table, features, files) 
-   
-    #Doing feature Ranking on all of the Data
-    print("************************Doing feature Ranking on all of the Data*************************")
-    t0=time.time()
-    r_features,ranked_index = relevant_features(mat, cl, features)
-    t1=time.time()
-    print("Time of Feature Ranking=",t1-t0)
-    print("******************************************************************************************")
- 
-    original_selected=ranked_index[1:301]
-    data = sparse.lil_matrix(sparse.csr_matrix(mat)[:,original_selected])
-    seed = 10
-    test_size = 0.2
-    X_train, X_test, Y_train, Y_test= train_test_split(data, cl, test_size= test_size, random_state=seed)
-    test_size = 0.25
-    X_train, X_val, Y_train, Y_val= train_test_split(X_train, Y_train, test_size= test_size, random_state=seed)  
-    #**************************************************************************
-    num_trees = 100
-    max_features = 3
-    t0=time.time()
-    kfold = KFold(n_splits=10, random_state=10)
-    model = RandomForestClassifier(n_estimators=num_trees, max_features=max_features)
-    model.fit(X_train, Y_train)
-    t1=time.time()
-    print("Time for Clssification Algorithm is runing on 300 high-ranked features =",t1-t0)
-    print("************************************Result without attack *******************************************************************************************")
-    # compute Classification Accuracy in train and test and Validation
-    scoring = 'accuracy'
-    results = model_selection.cross_val_score(model, X_train,Y_train, cv=kfold, scoring=scoring)
-    print(("The accuracy of Classification in train: %.3f (%.3f)") % (results.mean(), results.std()))
-    #********************* compute Classification Accuracy in validation***************************
-    scoring = 'accuracy'
-    results = model_selection.cross_val_score(model, X_val,Y_val, cv=kfold, scoring=scoring)
-    print(("The accuracy of Classification in validation: %.3f (%.3f)") % (results.mean(), results.std()))
-    #********************* compute Classification Accuracy in test*********************************
-    scoring = 'accuracy'
-    results = model_selection.cross_val_score(model, X_test,Y_test, cv=kfold, scoring=scoring)
-    print(("The accuracy of Classification in test: %.3f (%.3f)") % (results.mean(), results.std()))
-    #********************* compute Classification Accuracy in Validation***************************
-    predictions_val = model.predict(X_val)
-    print("classification_report by validation:")
-    print(classification_report(Y_val, predictions_val))
-    #********************* compute Classification Accuracy in train********************************
-    predictions = model.predict(X_test)
-    print("classification_report by test:")
-    print(classification_report(Y_test, predictions))
-    #********************* compute Logarithmic Loss in Train*********************************
-    scoring = 'neg_log_loss'
-    results = model_selection.cross_val_score(model, X_train,Y_train, cv=kfold, scoring=scoring)
-    print(("The Loss of Classification in train data: %.3f (%.3f)") % (results.mean(), results.std()))
-    #********************* compute Logarithmic Loss in validation**************************** 
-    scoring = 'neg_log_loss'
-    results = model_selection.cross_val_score(model, X_val,Y_val, cv=kfold, scoring=scoring)
-    print(("The Loss of Classification in validation data:: %.3f (%.3f)") % (results.mean(), results.std()))
-    #********************* compute Logarithmic Loss in Test***********************************
-    scoring = 'neg_log_loss'
-    results = model_selection.cross_val_score(model, X_test,Y_test, cv=kfold, scoring=scoring)
-    print(("The Loss of Classification in test data:: %.3f (%.3f)") % (results.mean(), results.std()))
-    #********************* compute Area Under ROC Curve in Train******************************
-    scoring = 'roc_auc'
-    results = model_selection.cross_val_score(model, X_train,Y_train, cv=kfold, scoring=scoring)
-    print(("The Area Under ROC Curve in Train: %.3f (%.3f)") % (results.mean(), results.std()))
-    #********************* compute Area Under ROC Curve in Validation*************************
-    scoring = 'roc_auc'
-    results = model_selection.cross_val_score(model, X_val,Y_val, cv=kfold, scoring=scoring)
-    print(("The Area Under ROC Curve in Validation: %.3f (%.3f)") % (results.mean(), results.std()))
-    #********************* compute Area Under ROC Curve in Test*******************************
-    scoring = 'roc_auc'
-    results = model_selection.cross_val_score(model, X_test,Y_test, cv=kfold, scoring=scoring)
-    print(("The Area Under ROC Curve in test: %.3f (%.3f)") % (results.mean(), results.std()))
-    #*****************************Compute FPR and TPR in Validation**************************
-    cm=confusion_matrix(Y_test, predictions)
-    print("confusion_matrix=")
-    print(cm)
-    TP=cm[0][0]
-    print("TP=",TP)
-    FP=cm[0][1]
-    print("FP=",FP)
-    FN=cm[1][0]
-    print("FN=",FN)
-    TN=cm[1][1]
-    print("TN=",TN)
-    FPR=FP/(FP+TN)
-    print("The FPR result=", FPR)
-    TPR=TP/(TP+FN)
-    print("The TPR result=", TPR)
-    
-    TNR=TN/(TN+FP)
-    print("The TNR result=", TNR)
-    
-    FNR=FN/(FN+TP)
-    print("The FNR result=", FNR)
-    
-    AUC=1/(2*((TN/(TN+FP))+(TP/(TP+FP))))
-    print("The AUC result=", AUC)
-    
-    ACC=(TP+TN)/(TP+TN+FP+FN)
-    print("The ACC result=", ACC)
-    
-    MCC=(TP*TN-FP*FN)/math.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))
-    print("The Matthews correlation coefficient result=", MCC)
-    
-    print("*******************************End of Result without attack:*****************************************************************************************")
-    #*************************************************************************************************************************************************************
-    # finding Malware of test data
-    malware_test= sparse.lil_matrix(X_test)
-    cl_malware=list()
-    z_m=0    
-    count_m=0
-    for i, j in enumerate(Y_test):
-        if j == 1:
-            delete_row_lil(malware_test, i-count_m)
-            count_m=count_m+1
-        else:
-            cl_malware.insert(z_m, 1)
-            z_m=z_m+1
-
-    #**************************
-    #finding Benign of test data
-    benign_test = sparse.lil_matrix(X_test)
-    cl_benign=list()
-    z_b=0    
-    count_b=0
-    for i, j in enumerate(Y_test):
-        if j == 0:
-            delete_row_lil(benign_test.tolil(), i-count_b)
-            count_b=count_b+1
-        else:
-            cl_benign.insert(z_b, 1)
-            z_b=z_b+1
-    #************************** 
-    # finding Malware of Train data
-    malware_train= sparse.lil_matrix(X_train)
-    cl_malware=list()
-    z_m=0    
-    count_m=0
-    for i, j in enumerate(Y_train):
-         if j == 1:
-            delete_row_lil(malware_train, i-count_m)
-            count_m=count_m+1
-         else:
-            cl_malware.insert(z_m, 1)
-            z_m=z_m+1 
-    #***************************
-    #Finding Benign of Train data
-    cl_X_train=list(Y_train) 
-    benign_train=sparse.lil_matrix(X_train)
-    z_b=0    
-    count_b=0
-    cl_benign_train=list()
-    for i, j in enumerate(cl_X_train):
-        if j == 0:
-            delete_row_lil(benign_train, i-count_b)
-            count_b=count_b+1
-        else:
-            cl_benign_train.insert(z_b, 1)
-            z_b=z_b+1
-    print("***********Size of Each Data Part:**********")        
-    print("malware_train=", malware_train.get_shape())
-    print("benign_train=", benign_train.get_shape())
-    print("malware_test=", malware_test.get_shape())
-    print("benign_test=", benign_test.get_shape())
-    #***************************************************
-    t0=time.time()
-    ranked_features_in_benign,ranked_index_of_benign = relevant_features(benign_train,cl_benign_train, features)  
-    t1=time.time()
-    print("Time for Ranking benign_train to find important features =",t1-t0)
-    #***************************************************************************************************************************************************************
-    numbers=list()
-    numbers=[3,6,9,12,15,18,21,24,27,30,60]
-    X_test = sp.lil.lil_matrix(X_test)
-    
-    for loop in range(10):
-        print("************************************************************************************************************************************************************************************")
-        print("Result related to loop number : ",loop)
-       
-        Malware_Test=sparse.lil_matrix(malware_test.copy()) 
-        row_of_Malware,column_of_Malware=Malware_Test.get_shape()
-        index_of_row=list(range(row_of_Malware))
-        random.shuffle(index_of_row)
-
-        number_of_row_to_change=int(row_of_Malware/10)
-        selected_row=index_of_row[0:number_of_row_to_change]
-        
-        for i, v in enumerate(numbers):
-            print("*****************************************************************************************************************************************************")
-            print("*********************selected features :",int(v) )
-            print("************************************Result after attack *************************")
-            max_index_of_column=int(v)+1
-            t0=time.time()
-            rw_test,cl_test=X_test.get_shape()
-            poison_data=sp.lil.lil_matrix((0,cl_test),dtype=np.int8)
-            Malware_Test=sparse.lil_matrix(malware_test.copy())
-
-            counter_of_poisoned_point=0
-            
-            for m,value in enumerate(selected_row):
-                flag=0
-                for i, j in enumerate(ranked_index_of_benign[1:max_index_of_column]):
-                    for k,l in enumerate(original_selected):
-                      if j==l:     
-                          if Malware_Test[value,l]==0:
-                                Malware_Test[value,l]=1
-                                flag=1
-                if flag==1:
-                    counter_of_poisoned_point=counter_of_poisoned_point+1
-           
-            
-            Benign_Test=sparse.lil_matrix(benign_test.copy()) 
-            poison_data = sp.lil.lil_matrix(sparse.csr_matrix(sparse.vstack((Benign_Test, Malware_Test))))
-            r,w=poison_data.get_shape()
-            Y_test=Y_test[0:r]
-            
-            t1=time.time()                   
-            print("Time related to applying attack in this number of Features= ",t1-t0)
-            
-            print("Number of poisoned Malware= ",counter_of_poisoned_point)
-            #********************* compute Classification Accuracy in test*********************************
-            scoring = 'accuracy'
-            results = model_selection.cross_val_score(model, poison_data,Y_test, cv=kfold, scoring=scoring)
-            print(("The accuracy of Classification in test: %.3f (%.3f)") % (results.mean(), results.std()))
-           
-            #********************* compute Classification Accuracy in train********************************
-            predictions = model.predict(poison_data)
-            print("classification_report by test:")
-            print(classification_report(Y_test, predictions))
-          
-            #********************* compute Logarithmic Loss in Test***********************************
-            scoring = 'neg_log_loss'
-            results = model_selection.cross_val_score(model, poison_data,Y_test, cv=kfold, scoring=scoring)
-            print(("The Loss of Classification in test data:: %.3f (%.3f)") % (results.mean(), results.std()))
-           
-            #********************* compute Area Under ROC Curve in Test*******************************
-            scoring = 'roc_auc'
-            results = model_selection.cross_val_score(model, poison_data,Y_test, cv=kfold, scoring=scoring)
-            print(("The Area Under ROC Curve in test: %.3f (%.3f)") % (results.mean(), results.std()))
-            #*****************************Compute FPR and TPR in Validation**************************
-            cm=confusion_matrix(Y_test, predictions)
-            print("confusion_matrix=")
-            print(cm)
-            TP=cm[0][0]
-            print("TP=",TP)
-            FP=cm[0][1]
-            print("FP=",FP)
-            FN=cm[1][0]
-            print("FN=",FN)
-            TN=cm[1][1]
-            print("TN=",TN)
-            FPR=FP/(FP+TN)
-            print("The FPR result=", FPR)
-            
-            TPR=TP/(TP+FN)
-            print("The TPR result=", TPR)
-            
-            TNR=TN/(TN+FP)
-            print("The TNR result=", TNR)
-            
-            FNR=FN/(FN+TP)
-            print("The FNR result=", FNR)
-            
-            AUC=1/(2*((TN/(TN+FP))+(TP/(TP+FP))))
-            print("The AUC result=", AUC)
-            
-            ACC=(TP+TN)/(TP+TN+FP+FN)
-            print("The ACC result=", ACC)
-            
-            MCC=(TP*TN-FP*FN)/math.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))
-            print("The Matthews correlation coefficient result=", MCC)
-            
-            print("********************************************************************************")
-            print("*******************Result after applying GAN countermeasure**************")
-            t0=time.time()
-
-
-            model2 = ExtraTreesClassifier(n_estimators=250,random_state=0)
-            model2.fit(benign_train, cl_benign_train)
-            importances = model2.feature_importances_
-            indices = np.argsort(importances)[::-1]
-            
-
-            importance_of_Features_in_benign_train=list()
-            for f in range(60):
-                importance_of_Features_in_benign_train.append(indices[f])
-
-            #******************************Runing the Logistic Regression and  finding Some Sampels Near to Hyperplain*****************************
-            poison_model = LogisticRegression() 
-            poison_model.fit(X_train,Y_train)
-            print("Result related to Logistic Regression:")
-            scoring = 'accuracy'
-            poison_results = model_selection.cross_val_score(poison_model, X_train,Y_train, cv=kfold, scoring=scoring)
-            print(("The accuracy of Classification in train: %.3f (%.3f)") % (poison_results.mean(), poison_results.std()))
-            #********************* compute Logistic Regression Accuracy in validation without change ***************************
-            scoring = 'accuracy'
-            results = model_selection.cross_val_score(poison_model, X_val,Y_val, cv=kfold, scoring=scoring)
-            print(("The accuracy of Classification in validation: %.3f (%.3f)") % (poison_results.mean(), poison_results.std()))
-            #********************* compute Logistic Regression Accuracy in test without change *********************************
-            scoring = 'accuracy'
-            results = model_selection.cross_val_score(poison_model, X_test,Y_test, cv=kfold, scoring=scoring)
-            print(("The accuracy of Classification in test: %.3f (%.3f)") % (poison_results.mean(), poison_results.std()))
-            #**********************Declration of Variables for finding desision value *************
-            print("**************************************************************************************************")           
-            temp=sparse.lil_matrix(X_train)
-            a,b=temp.get_shape()
-            decision_value=np.array([])
-            selected_cl_malware_train=list()
-            selected_malware_train = sparse.lil_matrix(X_train)       
-            #**********************finding malware_train and related desision value **********************************
-            counter_of_malware_train=0
-            count_deleted=0
-            for j in range(a):
-                row=temp.getrow(j)
-                if Y_train[j]==0:
-                    decision_value=np.append(decision_value,poison_model.decision_function(row))
-                    selected_cl_malware_train.insert(counter_of_malware_train, 0)
-                    counter_of_malware_train=counter_of_malware_train+1
-                else:
-                    delete_row_lil(selected_malware_train.tolil(), j-count_deleted)
-                    count_deleted=count_deleted+1 
-            #**********************sort the absolute value of decision value for malware_train*************************      
-            decision_value=np.absolute(decision_value)
-            indices=decision_value.argsort()
-              
-            #************** Declration of Variables for selecting data*************************************************
-            number_of_row_malware_train,number_of_column_malware_train=malware_train.get_shape()
-               
-            number_of_row_selected_malware_train=int(number_of_row_malware_train/10)
-            
-            #****************Selecting index related to 10 percent of malware_train with minimum decision value*******
-            Selected_rows_as_less_likely=list()
-            Selected_rows_as_less_likely=indices[:number_of_row_selected_malware_train]
-                                 
-            Malware_less_likely=sp.lil.lil_matrix((0, number_of_column_malware_train), dtype=np.int8)
-            cl_less_likely=list()
-            counter_for_cl_less_likely=0            
-            for i,row_number in enumerate(Selected_rows_as_less_likely):
-                selected_row=malware_train.getrow(row_number)
-                Malware_less_likely= sp.lil.lil_matrix(sparse.csr_matrix(sparse.vstack((selected_row, Malware_less_likely))))
-                cl_less_likely.insert(counter_for_cl_less_likely,0)
-                counter_for_cl_less_likely=counter_for_cl_less_likely+1
-                
-            number_of_row_in_Malware_less_likely,number_of_column_in_Malware_less_likely=Malware_less_likely.get_shape()
-            #****************finding Benign like samples********************************************************************************
-            poisoned_data=sp.lil.lil_matrix((0, number_of_column_malware_train), dtype=np.int8)
-            c=0
-            for counter_of_Malware_less_likely in range(number_of_row_in_Malware_less_likely):
-                selected_sample=Malware_less_likely.getrow(counter_of_Malware_less_likely)
-
-                
-                c=0
-                for S in range(number_of_column_in_Malware_less_likely):     
-                        index_for_change=random.randint(0,number_of_column_in_Malware_less_likely-1)
-                        if selected_sample[0,index_for_change]==0:
-                            selected_sample[0,index_for_change]=1 
-                        label=model.predict(selected_sample)
-                        if label==int(1):
-                            poisoned_data= sp.lil.lil_matrix(sparse.csr_matrix(sparse.vstack((selected_sample, poisoned_data))))
-                            c=c+1
-                            break       
-             
-            Number_of_row_in_poisoned_data,Number_of_column_in_poison_data=poisoned_data.get_shape()
-            Y_poisoin=list()
-            for index in range(Number_of_row_in_poisoned_data):
-                Y_poisoin.append(0)
-            #***************************************************************************************************************************
-            
-            poisoned_data_X=poisoned_data.copy()
-            poisoned_data_Y=Y_poisoin[:]
-            second_test_set=0.2
-            X_poisoned_train, X_poisoned_test, Y_poisoned_train, Y_poisoned_test= train_test_split(poisoned_data_X, poisoned_data_Y, test_size= second_test_set, random_state=seed)
-            poison_data_for_retraining = sp.lil.lil_matrix(sparse.csr_matrix(sparse.vstack((X_train, X_poisoned_train))))
-            poison_Class_for_retraining = Y_train + Y_poisoned_train
-            
-            num_trees = 100
-            max_features = 3
-            kfold = KFold(n_splits=10, random_state=10)
-            model_for_counter_measure = RandomForestClassifier(n_estimators=num_trees, max_features=max_features)
-            model_for_counter_measure.fit(poison_data_for_retraining,poison_Class_for_retraining)
-    
-    
-            poison_data_for_test_after_retraining = sp.lil.lil_matrix(sparse.csr_matrix(sparse.vstack((X_test, X_poisoned_test))))
-            poison_Class_for_test_after_retraining= Y_test + Y_poisoned_test
-    
-            t1=time.time()
-            print("Time related to applying  GAN countermeasure in this number of Features: ",t1-t0)
-            #********************* compute Classification Accuracy in test*********************************
-            scoring = 'accuracy'
-            results = model_selection.cross_val_score(model_for_counter_measure, poison_data_for_test_after_retraining,poison_Class_for_test_after_retraining, cv=kfold, scoring=scoring)
-            print(("The accuracy of Classification in test: %.3f (%.3f)") % (results.mean(), results.std()))
-           
-            #********************* compute Classification Accuracy in train********************************
-            predictions = model.predict(poison_data_for_test_after_retraining)
-            print("classification_report by test:")
-            print(classification_report(poison_Class_for_test_after_retraining, predictions))
-          
-            #********************* compute Logarithmic Loss in Test***********************************
-            scoring = 'neg_log_loss'
-            results = model_selection.cross_val_score(model_for_counter_measure, poison_data_for_test_after_retraining , poison_Class_for_test_after_retraining, cv=kfold, scoring=scoring)
-            print(("The Loss of Classification in test data:: %.3f (%.3f)") % (results.mean(), results.std()))
-           
-            #********************* compute Area Under ROC Curve in Test*******************************
-            scoring = 'roc_auc'
-            results = model_selection.cross_val_score(model_for_counter_measure, poison_data_for_test_after_retraining , poison_Class_for_test_after_retraining, cv=kfold, scoring=scoring)
-            print(("The Area Under ROC Curve in test: %.3f (%.3f)") % (results.mean(), results.std()))
-            #*****************************Compute FPR and TPR in Validation**************************
-            cm=confusion_matrix(poison_Class_for_test_after_retraining, predictions)
-            print("confusion_matrix=")
-            print(cm)
-            TP=cm[0][0]
-            print("TP=",TP)
-            FP=cm[0][1]
-            print("FP=",FP)
-            FN=cm[1][0]
-            print("FN=",FN)
-            TN=cm[1][1]
-            print("TN=",TN)
-            FPR=FP/(FP+TN)
-            print("The FPR result=", FPR)
-            
-            TPR=TP/(TP+FN)
-            print("The TPR result=", TPR)
-            
-            TNR=TN/(TN+FP)
-            print("The TNR result=", TNR)
-            
-            FNR=FN/(FN+TP)
-            print("The FNR result=", FNR)
-            
-            AUC=1/(2*((TN/(TN+FP))+(TP/(TP+FP))))
-            print("The AUC result=", AUC)
-            
-            ACC=(TP+TN)/(TP+TN+FP+FN)
-            print("The ACC result=", ACC)
-            
-            MCC=(TP*TN-FP*FN)/math.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))
-            print("The Matthews correlation coefficient result=", MCC)
-
-            print("Result Related to this Numbers of features is finished:",int(v))
-            Malware_Test=sparse.lil_matrix(malware_test.copy()) 
-            selected_row=index_of_row[0:number_of_row_to_change]
-            original_selected=ranked_index[1:301]
-        print("End of loop number : ",loop)
-        print("************************************************************************************************************************************************************************************")
-#********************************************************************************************************************************************************************
-if __name__ == "__main__":
-    main()
-#******************************************************************************
diff --git a/Taheri2020NCAA-labelflipping_Sourcecode/Label_Flipping_Paper_with_Feature_Selection(LSD_CSD_KDD).py b/Taheri2020NCAA-labelflipping_Sourcecode/Label_Flipping_Paper_with_Feature_Selection(LSD_CSD_KDD).py
deleted file mode 100644
index a5397c5acf8201eb291eab2c0a077d74117e6bbd..0000000000000000000000000000000000000000
--- a/Taheri2020NCAA-labelflipping_Sourcecode/Label_Flipping_Paper_with_Feature_Selection(LSD_CSD_KDD).py
+++ /dev/null
@@ -1,840 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Fri Jun 21 14:25:17 2019
-
-@author: Rahim
-"""
-#*****************************************************************import Library*****************************************************************************
-from __future__ import print_function
-from sklearn.feature_selection import SelectFromModel
-from sklearn.feature_selection import SelectKBest, f_regression
-from sklearn.model_selection import KFold
-from sklearn.model_selection import cross_val_score
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.metrics import classification_report
-from sklearn.model_selection import train_test_split 
-from sklearn.metrics import confusion_matrix
-from sklearn import model_selection
-from sklearn.feature_selection import RFE
-from sklearn.linear_model import LogisticRegression
-from sklearn.ensemble import ExtraTreesClassifier
-from sklearn.ensemble import RandomForestRegressor
-from scipy.sparse import csr_matrix, vstack, hstack
-from scipy.sparse import coo_matrix
-from keras.preprocessing.text import one_hot
-from sklearn import metrics
-from sklearn.metrics import silhouette_samples, silhouette_score
-from sklearn.semi_supervised import LabelPropagation
-from sklearn.semi_supervised import LabelSpreading
-from sklearn.semi_supervised import label_propagation
-from sklearn.metrics import roc_auc_score
-from sklearn.metrics import f1_score
-from sklearn.cluster import KMeans
-import math
-#import keras
-from keras.models import Sequential
-from keras.layers import Dense, Dropout, Activation , Flatten
-from sklearn.metrics import log_loss
-from keras.optimizers import SGD
-from keras.layers.normalization import BatchNormalization
-from keras.layers.convolutional import UpSampling2D
-from keras.layers.convolutional import Conv2D, MaxPooling2D, MaxPooling1D
-from keras.layers.embeddings import Embedding
-from scipy import sparse
-import pandas as pd
-import numpy as np
-#import random
-import sklearn
-from sklearn.metrics.pairwise import manhattan_distances
-from keras.models import Model
-from keras.layers import  Conv1D, multiply, GlobalMaxPool1D, Input , Lambda
-import time
-import argparse
-#import math
-from numpy import *
-import os.path as osp
-import scipy.sparse as sp
-import pickle
-from sklearn.metrics import accuracy_score
-from warnings import simplefilter
-#*********************************************************************************************************************************
-# Keys and tags of the per-sample records held in the pickled input tables.
-# CLASS is the record key under which a sample's class tag is stored.
-CLASS = 'class'
-# Class tag that convert_to_matrix maps to label 1; any other tag stays 0.
-CLASS_BEN = 'B'
-# NOTE(review): presumably the tag of malware samples; confirm against the data.
-CLASS_MAL = 'M'
-# DATA is the record key under which a sample's iterable of features is stored.
-DATA = 'data'
-#********************************************Functions that will be used in this program*****************************************
-def parse_args():
-    """Parse the command-line options of this script.
-
-    Returns the argparse namespace. Its ``input_tables`` attribute holds the
-    values given after ``-i`` / ``--input-tables`` (zero or more), or None
-    when the option is not supplied at all.
-    """
-    cli = argparse.ArgumentParser()
-    cli.add_argument(
-        '-i',
-        '--input-tables',
-        dest='input_tables',
-        nargs='*',
-    )
-    return cli.parse_args()
-#*********************************************************************************************************************************
-def read_table(table_file):
-    """Load a pickled table and collect every feature it mentions.
-
-    The file is read as a sequence of pickled objects; each object replaces
-    the previous one, so the last object in the file is the table returned.
-    An empty file yields an empty dict.
-
-    Returns a ``(table, f_set)`` pair where ``f_set`` is the set of all
-    entries found under the DATA key of the table's values.
-    """
-    table = {}
-
-    # NOTE(review): unpickling executes code embedded in the file; only feed
-    # this function tables that come from a trusted source.
-    with open(table_file, 'rb') as handle:
-        try:
-            while True:
-                table = pickle.load(handle)
-        except EOFError:
-            # End of the pickle stream: keep whatever was loaded last.
-            pass
-
-    f_set = {feature for record in table.values() for feature in record[DATA]}
-
-    return table, f_set
-#******************************************************************************
-def relevant_features(data, response_vector, features):
-    """Rank all features by recursive feature elimination, best first.
-
-    Parameters
-    ----------
-    data : matrix accepted by ``RFE.fit``; one column per entry of ``features``.
-    response_vector : target value of every row of ``data``.
-    features : list of feature names, in the column order of ``data``.
-
-    Returns
-    -------
-    (rel_features, ranked_index)
-        ``ranked_index`` lists column indices ordered from the best ranked
-        feature to the worst; ``rel_features`` holds the matching names.
-    """
-    model = RandomForestRegressor()
-    # Pass n_features_to_select by keyword: newer scikit-learn releases no
-    # longer accept it positionally.
-    rfe = RFE(model, n_features_to_select=1)
-    fit = rfe.fit(data, response_vector)
-
-    # ranking_[j] is the RANK of column j (1 = best), not a column index, so
-    # the columns have to be sorted by their rank to obtain the ranking.
-    # sorted() is stable, so equally ranked columns keep their column order.
-    n_columns = min(len(fit.ranking_), len(features))
-    ranked_index = sorted(range(n_columns), key=lambda col: fit.ranking_[col])
-    rel_features = [features[col] for col in ranked_index]
-
-    return rel_features, ranked_index
-#*********************************************************************************************************************************
-def build_table(tables):
-    """Merge several sample tables into one.
-
-    Later tables override earlier ones on duplicate keys.
-
-    Returns a ``(full_table, files)`` pair: the merged dict and a list of
-    all of its keys (in no particular order).
-    """
-    merged = {}
-    names = set()
-
-    for tbl in tables:
-        names.update(tbl.keys())
-        merged.update(tbl.items())
-
-    return merged, list(names)
-#*********************************************************************************************************************************
-def convert_to_matrix(table, features, files):
-    """Build the sample-by-feature indicator matrix and the class vector.
-
-    Parameters
-    ----------
-    table : dict mapping a file name to a record that stores its class tag
-        under CLASS and an iterable of its features under DATA.
-    features : list giving the column order of the matrix (hashable items).
-    files : list giving the row order of the matrix (hashable items).
-
-    Returns
-    -------
-    (mat, cl)
-        ``mat`` is a ``len(files)`` x ``len(features)`` int8 LIL matrix with
-        a 1 wherever a file lists a feature; ``cl`` is a list with 1 for
-        records tagged CLASS_BEN and 0 for every other row.
-
-    Raises
-    ------
-    ValueError
-        If a key of ``table`` does not occur in ``files``.
-    """
-    mat = sp.lil_matrix((len(files), len(features)), dtype=np.int8)
-
-    print("Input Data Size =  ", mat.shape)
-
-    # Build the position lookups once instead of scanning the lists for every
-    # sample and feature. setdefault keeps the first position of a duplicate,
-    # which is what list.index() would return.
-    row_of = {}
-    for pos, name in enumerate(files):
-        row_of.setdefault(name, pos)
-    col_of = {}
-    for pos, name in enumerate(features):
-        col_of.setdefault(name, pos)
-
-    # the response vector
-    cl = [0] * len(files)
-
-    for key, val in table.items():
-        if key not in row_of:
-            raise ValueError("%r is not in list" % (key,))
-        k = row_of[key]
-
-        # Compare by value: identity (`is`) on strings only works when the
-        # interpreter happens to share the two string objects.
-        if val[CLASS] == CLASS_BEN:
-            cl[k] = 1
-
-        for v in val[DATA]:
-            idx = col_of.get(v)
-            if idx is None:
-                # Best effort: report a feature without a column and go on.
-                print("%r is not in list" % (v,))
-                continue
-            mat[k, idx] = 1
-
-    return mat, cl
-#******************************************************************************
-def delete_row_lil(mat, i):
-    """Remove row ``i`` from a LIL sparse matrix in place.
-
-    Parameters
-    ----------
-    mat : scipy LIL matrix; it is modified, nothing is returned.
-    i : index of the row to drop (anything ``np.delete`` accepts).
-
-    Raises
-    ------
-    ValueError
-        If ``mat`` is not a LIL matrix.
-    """
-    # Use the public class name; reaching it through the scipy.sparse.lil
-    # submodule is deprecated in recent SciPy releases.
-    if not isinstance(mat, sp.lil_matrix):
-        raise ValueError("works only for LIL format -- use .tolil() first")
-    # rows and data hold one entry per matrix row, so dropping entry i from
-    # both and shrinking the stored shape removes the row.
-    mat.rows = np.delete(mat.rows, i)
-    mat.data = np.delete(mat.data, i)
-    mat._shape = (mat._shape[0] - 1, mat._shape[1])
-#*****************************************************************Main Function*******************************************************
-def main():
-    simplefilter(action='ignore', category=FutureWarning)
-    args = parse_args()
-    tables = []
-    f_set = set()
-    #read the data
-    for t_files in args.input_tables:
-        table, features = read_table(t_files)
-        f_set = f_set.union(features)
-        tables.append(table)    
-    #************************************build table from data and convert to matrix***************************************************
-    full_table, files = build_table(tables)
-    files.sort()
-    features = list(f_set)
-    features.sort()
-    mat, cl = convert_to_matrix(full_table, features, files)       
-    print("************************Doing feature Ranking on all of the Data*************************")
-    r_features,ranked_index = relevant_features(mat, cl, features)
-    original_selected=ranked_index[1:301]
-    data = sparse.lil_matrix(sparse.csr_matrix(mat)[:,original_selected])
-    
-    #******************************************Split data to train , test and validation**********************************************
-    seed = 10
-    test_size = 0.2
-    X_train, X_test, Y_train, Y_test= train_test_split(data, cl, test_size= test_size, random_state=seed)
-    test_size = 0.25
-    X_train, X_val, Y_train, Y_val= train_test_split(X_train, Y_train, test_size= test_size, random_state=seed) 
-    #***********************************************************************************************************************************
-    print("                                                                                                                               ")
-    print("                                                                                                                               ")
-    print("*********Semi-Supervised Deep Learning Based Approach Against Label Flipping Attack in Malware Detection System*****************")
-    print("                                                                                                                               ")   
-    
-    X_train=sparse.csr_matrix(X_train)
-    print("row_train,column_train=", X_train.get_shape())
-    print("                                                                   ")
-    X_val=sparse.csr_matrix(X_val)
-    row_val,column_val=X_val.get_shape()
-    print("row_val,column_val=",X_val.get_shape())  
-    print("                                                                   ")
-    X_test=sparse.csr_matrix(X_test)
-    row_test,column_test=X_test.get_shape()
-    print("row_test,column_test=",X_test.get_shape()) 
-    print("                                                                   ")
-    print("********************************************************************")
-    #**************************************************Model Definition*****************************************************************
-    X_train_NoAttack=X_train.copy()
-    Y_train_NoAttack=Y_train[:]
-    
-    X_val_NoAttack=X_val.copy()
-    Y_val_NoAttack=Y_val[:]
-    
-    row_train_NoAttack,column_train_NoAttack=X_train_NoAttack.get_shape()
-    model_main = Sequential()
-    model_main.add(Embedding(row_train_NoAttack, 8, input_length=column_train_NoAttack))
-    model_main.add(Conv1D(16,2, strides=2, padding='same'))
-    model_main.add(MaxPooling1D(pool_size = (4), strides=(2)))
-    model_main.add(Conv1D(32,2, strides=2, padding='same'))
-    model_main.add(MaxPooling1D(pool_size = (4), strides=(2)))
-    model_main.add(Conv1D(64,2, strides=2, padding='same'))
-    model_main.add(Flatten())
-    model_main.add(Dense(1, activation='sigmoid'))
-    model_main.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
-    model_main.fit(X_train_NoAttack, Y_train_NoAttack, epochs=200, verbose=0)
-   
-    Y_CNN_NoAttack=model_main.predict(X_test, verbose=0)
-    Y_predict_NoAttack=[0]*len(Y_CNN_NoAttack)
-    
-    for i in range(len(Y_CNN_NoAttack)):
-        if Y_CNN_NoAttack[i]<0.5:
-              Y_CNN_NoAttack[i]=0
-        else:
-              Y_CNN_NoAttack[i]=1
-
-    for i in range(len(Y_CNN_NoAttack)):
-        Y_predict_NoAttack[i]= int(Y_CNN_NoAttack[i])    
-    #*****************************************************Result of Model without attack on X_test*****************************************
-    print("********************************Result of Model without attack******************************************************************")
-    loss, accuracy = model_main.evaluate(X_train_NoAttack, Y_train_NoAttack, verbose=2)
-    print('Accuracy for Train set: %f' % (accuracy*100))
-    print('Loss for Train set: %f' % (loss))
-    print("                                                                   ")
-    
-    loss, accuracy = model_main.evaluate(X_val_NoAttack, Y_val_NoAttack, verbose=2)
-    print('Accuracy for Validation set: %f' % (accuracy*100))
-    print('Loss for Train Validation set: %f' % (loss))
-    print("                                                                   ")
-    
-    loss, accuracy = model_main.evaluate(X_test, Y_test, verbose=2)
-    print('Accuracy for Test set: %f' % (accuracy*100))
-    print('Loss for Test set:: %f' % (loss))
-    print("                                                                   ")
-
-    TN_NoAttack, FP_NoAttack, FN_NoAttack, TP_NoAttack = confusion_matrix(Y_test,  Y_predict_NoAttack).ravel()
-    print("TN_NoAttack=",TN_NoAttack)
-    print("FP_NoAttack=",FP_NoAttack)
-    print("FN_NoAttack=",FN_NoAttack)
-    print("TP_NoAttack=",TP_NoAttack)
-    print("                                                                   ")
-
-    if (FP_NoAttack+TN_NoAttack)>0:
-         FPR_NoAttack=FP_NoAttack/(FP_NoAttack+TN_NoAttack)
-         print("The FPR_NoAttack result=", FPR_NoAttack)
-                
-    if (FP_NoAttack+TN_NoAttack)>0:
-         TPR_NoAttack=TP_NoAttack/(TP_NoAttack+FN_NoAttack)
-         print("The TPR_NoAttack result=", TPR_NoAttack)
-                
-    if (TN_NoAttack+FP_NoAttack)>0:
-        TNR_NoAttack=TN_NoAttack/(TN_NoAttack+FP_NoAttack)
-        print("The TNR_NoAttack result=", TNR_NoAttack)
-                
-    if (FN_NoAttack+TP_NoAttack)>0:
-        FNR_NoAttack=FN_NoAttack/(FN_NoAttack+TP_NoAttack)
-        print("The FNR_NoAttack result=", FNR_NoAttack)
-                
-    if ((TN_NoAttack/(TN_NoAttack+FP_NoAttack))+(TP_NoAttack/(TP_NoAttack+FP_NoAttack)))>0:
-        AUC_NoAttack=1/(2*((TN_NoAttack/(TN_NoAttack+FP_NoAttack))+(TP_NoAttack/(TP_NoAttack+FP_NoAttack))))
-        print("The AUC_NoAttack result=", AUC_NoAttack)
-                
-    if  (TP_NoAttack+TN_NoAttack+FP_NoAttack+FN_NoAttack)>0:   
-        ACC_NoAttack=(TP_NoAttack+TN_NoAttack)/(TP_NoAttack+TN_NoAttack+FP_NoAttack+FN_NoAttack)
-        print("The ACC_NoAttack result=", ACC_NoAttack)
-                
-    if  ((TP_NoAttack+FP_NoAttack)*(TP_NoAttack+FN_NoAttack)*(TN_NoAttack+FP_NoAttack)*(TN_NoAttack+FN_NoAttack))>0:
-        MCC_NoAttack=(TP_NoAttack*TN_NoAttack-FP_NoAttack*FN_NoAttack)/math.sqrt((TP_NoAttack+FP_NoAttack)*(TP_NoAttack+FN_NoAttack)*(TN_NoAttack+FP_NoAttack)*(TN_NoAttack+FN_NoAttack))
-        print("The Matthews correlation coefficient result=", MCC_NoAttack)
-    print("                                                                                                                               ")
-    print("*****************************************************End of Without Attack part************************************************")
-    print("                                                                                                                               ")
-    print("                                                                                                                               ")
-    print("                                                                                                                               ")
-    print("*****************************************************Label Flipping Attack*****************************************************")
-    print("                                                                                                                               ")
-    #************************** 
-    # finding Malware of Train data
-    malware_train= sparse.lil_matrix(X_train)
-    cl_malware=list()
-    z_m=0    
-    count_m=0
-    for i, j in enumerate(Y_train):
-         if j == 1:
-            delete_row_lil(malware_train, i-count_m)
-            count_m=count_m+1
-         else:
-            cl_malware.insert(z_m, 1)
-            z_m=z_m+1 
-    #***************************
-    #Finding Benign of Train data
-    cl_X_train=list(Y_train) 
-    benign_train=sparse.lil_matrix(X_train)
-    z_b=0    
-    count_b=0
-    cl_benign=list()
-    for i, j in enumerate(cl_X_train):
-        if j == 0:
-            delete_row_lil(benign_train, i-count_b)
-            count_b=count_b+1
-        else:
-            cl_benign.insert(z_b, 1)
-            z_b=z_b+1
-    print("***********Size of Each Data Part:**********")        
-    print("malware_train=", malware_train.get_shape())
-    print("benign_train=", benign_train.get_shape())
-    #***************************************************
-    row_malware_train,column_malware_train=malware_train.get_shape()
-    #Number_of_flipped_label=int(row_malware_train)
-    
-    X_train_LFA=X_train.copy()
-    Y_train_LFA=Y_train[:]
-    
-    row_train_LFA,column_train_LFA=X_train_LFA.get_shape()
-    clusterer = KMeans(n_clusters=2, random_state=10)
-    X=X_train_LFA.toarray()
-    t0=time.time()
-    cluster_labels = clusterer.fit_predict(X)
-    sample_silhouette_values = silhouette_samples(X, cluster_labels)
-    #print("sample_silhouette_values=",sample_silhouette_values)
-    
-    flipped_Y_train=list(Y_train_LFA)
-    counter=0
-    for new_index in range(row_train_LFA): 
-        if (sample_silhouette_values[new_index]<0.1):                           #and (flipped_Y_train[new_index]==0)
-             flipped_Y_train[new_index]=abs(flipped_Y_train[new_index]-1)     #flipped_Y_train[new_index]=1
-             counter=counter+1
-
-    print("Flipped  counter=", counter)         
-    t1=time.time()
-    print("Time for Label Flipping Attack =",t1-t0)
-    print("                                                                   ")
-    
-     #**************************************************************************
-    model_main_LFA_Final = Sequential()
-    model_main_LFA_Final.add(Embedding(row_train_LFA, 8, input_length=column_train_LFA))
-    model_main_LFA_Final.add(Conv1D(16,2, strides=2, padding='same'))
-    model_main_LFA_Final.add(MaxPooling1D(pool_size = (4), strides=(2)))
-    model_main_LFA_Final.add(Conv1D(32,2, strides=2, padding='same'))
-    model_main_LFA_Final.add(MaxPooling1D(pool_size = (4), strides=(2)))
-    model_main_LFA_Final.add(Conv1D(64,2, strides=2, padding='same'))
-    model_main_LFA_Final.add(Flatten())
-    model_main_LFA_Final.add(Dense(1, activation='sigmoid'))
-    model_main_LFA_Final.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
-    model_main_LFA_Final.fit(X_train_LFA, flipped_Y_train, epochs=200, verbose=0)
-    
-   
-    Y_predict_LFA=model_main_LFA_Final.predict(X_test, verbose=0)
-    Y_predict_LFA_Final=[0]*len(Y_predict_LFA)
-    
-    for i in range(len(Y_predict_LFA)):
-        if Y_predict_LFA[i]<0.5:
-              Y_predict_LFA[i]=0
-        else:
-              Y_predict_LFA[i]=1
-
-    for i in range(len(Y_predict_LFA)):
-        Y_predict_LFA_Final[i]= int(Y_predict_LFA[i])    
-    #*****************************************************Result of Model with  LFA ******************************************************
-    print("********************************Result of Model with LFA attack **************************************************************")
-    print("                                                                                                                              ")
-    loss, accuracy = model_main_LFA_Final.evaluate(X_train_LFA, flipped_Y_train, verbose=2)
-    print('Accuracy for Train set: %f' % (accuracy*100))
-    print('Loss for Train set: %f' % (loss))
-    print("                                                                   ")
-   
-    loss, accuracy = model_main_LFA_Final.evaluate(X_test, Y_test, verbose=2)
-    print('Accuracy for Test set: %f' % (accuracy*100))
-    print('Loss for Test set:: %f' % (loss))
-    print("                                                                   ")
-
-    TN_LFA, FP_LFA, FN_LFA, TP_LFA = confusion_matrix(Y_test,  Y_predict_LFA_Final).ravel()
-    print("TN_LFA=",TN_LFA)
-    print("FP_LFA=",FP_LFA)
-    print("FN_LFA=",FN_LFA)
-    print("TP_LFA=",TP_LFA)
-    print("                                                                   ")
-
-    if (FP_LFA+TN_LFA)>0:
-         FPR_LFA=FP_LFA/(FP_LFA+TN_LFA)
-         print("The FPR_LFA result=", FPR_LFA)
-                
-    if (FP_LFA+TN_LFA)>0:
-         TPR_LFA=TP_LFA/(TP_LFA+FN_LFA)
-         print("The TPR_LFA result=", TPR_LFA)
-                
-    if (TN_LFA+FP_LFA)>0:
-        TNR_LFA=TN_LFA/(TN_LFA+FP_LFA)
-        print("The TNR_LFA result=", TNR_LFA)
-                
-    if (FN_LFA+TP_LFA)>0:
-        FNR_LFA=FN_LFA/(FN_LFA+TP_LFA)
-        print("The FNR_LFA result=", FNR_LFA)
-                
-    if ((TN_LFA/(TN_LFA+FP_LFA))+(TP_LFA/(TP_LFA+FP_LFA)))>0:
-        AUC_LFA=1/(2*((TN_LFA/(TN_LFA+FP_LFA))+(TP_LFA/(TP_LFA+FP_LFA))))
-        print("The AUC_LFA result=", AUC_LFA)
-                
-    if  (TP_LFA+TN_LFA+FP_LFA+FN_LFA)>0:   
-        ACC_LFA=(TP_LFA+TN_LFA)/(TP_LFA+TN_LFA+FP_LFA+FN_LFA)
-        print("The ACC_LFAk result=", ACC_LFA)
-                
-    if  ((TP_LFA+FP_LFA)*(TP_LFA+FN_LFA)*(TN_LFA+FP_LFA)*(TN_LFA+FN_LFA))>0:
-        MCC_LFA=(TP_LFA*TN_LFA-FP_LFA*FN_LFA)/math.sqrt((TP_LFA+FP_LFA)*(TP_LFA+FN_LFA)*(TN_LFA+FP_LFA)*(TN_LFA+FN_LFA))
-        print("The Matthews correlation coefficient result=", MCC_LFA)   
-    print("                                                                                                                               ")
-    print("************************************************End of Label Flipping Attack part**********************************************")
-    print("                                                                                                                               ")
-    print("                                                                                                                               ")
-    print("                                                                                                                               ")
-    print("*****************************************************KNN Based Semi-Supervised Defense(KSD)************************************")
-    print("                                                                                                                               ")
-
-    X_train_KNN=X_train.copy()
-    Y_train_KNN=flipped_Y_train[:]
-    
-    X_val_KNN=X_val.copy()
-    Y_val_KNN=Y_val[:]
-    
-    row_train_KNN,column_train_KNN=X_train_KNN.get_shape()
-        
-    Number_of_flipped_label=int(row_train_KNN/50)
-    Y_train_corrected_By_KNN=list(Y_train_KNN)
-
-    c=0
-    m=0
-    t2=time.time()
-    
-    for i in list(range(Number_of_flipped_label)):
-         row_KNN=X_train_KNN.getrow(i)
-         distances = sklearn.metrics.pairwise.manhattan_distances(row_KNN,X_val_KNN)
-         indices = distances.argsort()[:10]
-         d=indices[0]
-         a=d[0:10]
-        
-         F=0
-         for j in range(len(a)):
-                 t=a[j]
-                 F=F+Y_val_KNN[t]
-         fraction=F/10
-         if fraction>=0.5:
-             Y_train_corrected_By_KNN[i]=1
-             m=m+1
-         else: 
-             Y_train_corrected_By_KNN[i]=0
-             c=c+1
-    Y_train_corrected_By_KNN_Final=np.array(Y_train_corrected_By_KNN)  
-    t3=time.time()
-    print("Time for KNN Based Semi-Supervised Defense(KSD) =",t3-t2)
-    print("                                                                   ")
-
-    model_main_KNN = Sequential()
-    model_main_KNN.add(Embedding(row_train_NoAttack, 8, input_length=column_train_NoAttack))
-    model_main_KNN.add(Conv1D(16,2, strides=2, padding='same'))
-    model_main_KNN.add(MaxPooling1D(pool_size = (4), strides=(2)))
-    model_main_KNN.add(Conv1D(32,2, strides=2, padding='same'))
-    model_main_KNN.add(MaxPooling1D(pool_size = (4), strides=(2)))
-    model_main_KNN.add(Conv1D(64,2, strides=2, padding='same'))
-    model_main_KNN.add(Flatten())
-    model_main_KNN.add(Dense(1, activation='sigmoid'))
-    model_main_KNN.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
-    model_main_KNN.fit(X_train_KNN,Y_train_corrected_By_KNN_Final, epochs=20, batch_size=32, verbose=0)
-    Y_predict_KNN=model_main_KNN.predict(X_test, verbose=0)
-    
-    Y_predict_KNN_Final=[0]*len(Y_predict_KNN)
-    for i in range(len(Y_predict_KNN)):
-        if Y_predict_KNN[i]<0.5:
-              Y_predict_KNN[i]=0
-        else:
-              Y_predict_KNN[i]=1
-    
-    for i in range(len(Y_predict_KNN)):
-        Y_predict_KNN_Final[i]= int(Y_predict_KNN[i])
-    #*****************************************************Result of Model After KNN Based Defense*****************************************
-    print("************************Result After KNN_Based Defense************************************************************************")
-    print("                                                                                                                               ")
-
-    loss, accuracy = model_main_KNN.evaluate(X_train_KNN, Y_train_KNN, verbose=0)
-    print('Accuracy for Train set: %f' % (accuracy*100))
-    print('Loss for Train set: %f' % (loss))
-    print("                                                                   ")
-
-    loss, accuracy = model_main_KNN.evaluate(X_test, Y_test, batch_size=32, verbose=0)
-    print('Accuracy After KNN-Based Defense: %f' % (accuracy*100))
-    print('Loss After KNN-Based Defense: %f' % (loss))
-    print("                                                                   ")
-
-    TN_KNN, FP_KNN, FN_KNN, TP_KNN = confusion_matrix(Y_test,   Y_predict_KNN_Final).ravel()
-    print("TN_KNN=",TN_KNN)
-    print("FP_KNN=",FP_KNN)
-    print("FN_KNN=",FN_KNN)
-    print("TP_KNN=",TP_KNN)
-    print("                                                                   ")
-
-    if (FP_KNN+TN_KNN)>0:
-         FPR_KNN=FP_KNN/(FP_KNN+TN_KNN)
-         print("The FPR_KNN result=", FPR_KNN)
-                
-    if (FP_KNN+TN_KNN)>0:
-         TPR_KNN=TP_KNN/(TP_KNN+FN_KNN)
-         print("The TPR_KNN result=", TPR_KNN)
-                
-    if (TN_KNN+FP_KNN)>0:
-        TNR_KNN=TN_KNN/(TN_KNN+FP_KNN)
-        print("The TNR_KNN result=", TNR_KNN)
-                
-    if (FN_KNN+TP_KNN)>0:
-        FNR_KNN=FN_KNN/(FN_KNN+TP_KNN)
-        print("The FNR_KNN result=", FNR_KNN)
-                
-    if ((TN_KNN/(TN_KNN+FP_KNN))+(TP_KNN/(TP_KNN+FP_KNN)))>0:
-        AUC_KNN=1/(2*((TN_KNN/(TN_KNN+FP_KNN))+(TP_KNN/(TP_KNN+FP_KNN))))
-        print("The AUC_KNN result=", AUC_KNN)
-                
-    if  (TP_KNN+TN_KNN+FP_KNN+FN_KNN)>0:   
-        ACC_KNN=(TP_KNN+TN_KNN)/(TP_KNN+TN_KNN+FP_KNN+FN_KNN)
-        print("The ACC_KNN result=", ACC_KNN)
-                
-    if  ((TP_KNN+FP_KNN)*(TP_KNN+FN_KNN)*(TN_KNN+FP_KNN)*(TN_KNN+FN_KNN))>0:
-        MCC_KNN=(TP_KNN*TN_KNN-FP_KNN*FN_KNN)/math.sqrt((TP_KNN+FP_KNN)*(TP_KNN+FN_KNN)*(TN_KNN+FP_KNN)*(TN_KNN+FN_KNN))
-        print("The Matthews correlation coefficient result=", MCC_KNN)
-    print("                                                                                                                               ")
-    print("************************************************End of KNN Based Semi-Supervised Defense(KSD) part*****************************")
-    print("                                                                                                                               ")
-    print("                                                                                                                               ")
-    print("                                                                                                                               ")    
-    print("*****************************************************Label Based Semi-supervised Defense(LSD)**********************************")
-    print("                                                                                                                               ")
-    #***********************label Propagation and Label Spreading for Using in Label Based Semi-supervised Defense(LSD) *******************
-    X_train_LSD=X_train.copy()
-    Y_train_LSD=flipped_Y_train[:]
-    
-    X_val_LSD=X_val.copy()
-    Y_val_LSD=Y_val[:]
-    row_val_LSD,column_val_LSD=X_val_LSD.get_shape()
-    row_train_LSD,column_train_LSD=X_train_LSD.get_shape()
-    
-    t4=time.time()
-    
-    labels = np.full(row_train_LSD, -1)
-    for i in range(row_val_LSD):
-        labels[i] = Y_val_LSD[i]
-
-    X=X_train_LSD.toarray()
-    label_spread = label_propagation.LabelSpreading(kernel='knn', alpha=0.8)
-    label_propa=label_propagation.LabelPropagation(kernel='knn', gamma=20, n_neighbors=7, max_iter=1000, tol=0.001, n_jobs=None)
-    label_spread.fit(X, labels)
-    label_propa.fit(X, labels)
-    output_labels_spread = label_spread.transduction_
-    output_labels_propa = label_propa.transduction_
-    #*******************Convolutional Neural Network for Using in Label Based Semi-supervised Defense(LSD) ******************************
-    CNN_model_for_LSD = Sequential()
-    CNN_model_for_LSD.add(Embedding(row_train_LSD, 8, input_length=column_train_LSD))
-    CNN_model_for_LSD.add(Conv1D(16,2, strides=2, padding='same'))
-    CNN_model_for_LSD.add(MaxPooling1D(pool_size = (4), strides=(2)))
-    CNN_model_for_LSD.add(Conv1D(32,2, strides=2, padding='same'))
-    CNN_model_for_LSD.add(MaxPooling1D(pool_size = (4), strides=(2)))
-    CNN_model_for_LSD.add(Conv1D(64,2, strides=2, padding='same'))
-    CNN_model_for_LSD.add(Flatten())
-    
-    CNN_model_for_LSD.add(Dense(1, activation='sigmoid'))
-    CNN_model_for_LSD.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
-    CNN_model_for_LSD.fit(X_train_LSD, Y_train_LSD, epochs=200, verbose=0)
-    
-    Y_predict_CNN_for_LSD=CNN_model_for_LSD.predict(X_train_LSD, verbose=0)
-    
-    Y_predict_CNN_LSD_Final=[0]*len(Y_predict_CNN_for_LSD)
-    for i in range(len(Y_predict_CNN_for_LSD)):
-        if Y_predict_CNN_for_LSD[i]<0.5:
-              Y_predict_CNN_for_LSD[i]=0
-        else:
-               Y_predict_CNN_for_LSD[i]=1
-    
-    for i in range(len(Y_predict_CNN_for_LSD)):
-        Y_predict_CNN_LSD_Final[i]= int(Y_predict_CNN_for_LSD[i])
-    #*******************************************Voting Between CNN , label Propagation and Label Spreading**************************     
-    Y_predict_LSD_Final=[0]*len(Y_train)
-    for i in range(len(Y_train)):
-        c=Y_train_LSD[i]+Y_predict_CNN_LSD_Final[i]+output_labels_propa[i]+output_labels_spread[i]
-        if 2<=c:
-            Y_predict_LSD_Final[i]=1
-        else:
-            Y_predict_LSD_Final[i]=0
-    t5=time.time()
-    print("Time for Label Based Semi-supervised Defense =",t5-t4)
-    print("                                                                                                                               ")
-    #*********************************************************************************************************************************
-    model_main_LSD = Sequential()
-    model_main_LSD.add(Embedding(row_train_LSD, 8, input_length=column_train_LSD))
-    model_main_LSD.add(Conv1D(16,2, strides=2, padding='same'))
-    model_main_LSD.add(MaxPooling1D(pool_size = (4), strides=(2)))
-    model_main_LSD.add(Conv1D(32,2, strides=2, padding='same'))
-    model_main_LSD.add(MaxPooling1D(pool_size = (4), strides=(2)))
-    model_main_LSD.add(Conv1D(64,2, strides=2, padding='same'))
-    model_main_LSD.add(Flatten())
-    model_main_LSD.add(Dense(1, activation='sigmoid'))
-    model_main_LSD.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
-    model_main_LSD.fit(X_train_LSD, Y_predict_LSD_Final, epochs=200, verbose=0)
-        
-    Y_predict_LSD_Defense=model_main_LSD.predict(X_test, verbose=0)
-    Y_predict_LSD_Defense_Final=[0]*len(Y_predict_LSD_Defense)
-    
-    for i in range(len(Y_predict_LSD_Defense)):
-        if Y_predict_LSD_Defense[i]<0.5:
-              Y_predict_LSD_Defense[i]=0
-        else:
-               Y_predict_LSD_Defense[i]=1
-    
-    for i in range(len(Y_predict_LSD_Defense)):
-        Y_predict_LSD_Defense_Final[i]= int(Y_predict_LSD_Defense[i])  
-    #**************************************Result of Model after Label Based Semi-supervised Defense(LSD)**********************************
-    print("************************Result of Model after Label Based Semi-supervised Defense(LSD)*****************************************")
-    print("                                                                                                                               ")
-    loss, accuracy = model_main.evaluate(X_train, Y_predict_LSD_Final, verbose=2)
-    print('Accuracy for Train set: %f' % (accuracy*100))
-    print('Loss for Train set: %f' % (loss))
-    print("                                                                   ")
-
-    loss, accuracy = model_main.evaluate(X_test, Y_test, verbose=2)
-    print('Accuracy for Test set: %f' % (accuracy*100))
-    print('Loss for Test set:: %f' % (loss))
-    print("                                                                   ")
-
-    TN_LSD, FP_LSD, FN_LSD, TP_LSD = confusion_matrix(Y_test,  Y_predict_LSD_Defense_Final).ravel()
-    print("TN_LSD=",TN_LSD)
-    print("FP_LSD=",FP_LSD)
-    print("FN_LSD=",FN_LSD)
-    print("TP_LSD=",TP_LSD)
-    print("                                                                   ")
-
-    if (FP_LSD+TN_LSD)>0:
-         FPR_LSD=FP_LSD/(FP_LSD+TN_LSD)
-         print("The FPR_LSD result=", FPR_LSD)
-                
-    if (FP_LSD+TN_LSD)>0:
-         TPR_LSD=TP_LSD/(TP_LSD+FN_LSD)
-         print("The TPR_LSD result=", TPR_LSD)
-                
-    if (TN_LSD+FP_LSD)>0:
-        TNR_LSD=TN_LSD/(TN_LSD+FP_LSD)
-        print("The TNR_LSD result=", TNR_LSD)
-                
-    if (FN_LSD+TP_LSD)>0:
-        FNR_LSD=FN_LSD/(FN_LSD+TP_LSD)
-        print("The FNR_LSD result=", FNR_LSD)
-                
-    if ((TN_LSD/(TN_LSD+FP_LSD))+(TP_LSD/(TP_LSD+FP_LSD)))>0:
-        AUC_LSD=1/(2*((TN_LSD/(TN_LSD+FP_LSD))+(TP_LSD/(TP_LSD+FP_LSD))))
-        print("The AUC result=", AUC_LSD)
-                
-    if  (TP_LSD+TN_LSD+FP_LSD+FN_LSD)>0:   
-        ACC_LSD=(TP_LSD+TN_LSD)/(TP_LSD+TN_LSD+FP_LSD+FN_LSD)
-        print("The ACC result=", ACC_LSD)
-                
-    if  ((TP_LSD+FP_LSD)*(TP_LSD+FN_LSD)*(TN_LSD+FP_LSD)*(TN_LSD+FN_LSD))>0:
-        MCC_LSD=(TP_LSD*TN_LSD-FP_LSD*FN_LSD)/math.sqrt((TP_LSD+FP_LSD)*(TP_LSD+FN_LSD)*(TN_LSD+FP_LSD)*(TN_LSD+FN_LSD))
-        print("The Matthews correlation coefficient result=", MCC_LSD) 
-    print("                                                                                                                               ")
-    print("*****************************************************End of Label Based Semi-supervised Defense(LSD)***************************")   
-    print("                                                                                                                               ")
-    print("                                                                                                                               ")
-    print("                                                                                                                               ")
-    print("*****************************************************Clustering Based Semi-supervised Defense(CSD)*****************************")
-    print("                                                                                                                               ")
-
-    X_train_CSD=X_train.copy()
-    Y_train_CSD=flipped_Y_train[:]
-    
-    X_val_CSD=X_val.copy()
-    Y_val_CSD=Y_val[:]
-    row_train_CSD,column_train_CSD=X_train_CSD.get_shape()
-    
-    t6=time.time()
-
-    Y_predict_val_from_CNN_Model=model_main.predict(X_val_CSD, verbose=0)
-    
-    Y_predict_val_from_CNN_Model_Final=[0]*len(Y_predict_val_from_CNN_Model)
-    for i in range(len(Y_predict_val_from_CNN_Model)):
-        if Y_predict_val_from_CNN_Model[i]<0.5:
-              Y_predict_val_from_CNN_Model[i]=0
-        else:
-              Y_predict_val_from_CNN_Model[i]=1
-    for i in range(len(Y_predict_val_from_CNN_Model)):
-       Y_predict_val_from_CNN_Model_Final[i]= int(Y_predict_val_from_CNN_Model[i])
-        
-    adjusted_rand_score_val=metrics.adjusted_rand_score(Y_val_CSD, Y_predict_val_from_CNN_Model_Final)
-    adjusted_mutual_info_score_val=metrics.adjusted_mutual_info_score(Y_val_CSD, Y_predict_val_from_CNN_Model_Final) 
-    homogeneity_score_val=metrics.homogeneity_score(Y_val_CSD, Y_predict_val_from_CNN_Model_Final) 
-    fowlkes_mallows_score_val=metrics.fowlkes_mallows_score(Y_val_CSD, Y_predict_val_from_CNN_Model_Final) 
-  
-    for i in range(20):        #row_train
-        Y_temp=Y_val_CSD.copy()
-
-        row=X_train_CSD.getrow(i)
-        X_temp = sp.lil.lil_matrix(sparse.csr_matrix(sparse.vstack((X_val_CSD, row))))
-        Y_temp.append(Y_train_CSD[i])
-        
-        Y_predict_CNN_compute_CSD=model_main.predict(X_temp, verbose=0)
-        
-        Y_predict_temp=[0]*len(Y_predict_CNN_compute_CSD)
-        
-        for n in range(len(Y_predict_CNN_compute_CSD)):
-            if Y_predict_CNN_compute_CSD[n]<0.5:
-                  Y_predict_CNN_compute_CSD[n]=0
-            else:
-                   Y_predict_CNN_compute_CSD[n]=1
-         
-        for m in range(len(Y_predict_CNN_compute_CSD)):
-            Y_predict_temp[m]= int(Y_predict_CNN_compute_CSD[m])
-
-        adjusted_rand_score_temp=metrics.adjusted_rand_score(Y_temp, Y_predict_temp)
-        adjusted_mutual_info_score_temp=metrics.adjusted_mutual_info_score(Y_temp, Y_predict_temp) 
-        homogeneity_score_temp=metrics.homogeneity_score(Y_temp, Y_predict_temp) 
-        fowlkes_mallows_score_temp=metrics.fowlkes_mallows_score(Y_temp, Y_predict_temp)
-        
-        landa1=abs(adjusted_rand_score_temp-adjusted_rand_score_val)
-        landa2=abs(adjusted_mutual_info_score_temp-adjusted_mutual_info_score_val)
-        landa3=abs(homogeneity_score_temp-homogeneity_score_val)
-        landa4=abs(fowlkes_mallows_score_temp-fowlkes_mallows_score_val)
-        
-        sum_of_diffrences=landa1+landa2+landa3+landa4
-        
-        if sum_of_diffrences<0.1:
-            X_val_CSD = sp.lil.lil_matrix(sparse.csr_matrix(sparse.vstack((X_val_CSD, row))))
-            Y_val_CSD.append(Y_train_CSD[i])          
-            Y_predict_CNN_inside_CSD=model_main.predict(X_val_CSD, verbose=0)
-            
-            Y_predict_CNN_inside_CSD_Final=[0]*len(Y_predict_CNN_inside_CSD)                   #Y_predict_CNN_inside
-            for j in range(len(Y_predict_CNN_inside_CSD)):                                     #Y_predict_CNN_inside
-                if Y_predict_CNN_inside_CSD[j]<0.5:
-                      Y_predict_CNN_inside_CSD[j]=0
-                else:
-                      Y_predict_CNN_inside_CSD[j]=1
-                       
-            for k in range(len(Y_predict_CNN_inside_CSD)):                              #Y_predict_CNN_inside
-                Y_predict_CNN_inside_CSD_Final[k]= int(Y_predict_CNN_inside_CSD[k])
-
-            adjusted_rand_score_val=metrics.adjusted_rand_score(Y_val_CSD, Y_predict_CNN_inside_CSD_Final)
-            adjusted_mutual_info_score_val=metrics.adjusted_mutual_info_score(Y_val_CSD, Y_predict_CNN_inside_CSD_Final) 
-            homogeneity_score_val=metrics.homogeneity_score(Y_val_CSD, Y_predict_CNN_inside_CSD_Final) 
-            fowlkes_mallows_score_val=metrics.fowlkes_mallows_score(Y_val_CSD, Y_predict_CNN_inside_CSD_Final) 
-    t7=time.time()
-    print("Time for Clustering Based Semi-supervised Defense =",t7-t6)
-    print("                                                                   ")
-    #**************************************************************************************** 
-    X_train_Final_CSD= X_val_CSD.copy()  
-    Y_train_Final_CSD=Y_val_CSD.copy()
-    row_train_CSD_Final,col_train_CSD_Final=X_train_Final_CSD.get_shape()    
-    
-    model_main_CSD = Sequential()
-    model_main_CSD.add(Embedding(row_train_CSD_Final, 8, input_length=col_train_CSD_Final))
-    model_main_CSD.add(Conv1D(16,2, strides=2, padding='same'))
-    model_main_CSD.add(MaxPooling1D(pool_size = (4), strides=(2)))
-    model_main_CSD.add(Conv1D(32,2, strides=2, padding='same'))
-    model_main_CSD.add(MaxPooling1D(pool_size = (4), strides=(2)))
-    model_main_CSD.add(Conv1D(64,2, strides=2, padding='same'))
-    model_main_CSD.add(Flatten())
-    model_main_CSD.add(Dense(1, activation='sigmoid'))
-    model_main_CSD.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
-    model_main_CSD.fit(X_train_Final_CSD, Y_train_Final_CSD, epochs=200, verbose=0)
-       
-    Y_test_predict_CSD=model_main_CSD.predict(X_test, verbose=0)
-    
-    Y_test_predict_CSD_Final=[0]*len(Y_test_predict_CSD)
-    for i in range(len(Y_test_predict_CSD)):
-        if Y_test_predict_CSD[i]<0.5:
-              Y_test_predict_CSD[i]=0
-        else:
-              Y_test_predict_CSD[i]=1
-    
-    for i in range(len(Y_test_predict_CSD)):
-        Y_test_predict_CSD_Final[i]= int(Y_test_predict_CSD[i])
-        
-    #*****************************************************Result of Model after Clustering Based Semi-supervised Defense(CSD)**************
-    print("***********************Result of Model after Clustering Based Semi-supervised Defense(CSD)*************************************")  
-    print("                                                                                                                               ")
-
-    loss, accuracy = model_main_CSD.evaluate(X_train_Final_CSD, Y_train_Final_CSD, verbose=2)
-    print('Accuracy for New Train set: %f' % (accuracy*100))
-    print('Loss for New Train set: %f' % (loss))
-    print("                                                                   ")
-
-    loss, accuracy = model_main_CSD.evaluate(X_test, Y_test, verbose=2)
-    print('Accuracy for Test set: %f' % (accuracy*100))
-    print('Loss for Test set:: %f' % (loss))
-    print("                                                                   ")
-
-    TN_CSD, FP_CSD, FN_CSD, TP_CSD = confusion_matrix(Y_test,  Y_test_predict_CSD_Final).ravel()
-    print("TN_CSD=",TN_CSD)
-    print("FP_CSD=",FP_CSD)
-    print("FN_CSD=",FN_CSD)
-    print("TP_CSD=",TP_CSD)
-    print("                                                                   ")
-
-    if (FP_CSD+TN_CSD)>0:
-         FPR_CSD=FP_CSD/(FP_CSD+TN_CSD)
-         print("The FPR_CSD result=", FPR_CSD)
-                
-    if (FP_CSD+TN_CSD)>0:
-         TPR_CSD=TP_CSD/(TP_CSD+FN_CSD)
-         print("The TPR_CSD result=", TPR_CSD)
-                
-    if (TN_CSD+FP_CSD)>0:
-        TNR_CSD=TN_CSD/(TN_CSD+FP_CSD)
-        print("The TNR_CSD result=", TNR_CSD)
-                
-    if (FN_CSD+TP_CSD)>0:
-        FNR_CSD=FN_CSD/(FN_CSD+TP_CSD)
-        print("The FNR_CSD result=", FNR_CSD)
-                
-    if ((TN_CSD/(TN_CSD+FP_CSD))+(TP_CSD/(TP_CSD+FP_CSD)))>0:
-        AUC_CSD=1/(2*((TN_CSD/(TN_CSD+FP_CSD))+(TP_CSD/(TP_CSD+FP_CSD))))
-        print("The AUC_CSD result=", AUC_CSD)
-                
-    if  (TP_CSD+TN_CSD+FP_CSD+FN_CSD)>0:   
-        ACC_CSD=(TP_CSD+TN_CSD)/(TP_CSD+TN_CSD+FP_CSD+FN_CSD)
-        print("The ACC_CSD result=", ACC_CSD)
-                
-    if  ((TP_CSD+FP_CSD)*(TP_CSD+FN_CSD)*(TN_CSD+FP_CSD)*(TN_CSD+FN_CSD))>0:
-        MCC_CSD=(TP_CSD*TN_CSD-FP_CSD*FN_CSD)/math.sqrt((TP_CSD+FP_CSD)*(TP_CSD+FN_CSD)*(TN_CSD+FP_CSD)*(TN_CSD+FN_CSD))
-        print("The Matthews correlation coefficient result=", MCC_CSD)
-    print("                                                                   ")
-    print("************************************************End of Clustering Based Semi-supervised Defense(LSD)***************************")   
-    print("                                                                                                                               ")
-    print("                                                                                                                               ")
-    print("                                                                                                                               ")
-#******************************************************************************************************************************************
-# Run the pipeline only when this file is executed as a script, not on import.
-if __name__ == "__main__":
-    main()
-#******************************************************************************  
\ No newline at end of file
diff --git a/Taheri2020NCAA-labelflipping_Sourcecode/Label_Flipping_Paper_without_Feature_Selection(LSD_CSD_KDD).py b/Taheri2020NCAA-labelflipping_Sourcecode/Label_Flipping_Paper_without_Feature_Selection(LSD_CSD_KDD).py
deleted file mode 100644
index baf5038c8542251e51c12edfdbae2473cab18d83..0000000000000000000000000000000000000000
--- a/Taheri2020NCAA-labelflipping_Sourcecode/Label_Flipping_Paper_without_Feature_Selection(LSD_CSD_KDD).py
+++ /dev/null
@@ -1,833 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Fri Jun 21 14:25:17 2019
-
-@author: Rahim
-"""
-#*****************************************************************import Library*****************************************************************************
-from __future__ import print_function
-from sklearn.feature_selection import SelectFromModel
-from sklearn.feature_selection import SelectKBest, f_regression
-from sklearn.model_selection import KFold
-from sklearn.model_selection import cross_val_score
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.metrics import classification_report
-from sklearn.model_selection import train_test_split 
-from sklearn.metrics import confusion_matrix
-from sklearn import model_selection
-from sklearn.feature_selection import RFE
-from sklearn.linear_model import LogisticRegression
-from sklearn.ensemble import ExtraTreesClassifier
-from sklearn.ensemble import RandomForestRegressor
-from scipy.sparse import csr_matrix, vstack, hstack
-from scipy.sparse import coo_matrix
-from keras.preprocessing.text import one_hot
-from sklearn import metrics
-from sklearn.metrics import silhouette_samples, silhouette_score
-from sklearn.semi_supervised import LabelPropagation
-from sklearn.semi_supervised import LabelSpreading
-from sklearn.semi_supervised import label_propagation
-from sklearn.metrics import roc_auc_score
-from sklearn.metrics import f1_score
-from sklearn.cluster import KMeans
-import math
-#import keras
-from keras.models import Sequential
-from keras.layers import Dense, Dropout, Activation , Flatten
-from sklearn.metrics import log_loss
-from keras.optimizers import SGD
-from keras.layers.normalization import BatchNormalization
-from keras.layers.convolutional import UpSampling2D
-from keras.layers.convolutional import Conv2D, MaxPooling2D, MaxPooling1D
-from keras.layers.embeddings import Embedding
-from scipy import sparse
-import pandas as pd
-import numpy as np
-#import random
-import sklearn
-from sklearn.metrics.pairwise import manhattan_distances
-from keras.models import Model
-from keras.layers import  Conv1D, multiply, GlobalMaxPool1D, Input , Lambda
-import time
-import argparse
-#import math
-from numpy import *
-import os.path as osp
-import scipy.sparse as sp
-import pickle
-from sklearn.metrics import accuracy_score
-#*********************************************************************************************************************************
-# Keys and label values used when reading the pickled sample tables.
-CLASS = 'class'    # record key whose value is compared against CLASS_BEN
-CLASS_BEN = 'B'    # class value that convert_to_matrix maps to label 1
-CLASS_MAL = 'M'    # NOTE(review): presumably the malware label; not referenced in the visible code
-DATA = 'data'      # record key holding the iterable of features of a sample
-#********************************************Functions that will be used in this program*****************************************
-def parse_args():
-    """Parse the command-line options of the script.
-
-    Returns the argparse namespace.  ``input_tables`` holds the values that
-    follow ``-i`` / ``--input-tables`` (zero or more), or None when the
-    option is not given at all.
-    """
-    cli = argparse.ArgumentParser()
-    cli.add_argument(
-        '-i', '--input-tables',
-        dest='input_tables',
-        nargs='*',
-    )
-    return cli.parse_args()
-#*********************************************************************************************************************************
-def read_table(table_file):
-    """Load a pickled table and collect every feature it mentions.
-
-    The file is read with ``pickle.load`` until EOF.  When it holds several
-    pickled objects only the LAST one is kept, because each load replaces
-    the previous result.  An empty file yields an empty dict.
-
-    Returns a ``(table, f_set)`` tuple: the loaded table and the set of all
-    items found under the DATA key of its values.
-    """
-    table = dict()
-
-    # SECURITY: unpickling can execute arbitrary code; only pass files that
-    # come from a trusted source.
-    with open(table_file, 'rb') as handle:
-        while True:
-            try:
-                table = pickle.load(handle)
-            except EOFError:
-                break
-
-    f_set = set()
-    # Only the records are needed here; the table keys are not used.
-    for record in table.values():
-        f_set.update(record[DATA])
-
-    return table, f_set
-#*********************************************************************************************************************************
-def build_table(tables):
-    """Merge several tables into one and list every key that occurs.
-
-    Later tables overwrite earlier ones on duplicate keys.  Returns a
-    ``(full_table, files)`` tuple where ``files`` is the list of distinct
-    keys (in set order, i.e. not sorted).
-    """
-    merged = dict()
-    seen_keys = set()
-
-    for current in tables:
-        seen_keys.update(current.keys())
-        merged.update(current.items())
-
-    return merged, list(seen_keys)
-#*********************************************************************************************************************************
-def convert_to_matrix(table, features, files):
-    """Build the sample-by-feature indicator matrix and the label vector.
-
-    Parameters:
-        table: mapping of file key -> record; every record is indexed with
-            the CLASS and DATA keys.
-        features: list of feature names, one per matrix column.
-        files: list of file keys, one per matrix row.
-
-    Returns:
-        ``(mat, cl)``: ``mat`` is an int8 LIL matrix of shape
-        ``(len(files), len(features))`` holding 1 where a file lists a
-        feature; ``cl`` is a list with 1 for records whose class equals
-        CLASS_BEN and 0 otherwise.
-
-    Raises:
-        ValueError: if a key of ``table`` does not occur in ``files``.
-    """
-    # Use the public constructor; the ``scipy.sparse.lil`` submodule path is
-    # a deprecated private namespace.
-    mat = sp.lil_matrix((len(files), len(features)), dtype=np.int8)
-
-    print("Input Data Size =  ", mat.get_shape())
-
-    # Position lookups are built once (first occurrence wins, like
-    # list.index) instead of a linear list.index() scan per file/feature.
-    row_of = {}
-    for pos, name in enumerate(files):
-        row_of.setdefault(name, pos)
-    col_of = {}
-    for pos, name in enumerate(features):
-        col_of.setdefault(name, pos)
-
-    # the response vector
-    cl = [0] * len(files)
-
-    for key, val in table.items():
-        if key not in row_of:
-            raise ValueError("%r is not in list" % (key,))
-        k = row_of[key]
-
-        # Compare by value: "is" on strings only works when both objects
-        # happen to be the same interned instance.
-        if val[CLASS] == CLASS_BEN:
-            cl[k] = 1
-
-        for v in val[DATA]:
-            idx = col_of.get(v)
-            if idx is None:
-                # Unknown feature: report it and carry on with the rest.
-                print("%r is not in list" % (v,))
-                continue
-            mat[k, idx] = 1
-
-    return mat, cl
-#******************************************************************************
-def delete_row_lil(mat, i):
-    """Remove row ``i`` from the LIL matrix ``mat`` in place.
-
-    Nothing is returned; the row storage and the recorded shape of ``mat``
-    are modified directly.
-
-    Raises:
-        ValueError: if ``mat`` is not a LIL matrix.
-    """
-    # Check against the public class name; the ``scipy.sparse.lil`` module
-    # path is a deprecated private namespace.
-    if not isinstance(mat, sp.lil_matrix):
-        raise ValueError("works only for LIL format -- use .tolil() first")
-    # Drop entry i from both per-row arrays so they stay aligned.
-    mat.rows = np.delete(mat.rows, i)
-    mat.data = np.delete(mat.data, i)
-    # Shrink the recorded row count to match the shortened arrays.
-    mat._shape = (mat._shape[0] - 1, mat._shape[1])
-#******************************************************************************
-def relevant_features(data, response_vector, features):
-    """Order the features by their recursive-feature-elimination rank.
-
-    A RandomForestRegressor is wrapped in RFE that eliminates down to a
-    single feature and is fitted on ``data`` / ``response_vector``; the
-    resulting ``ranking_`` is used to sort the columns.
-
-    Parameters:
-        data: training samples passed to ``RFE.fit``.
-        response_vector: targets passed to ``RFE.fit``.
-        features: list of feature names, position j naming column j.
-
-    Returns:
-        ``(rel_features, ranked_index)``: the feature names sorted by
-        ascending rank, and the matching column indices in the same order.
-    """
-    model = RandomForestRegressor()
-    # n_features_to_select is passed by keyword: current scikit-learn
-    # releases no longer accept it positionally.
-    rfe = RFE(model, n_features_to_select=1)
-    fit = rfe.fit(data, response_vector)
-
-    # ranking_[j] is the rank of column j, not an index into ``features``,
-    # so sort the column positions by rank instead of indexing with ranks.
-    order = np.argsort(fit.ranking_)
-    ranked_index = [int(j) for j in order if j < len(features)]
-    rel_features = [features[j] for j in ranked_index]
-
-    return rel_features, ranked_index
-#*****************************************************************Main Function*******************************************************
-def main():
-    args = parse_args()
-    tables = []
-    f_set = set()
-    #read the data
-    for t_files in args.input_tables:
-        table, features = read_table(t_files)
-        f_set = f_set.union(features)
-        tables.append(table)    
-    print("                                                                                                                               ")
-    print("                                                                                                                               ")
-    print("*********Semi-Supervised Deep Learning Based Approach Against Label Flipping Attack in Malware Detection System*****************")
-    print("                                                                                                                               ")
-    #************************************build table from data and convert to matrix***************************************************
-    full_table, files = build_table(tables)
-    files.sort()
-    features = list(f_set)
-    features.sort()
-    mat, cl = convert_to_matrix(full_table, features, files)       
-    data = sparse.lil_matrix(sparse.csr_matrix(mat))
-    #******************************************Split data to train , test and validation**********************************************
-    seed = 10
-    test_size = 0.2
-    X_train, X_test, Y_train, Y_test= train_test_split(data, cl, test_size= test_size, random_state=seed)
-    test_size = 0.25
-    X_train, X_val, Y_train, Y_val= train_test_split(X_train, Y_train, test_size= test_size, random_state=seed)      
-    #***********************************************************************************************************************************
-    X_train=sparse.csr_matrix(X_train)
-    print("row_train,column_train=", X_train.get_shape())
-    print("                                                                   ")
-    X_val=sparse.csr_matrix(X_val)
-    row_val,column_val=X_val.get_shape()
-    print("row_val,column_val=",X_val.get_shape())  
-    print("                                                                   ")
-    X_test=sparse.csr_matrix(X_test)
-    row_test,column_test=X_test.get_shape()
-    print("row_test,column_test=",X_test.get_shape()) 
-    print("                                                                   ")
-    print("********************************************************************")
-    #**************************************************Model Definition*****************************************************************
-    X_train_NoAttack=X_train.copy()
-    Y_train_NoAttack=Y_train[:]
-    
-    X_val_NoAttack=X_val.copy()
-    Y_val_NoAttack=Y_val[:]
-    
-    row_train_NoAttack,column_train_NoAttack=X_train_NoAttack.get_shape()
-    model_main = Sequential()
-    model_main.add(Embedding(row_train_NoAttack, 8, input_length=column_train_NoAttack))
-    model_main.add(Conv1D(16,2, strides=2, padding='same'))
-    model_main.add(MaxPooling1D(pool_size = (4), strides=(2)))
-    model_main.add(Conv1D(32,2, strides=2, padding='same'))
-    model_main.add(MaxPooling1D(pool_size = (4), strides=(2)))
-    model_main.add(Conv1D(64,2, strides=2, padding='same'))
-    model_main.add(Flatten())
-    model_main.add(Dense(1, activation='sigmoid'))
-    model_main.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
-    model_main.fit(X_train_NoAttack, Y_train_NoAttack, epochs=200, verbose=0)
-   
-    Y_CNN_NoAttack=model_main.predict(X_test, verbose=0)
-    Y_predict_NoAttack=[0]*len(Y_CNN_NoAttack)
-    
-    for i in range(len(Y_CNN_NoAttack)):
-        if Y_CNN_NoAttack[i]<0.5:
-              Y_CNN_NoAttack[i]=0
-        else:
-              Y_CNN_NoAttack[i]=1
-
-    for i in range(len(Y_CNN_NoAttack)):
-        Y_predict_NoAttack[i]= int(Y_CNN_NoAttack[i])    
-    #*****************************************************Result of Model without attack on X_test*****************************************
-    print("********************************Result of Model without attack******************************************************************")
-    loss, accuracy = model_main.evaluate(X_train_NoAttack, Y_train_NoAttack, verbose=2)
-    print('Accuracy for Train set: %f' % (accuracy*100))
-    print('Loss for Train set: %f' % (loss))
-    print("                                                                   ")
-    
-    loss, accuracy = model_main.evaluate(X_val_NoAttack, Y_val_NoAttack, verbose=2)
-    print('Accuracy for Validation set: %f' % (accuracy*100))
-    print('Loss for Train Validation set: %f' % (loss))
-    print("                                                                   ")
-    
-    loss, accuracy = model_main.evaluate(X_test, Y_test, verbose=2)
-    print('Accuracy for Test set: %f' % (accuracy*100))
-    print('Loss for Test set:: %f' % (loss))
-    print("                                                                   ")
-
-    TN_NoAttack, FP_NoAttack, FN_NoAttack, TP_NoAttack = confusion_matrix(Y_test,  Y_predict_NoAttack).ravel()
-    print("TN_NoAttack=",TN_NoAttack)
-    print("FP_NoAttack=",FP_NoAttack)
-    print("FN_NoAttack=",FN_NoAttack)
-    print("TP_NoAttack=",TP_NoAttack)
-    print("                                                                   ")
-
-    if (FP_NoAttack+TN_NoAttack)>0:
-         FPR_NoAttack=FP_NoAttack/(FP_NoAttack+TN_NoAttack)
-         print("The FPR_NoAttack result=", FPR_NoAttack)
-                
-    if (FP_NoAttack+TN_NoAttack)>0:
-         TPR_NoAttack=TP_NoAttack/(TP_NoAttack+FN_NoAttack)
-         print("The TPR_NoAttack result=", TPR_NoAttack)
-                
-    if (TN_NoAttack+FP_NoAttack)>0:
-        TNR_NoAttack=TN_NoAttack/(TN_NoAttack+FP_NoAttack)
-        print("The TNR_NoAttack result=", TNR_NoAttack)
-                
-    if (FN_NoAttack+TP_NoAttack)>0:
-        FNR_NoAttack=FN_NoAttack/(FN_NoAttack+TP_NoAttack)
-        print("The FNR_NoAttack result=", FNR_NoAttack)
-                
-    if ((TN_NoAttack/(TN_NoAttack+FP_NoAttack))+(TP_NoAttack/(TP_NoAttack+FP_NoAttack)))>0:
-        AUC_NoAttack=1/(2*((TN_NoAttack/(TN_NoAttack+FP_NoAttack))+(TP_NoAttack/(TP_NoAttack+FP_NoAttack))))
-        print("The AUC_NoAttack result=", AUC_NoAttack)
-                
-    if  (TP_NoAttack+TN_NoAttack+FP_NoAttack+FN_NoAttack)>0:   
-        ACC_NoAttack=(TP_NoAttack+TN_NoAttack)/(TP_NoAttack+TN_NoAttack+FP_NoAttack+FN_NoAttack)
-        print("The ACC_NoAttack result=", ACC_NoAttack)
-                
-    if  ((TP_NoAttack+FP_NoAttack)*(TP_NoAttack+FN_NoAttack)*(TN_NoAttack+FP_NoAttack)*(TN_NoAttack+FN_NoAttack))>0:
-        MCC_NoAttack=(TP_NoAttack*TN_NoAttack-FP_NoAttack*FN_NoAttack)/math.sqrt((TP_NoAttack+FP_NoAttack)*(TP_NoAttack+FN_NoAttack)*(TN_NoAttack+FP_NoAttack)*(TN_NoAttack+FN_NoAttack))
-        print("The Matthews correlation coefficient result=", MCC_NoAttack)
-    print("                                                                                                                               ")
-    print("*****************************************************End of Without Attack part************************************************")
-    print("                                                                                                                               ")
-    print("                                                                                                                               ")
-    print("                                                                                                                               ")
-    print("*****************************************************Label Flipping Attack*****************************************************")
-    print("                                                                                                                               ")
-    #************************** 
-    # finding Malware of Train data
-    malware_train= sparse.lil_matrix(X_train)
-    cl_malware=list()
-    z_m=0    
-    count_m=0
-    for i, j in enumerate(Y_train):
-         if j == 1:
-            delete_row_lil(malware_train, i-count_m)
-            count_m=count_m+1
-         else:
-            cl_malware.insert(z_m, 1)
-            z_m=z_m+1 
-    #***************************
-    #Finding Benign of Train data
-    cl_X_train=list(Y_train) 
-    benign_train=sparse.lil_matrix(X_train)
-    z_b=0    
-    count_b=0
-    cl_benign=list()
-    for i, j in enumerate(cl_X_train):
-        if j == 0:
-            delete_row_lil(benign_train, i-count_b)
-            count_b=count_b+1
-        else:
-            cl_benign.insert(z_b, 1)
-            z_b=z_b+1
-    print("***********Size of Each Data Part:**********")        
-    print("malware_train=", malware_train.get_shape())
-    print("benign_train=", benign_train.get_shape())
-    #***************************************************
-    row_malware_train,column_malware_train=malware_train.get_shape()
-    #Number_of_flipped_label=int(row_malware_train)
-    
-    X_train_LFA=X_train.copy()
-    Y_train_LFA=Y_train[:]
-    
-    row_train_LFA,column_train_LFA=X_train_LFA.get_shape()
-    clusterer = KMeans(n_clusters=2, random_state=10)
-    X=X_train_LFA.toarray()
-    t0=time.time()
-    cluster_labels = clusterer.fit_predict(X)
-    sample_silhouette_values = silhouette_samples(X, cluster_labels)
-    #print("sample_silhouette_values=",sample_silhouette_values)
-    
-    flipped_Y_train=list(Y_train_LFA)
-    counter=0
-    for new_index in range(row_train_LFA): 
-        if (sample_silhouette_values[new_index]<0.1):                           #and (flipped_Y_train[new_index]==0)
-             flipped_Y_train[new_index]=abs(flipped_Y_train[new_index]-1)     #flipped_Y_train[new_index]=1
-             counter=counter+1
-
-    print("Flipped  counter=", counter)         
-    t1=time.time()
-    print("Time for Label Flipping Attack =",t1-t0)
-    print("                                                                   ")
-    
-     #**************************************************************************
-    model_main_LFA_Final = Sequential()
-    model_main_LFA_Final.add(Embedding(row_train_LFA, 8, input_length=column_train_LFA))
-    model_main_LFA_Final.add(Conv1D(16,2, strides=2, padding='same'))
-    model_main_LFA_Final.add(MaxPooling1D(pool_size = (4), strides=(2)))
-    model_main_LFA_Final.add(Conv1D(32,2, strides=2, padding='same'))
-    model_main_LFA_Final.add(MaxPooling1D(pool_size = (4), strides=(2)))
-    model_main_LFA_Final.add(Conv1D(64,2, strides=2, padding='same'))
-    model_main_LFA_Final.add(Flatten())
-    model_main_LFA_Final.add(Dense(1, activation='sigmoid'))
-    model_main_LFA_Final.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
-    model_main_LFA_Final.fit(X_train_LFA, flipped_Y_train, epochs=200, verbose=0)
-    
-   
-    Y_predict_LFA=model_main_LFA_Final.predict(X_test, verbose=0)
-    Y_predict_LFA_Final=[0]*len(Y_predict_LFA)
-    
-    for i in range(len(Y_predict_LFA)):
-        if Y_predict_LFA[i]<0.5:
-              Y_predict_LFA[i]=0
-        else:
-              Y_predict_LFA[i]=1
-
-    for i in range(len(Y_predict_LFA)):
-        Y_predict_LFA_Final[i]= int(Y_predict_LFA[i])    
-    #*****************************************************Result of Model with  LFA ******************************************************
-    print("********************************Result of Model with LFA attack **************************************************************")
-    print("                                                                                                                              ")
-    loss, accuracy = model_main_LFA_Final.evaluate(X_train_LFA, flipped_Y_train, verbose=2)
-    print('Accuracy for Train set: %f' % (accuracy*100))
-    print('Loss for Train set: %f' % (loss))
-    print("                                                                   ")
-   
-    loss, accuracy = model_main_LFA_Final.evaluate(X_test, Y_test, verbose=2)
-    print('Accuracy for Test set: %f' % (accuracy*100))
-    print('Loss for Test set:: %f' % (loss))
-    print("                                                                   ")
-
-    TN_LFA, FP_LFA, FN_LFA, TP_LFA = confusion_matrix(Y_test,  Y_predict_LFA_Final).ravel()
-    print("TN_LFA=",TN_LFA)
-    print("FP_LFA=",FP_LFA)
-    print("FN_LFA=",FN_LFA)
-    print("TP_LFA=",TP_LFA)
-    print("                                                                   ")
-
-    if (FP_LFA+TN_LFA)>0:
-         FPR_LFA=FP_LFA/(FP_LFA+TN_LFA)
-         print("The FPR_LFA result=", FPR_LFA)
-                
-    if (FP_LFA+TN_LFA)>0:
-         TPR_LFA=TP_LFA/(TP_LFA+FN_LFA)
-         print("The TPR_LFA result=", TPR_LFA)
-                
-    if (TN_LFA+FP_LFA)>0:
-        TNR_LFA=TN_LFA/(TN_LFA+FP_LFA)
-        print("The TNR_LFA result=", TNR_LFA)
-                
-    if (FN_LFA+TP_LFA)>0:
-        FNR_LFA=FN_LFA/(FN_LFA+TP_LFA)
-        print("The FNR_LFA result=", FNR_LFA)
-                
-    if ((TN_LFA/(TN_LFA+FP_LFA))+(TP_LFA/(TP_LFA+FP_LFA)))>0:
-        AUC_LFA=1/(2*((TN_LFA/(TN_LFA+FP_LFA))+(TP_LFA/(TP_LFA+FP_LFA))))
-        print("The AUC_LFA result=", AUC_LFA)
-                
-    if  (TP_LFA+TN_LFA+FP_LFA+FN_LFA)>0:   
-        ACC_LFA=(TP_LFA+TN_LFA)/(TP_LFA+TN_LFA+FP_LFA+FN_LFA)
-        print("The ACC_LFAk result=", ACC_LFA)
-                
-    if  ((TP_LFA+FP_LFA)*(TP_LFA+FN_LFA)*(TN_LFA+FP_LFA)*(TN_LFA+FN_LFA))>0:
-        MCC_LFA=(TP_LFA*TN_LFA-FP_LFA*FN_LFA)/math.sqrt((TP_LFA+FP_LFA)*(TP_LFA+FN_LFA)*(TN_LFA+FP_LFA)*(TN_LFA+FN_LFA))
-        print("The Matthews correlation coefficient result=", MCC_LFA)   
-    print("                                                                                                                               ")
-    print("************************************************End of Label Flipping Attack part**********************************************")
-    print("                                                                                                                               ")
-    print("                                                                                                                               ")
-    print("                                                                                                                               ")
-    print("*****************************************************KNN Based Semi-Supervised Defense(KSD)************************************")
-    print("                                                                                                                               ")
-
-    X_train_KNN=X_train.copy()
-    Y_train_KNN=flipped_Y_train[:]
-    
-    X_val_KNN=X_val.copy()
-    Y_val_KNN=Y_val[:]
-    
-    row_train_KNN,column_train_KNN=X_train_KNN.get_shape()
-        
-    Number_of_flipped_label=int(row_train_KNN/50)
-    Y_train_corrected_By_KNN=list(Y_train_KNN)
-
-    c=0
-    m=0
-    t2=time.time()
-    
-    for i in list(range(Number_of_flipped_label)):
-         row_KNN=X_train_KNN.getrow(i)
-         distances = sklearn.metrics.pairwise.manhattan_distances(row_KNN,X_val_KNN)
-         indices = distances.argsort()[:10]
-         d=indices[0]
-         a=d[0:10]
-        
-         F=0
-         for j in range(len(a)):
-                 t=a[j]
-                 F=F+Y_val_KNN[t]
-         fraction=F/10
-         if fraction>=0.5:
-             Y_train_corrected_By_KNN[i]=1
-             m=m+1
-         else: 
-             Y_train_corrected_By_KNN[i]=0
-             c=c+1
-    Y_train_corrected_By_KNN_Final=np.array(Y_train_corrected_By_KNN)  
-    t3=time.time()
-    print("Time for KNN Based Semi-Supervised Defense(KSD) =",t3-t2)
-    print("                                                                   ")
-
-    model_main_KNN = Sequential()
-    model_main_KNN.add(Embedding(row_train_NoAttack, 8, input_length=column_train_NoAttack))
-    model_main_KNN.add(Conv1D(16,2, strides=2, padding='same'))
-    model_main_KNN.add(MaxPooling1D(pool_size = (4), strides=(2)))
-    model_main_KNN.add(Conv1D(32,2, strides=2, padding='same'))
-    model_main_KNN.add(MaxPooling1D(pool_size = (4), strides=(2)))
-    model_main_KNN.add(Conv1D(64,2, strides=2, padding='same'))
-    model_main_KNN.add(Flatten())
-    model_main_KNN.add(Dense(1, activation='sigmoid'))
-    model_main_KNN.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
-    model_main_KNN.fit(X_train_KNN,Y_train_corrected_By_KNN_Final, epochs=20, batch_size=32, verbose=0)
-    Y_predict_KNN=model_main_KNN.predict(X_test, verbose=0)
-    
-    Y_predict_KNN_Final=[0]*len(Y_predict_KNN)
-    for i in range(len(Y_predict_KNN)):
-        if Y_predict_KNN[i]<0.5:
-              Y_predict_KNN[i]=0
-        else:
-              Y_predict_KNN[i]=1
-    
-    for i in range(len(Y_predict_KNN)):
-        Y_predict_KNN_Final[i]= int(Y_predict_KNN[i])
-    #*****************************************************Result of Model After KNN Based Defense*****************************************
-    print("************************Result After KNN_Based Defense************************************************************************")
-    print("                                                                                                                               ")
-
-    loss, accuracy = model_main_KNN.evaluate(X_train_KNN, Y_train_KNN, verbose=0)
-    print('Accuracy for Train set: %f' % (accuracy*100))
-    print('Loss for Train set: %f' % (loss))
-    print("                                                                   ")
-
-    loss, accuracy = model_main_KNN.evaluate(X_test, Y_test, batch_size=32, verbose=0)
-    print('Accuracy After KNN-Based Defense: %f' % (accuracy*100))
-    print('Loss After KNN-Based Defense: %f' % (loss))
-    print("                                                                   ")
-
-    TN_KNN, FP_KNN, FN_KNN, TP_KNN = confusion_matrix(Y_test,   Y_predict_KNN_Final).ravel()
-    print("TN_KNN=",TN_KNN)
-    print("FP_KNN=",FP_KNN)
-    print("FN_KNN=",FN_KNN)
-    print("TP_KNN=",TP_KNN)
-    print("                                                                   ")
-
-    if (FP_KNN+TN_KNN)>0:
-         FPR_KNN=FP_KNN/(FP_KNN+TN_KNN)
-         print("The FPR_KNN result=", FPR_KNN)
-                
-    if (FP_KNN+TN_KNN)>0:
-         TPR_KNN=TP_KNN/(TP_KNN+FN_KNN)
-         print("The TPR_KNN result=", TPR_KNN)
-                
-    if (TN_KNN+FP_KNN)>0:
-        TNR_KNN=TN_KNN/(TN_KNN+FP_KNN)
-        print("The TNR_KNN result=", TNR_KNN)
-                
-    if (FN_KNN+TP_KNN)>0:
-        FNR_KNN=FN_KNN/(FN_KNN+TP_KNN)
-        print("The FNR_KNN result=", FNR_KNN)
-                
-    if ((TN_KNN/(TN_KNN+FP_KNN))+(TP_KNN/(TP_KNN+FP_KNN)))>0:
-        AUC_KNN=1/(2*((TN_KNN/(TN_KNN+FP_KNN))+(TP_KNN/(TP_KNN+FP_KNN))))
-        print("The AUC_KNN result=", AUC_KNN)
-                
-    if  (TP_KNN+TN_KNN+FP_KNN+FN_KNN)>0:   
-        ACC_KNN=(TP_KNN+TN_KNN)/(TP_KNN+TN_KNN+FP_KNN+FN_KNN)
-        print("The ACC_KNN result=", ACC_KNN)
-                
-    if  ((TP_KNN+FP_KNN)*(TP_KNN+FN_KNN)*(TN_KNN+FP_KNN)*(TN_KNN+FN_KNN))>0:
-        MCC_KNN=(TP_KNN*TN_KNN-FP_KNN*FN_KNN)/math.sqrt((TP_KNN+FP_KNN)*(TP_KNN+FN_KNN)*(TN_KNN+FP_KNN)*(TN_KNN+FN_KNN))
-        print("The Matthews correlation coefficient result=", MCC_KNN)
-    print("                                                                                                                               ")
-    print("************************************************End of KNN Based Semi-Supervised Defense(KSD) part*****************************")
-    print("                                                                                                                               ")
-    print("                                                                                                                               ")
-    print("                                                                                                                               ")    
-    print("*****************************************************Label Based Semi-supervised Defense(LSD)**********************************")
-    print("                                                                                                                               ")
-    #***********************label Propagation and Label Spreading for Using in Label Based Semi-supervised Defense(LSD) *******************
-    X_train_LSD=X_train.copy()
-    Y_train_LSD=flipped_Y_train[:]
-    
-    X_val_LSD=X_val.copy()
-    Y_val_LSD=Y_val[:]
-    row_val_LSD,column_val_LSD=X_val_LSD.get_shape()
-    row_train_LSD,column_train_LSD=X_train_LSD.get_shape()
-    
-    t4=time.time()
-    
-    labels = np.full(row_train_LSD, -1)
-    for i in range(row_val_LSD):
-        labels[i] = Y_val_LSD[i]
-
-    X=X_train_LSD.toarray()
-    label_spread = label_propagation.LabelSpreading(kernel='knn', alpha=0.8)
-    label_propa=label_propagation.LabelPropagation(kernel='knn', gamma=20, n_neighbors=7, max_iter=1000, tol=0.001, n_jobs=None)
-    label_spread.fit(X, labels)
-    label_propa.fit(X, labels)
-    output_labels_spread = label_spread.transduction_
-    output_labels_propa = label_propa.transduction_
-    #*******************Convolutional Neural Network for Using in Label Based Semi-supervised Defense(LSD) ******************************
-    CNN_model_for_LSD = Sequential()
-    CNN_model_for_LSD.add(Embedding(row_train_LSD, 8, input_length=column_train_LSD))
-    CNN_model_for_LSD.add(Conv1D(16,2, strides=2, padding='same'))
-    CNN_model_for_LSD.add(MaxPooling1D(pool_size = (4), strides=(2)))
-    CNN_model_for_LSD.add(Conv1D(32,2, strides=2, padding='same'))
-    CNN_model_for_LSD.add(MaxPooling1D(pool_size = (4), strides=(2)))
-    CNN_model_for_LSD.add(Conv1D(64,2, strides=2, padding='same'))
-    CNN_model_for_LSD.add(Flatten())
-    
-    CNN_model_for_LSD.add(Dense(1, activation='sigmoid'))
-    CNN_model_for_LSD.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
-    CNN_model_for_LSD.fit(X_train_LSD, Y_train_LSD, epochs=200, verbose=0)
-    
-    Y_predict_CNN_for_LSD=CNN_model_for_LSD.predict(X_train_LSD, verbose=0)
-    
-    Y_predict_CNN_LSD_Final=[0]*len(Y_predict_CNN_for_LSD)
-    for i in range(len(Y_predict_CNN_for_LSD)):
-        if Y_predict_CNN_for_LSD[i]<0.5:
-              Y_predict_CNN_for_LSD[i]=0
-        else:
-               Y_predict_CNN_for_LSD[i]=1
-    
-    for i in range(len(Y_predict_CNN_for_LSD)):
-        Y_predict_CNN_LSD_Final[i]= int(Y_predict_CNN_for_LSD[i])
-    #*******************************************Voting Between CNN , label Propagation and Label Spreading**************************     
-    Y_predict_LSD_Final=[0]*len(Y_train)
-    for i in range(len(Y_train)):
-        c=Y_train_LSD[i]+Y_predict_CNN_LSD_Final[i]+output_labels_propa[i]+output_labels_spread[i]
-        if 2<=c:
-            Y_predict_LSD_Final[i]=1
-        else:
-            Y_predict_LSD_Final[i]=0
-    t5=time.time()
-    print("Time for Label Based Semi-supervised Defense =",t5-t4)
-    print("                                                                                                                               ")
-    #*********************************************************************************************************************************
-    model_main_LSD = Sequential()
-    model_main_LSD.add(Embedding(row_train_LSD, 8, input_length=column_train_LSD))
-    model_main_LSD.add(Conv1D(16,2, strides=2, padding='same'))
-    model_main_LSD.add(MaxPooling1D(pool_size = (4), strides=(2)))
-    model_main_LSD.add(Conv1D(32,2, strides=2, padding='same'))
-    model_main_LSD.add(MaxPooling1D(pool_size = (4), strides=(2)))
-    model_main_LSD.add(Conv1D(64,2, strides=2, padding='same'))
-    model_main_LSD.add(Flatten())
-    model_main_LSD.add(Dense(1, activation='sigmoid'))
-    model_main_LSD.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
-    model_main_LSD.fit(X_train_LSD, Y_predict_LSD_Final, epochs=200, verbose=0)
-        
-    Y_predict_LSD_Defense=model_main_LSD.predict(X_test, verbose=0)
-    Y_predict_LSD_Defense_Final=[0]*len(Y_predict_LSD_Defense)
-    
-    for i in range(len(Y_predict_LSD_Defense)):
-        if Y_predict_LSD_Defense[i]<0.5:
-              Y_predict_LSD_Defense[i]=0
-        else:
-               Y_predict_LSD_Defense[i]=1
-    
-    for i in range(len(Y_predict_LSD_Defense)):
-        Y_predict_LSD_Defense_Final[i]= int(Y_predict_LSD_Defense[i])  
-    #**************************************Result of Model after Label Based Semi-supervised Defense(LSD)**********************************
-    print("************************Result of Model after Label Based Semi-supervised Defense(LSD)*****************************************")
-    print("                                                                                                                               ")
-    loss, accuracy = model_main.evaluate(X_train, Y_predict_LSD_Final, verbose=2)
-    print('Accuracy for Train set: %f' % (accuracy*100))
-    print('Loss for Train set: %f' % (loss))
-    print("                                                                   ")
-
-    loss, accuracy = model_main.evaluate(X_test, Y_test, verbose=2)
-    print('Accuracy for Test set: %f' % (accuracy*100))
-    print('Loss for Test set:: %f' % (loss))
-    print("                                                                   ")
-
-    TN_LSD, FP_LSD, FN_LSD, TP_LSD = confusion_matrix(Y_test,  Y_predict_LSD_Defense_Final).ravel()
-    print("TN_LSD=",TN_LSD)
-    print("FP_LSD=",FP_LSD)
-    print("FN_LSD=",FN_LSD)
-    print("TP_LSD=",TP_LSD)
-    print("                                                                   ")
-
-    if (FP_LSD+TN_LSD)>0:
-         FPR_LSD=FP_LSD/(FP_LSD+TN_LSD)
-         print("The FPR_LSD result=", FPR_LSD)
-                
-    if (FP_LSD+TN_LSD)>0:
-         TPR_LSD=TP_LSD/(TP_LSD+FN_LSD)
-         print("The TPR_LSD result=", TPR_LSD)
-                
-    if (TN_LSD+FP_LSD)>0:
-        TNR_LSD=TN_LSD/(TN_LSD+FP_LSD)
-        print("The TNR_LSD result=", TNR_LSD)
-                
-    if (FN_LSD+TP_LSD)>0:
-        FNR_LSD=FN_LSD/(FN_LSD+TP_LSD)
-        print("The FNR_LSD result=", FNR_LSD)
-                
-    if ((TN_LSD/(TN_LSD+FP_LSD))+(TP_LSD/(TP_LSD+FP_LSD)))>0:
-        AUC_LSD=1/(2*((TN_LSD/(TN_LSD+FP_LSD))+(TP_LSD/(TP_LSD+FP_LSD))))
-        print("The AUC result=", AUC_LSD)
-                
-    if  (TP_LSD+TN_LSD+FP_LSD+FN_LSD)>0:   
-        ACC_LSD=(TP_LSD+TN_LSD)/(TP_LSD+TN_LSD+FP_LSD+FN_LSD)
-        print("The ACC result=", ACC_LSD)
-                
-    if  ((TP_LSD+FP_LSD)*(TP_LSD+FN_LSD)*(TN_LSD+FP_LSD)*(TN_LSD+FN_LSD))>0:
-        MCC_LSD=(TP_LSD*TN_LSD-FP_LSD*FN_LSD)/math.sqrt((TP_LSD+FP_LSD)*(TP_LSD+FN_LSD)*(TN_LSD+FP_LSD)*(TN_LSD+FN_LSD))
-        print("The Matthews correlation coefficient result=", MCC_LSD) 
-    print("                                                                                                                               ")
-    print("*****************************************************End of Label Based Semi-supervised Defense(LSD)***************************")   
-    print("                                                                                                                               ")
-    print("                                                                                                                               ")
-    print("                                                                                                                               ")
-    print("*****************************************************Clustering Based Semi-supervised Defense(CSD)*****************************")
-    print("                                                                                                                               ")
-
-    X_train_CSD=X_train.copy()
-    Y_train_CSD=flipped_Y_train[:]
-    
-    X_val_CSD=X_val.copy()
-    Y_val_CSD=Y_val[:]
-    row_train_CSD,column_train_CSD=X_train_CSD.get_shape()
-    
-    t6=time.time()
-
-    Y_predict_val_from_CNN_Model=model_main.predict(X_val_CSD, verbose=0)
-    
-    Y_predict_val_from_CNN_Model_Final=[0]*len(Y_predict_val_from_CNN_Model)
-    for i in range(len(Y_predict_val_from_CNN_Model)):
-        if Y_predict_val_from_CNN_Model[i]<0.5:
-              Y_predict_val_from_CNN_Model[i]=0
-        else:
-              Y_predict_val_from_CNN_Model[i]=1
-    for i in range(len(Y_predict_val_from_CNN_Model)):
-       Y_predict_val_from_CNN_Model_Final[i]= int(Y_predict_val_from_CNN_Model[i])
-        
-    adjusted_rand_score_val=metrics.adjusted_rand_score(Y_val_CSD, Y_predict_val_from_CNN_Model_Final)
-    adjusted_mutual_info_score_val=metrics.adjusted_mutual_info_score(Y_val_CSD, Y_predict_val_from_CNN_Model_Final) 
-    homogeneity_score_val=metrics.homogeneity_score(Y_val_CSD, Y_predict_val_from_CNN_Model_Final) 
-    fowlkes_mallows_score_val=metrics.fowlkes_mallows_score(Y_val_CSD, Y_predict_val_from_CNN_Model_Final) 
-  
-    for i in range(20):        #row_train
-        Y_temp=Y_val_CSD.copy()
-
-        row=X_train_CSD.getrow(i)
-        X_temp = sp.lil.lil_matrix(sparse.csr_matrix(sparse.vstack((X_val_CSD, row))))
-        Y_temp.append(Y_train_CSD[i])
-        
-        Y_predict_CNN_compute_CSD=model_main.predict(X_temp, verbose=0)
-        
-        Y_predict_temp=[0]*len(Y_predict_CNN_compute_CSD)
-        
-        for n in range(len(Y_predict_CNN_compute_CSD)):
-            if Y_predict_CNN_compute_CSD[n]<0.5:
-                  Y_predict_CNN_compute_CSD[n]=0
-            else:
-                   Y_predict_CNN_compute_CSD[n]=1
-         
-        for m in range(len(Y_predict_CNN_compute_CSD)):
-            Y_predict_temp[m]= int(Y_predict_CNN_compute_CSD[m])
-
-        adjusted_rand_score_temp=metrics.adjusted_rand_score(Y_temp, Y_predict_temp)
-        adjusted_mutual_info_score_temp=metrics.adjusted_mutual_info_score(Y_temp, Y_predict_temp) 
-        homogeneity_score_temp=metrics.homogeneity_score(Y_temp, Y_predict_temp) 
-        fowlkes_mallows_score_temp=metrics.fowlkes_mallows_score(Y_temp, Y_predict_temp)
-        
-        landa1=abs(adjusted_rand_score_temp-adjusted_rand_score_val)
-        landa2=abs(adjusted_mutual_info_score_temp-adjusted_mutual_info_score_val)
-        landa3=abs(homogeneity_score_temp-homogeneity_score_val)
-        landa4=abs(fowlkes_mallows_score_temp-fowlkes_mallows_score_val)
-        
-        sum_of_diffrences=landa1+landa2+landa3+landa4
-        
-        if sum_of_diffrences<0.1:
-            X_val_CSD = sp.lil.lil_matrix(sparse.csr_matrix(sparse.vstack((X_val_CSD, row))))
-            Y_val_CSD.append(Y_train_CSD[i])          
-            Y_predict_CNN_inside_CSD=model_main.predict(X_val_CSD, verbose=0)
-            
-            Y_predict_CNN_inside_CSD_Final=[0]*len(Y_predict_CNN_inside_CSD)                   #Y_predict_CNN_inside
-            for j in range(len(Y_predict_CNN_inside_CSD)):                                     #Y_predict_CNN_inside
-                if Y_predict_CNN_inside_CSD[j]<0.5:
-                      Y_predict_CNN_inside_CSD[j]=0
-                else:
-                      Y_predict_CNN_inside_CSD[j]=1
-                       
-            for k in range(len(Y_predict_CNN_inside_CSD)):                              #Y_predict_CNN_inside
-                Y_predict_CNN_inside_CSD_Final[k]= int(Y_predict_CNN_inside_CSD[k])
-
-            adjusted_rand_score_val=metrics.adjusted_rand_score(Y_val_CSD, Y_predict_CNN_inside_CSD_Final)
-            adjusted_mutual_info_score_val=metrics.adjusted_mutual_info_score(Y_val_CSD, Y_predict_CNN_inside_CSD_Final) 
-            homogeneity_score_val=metrics.homogeneity_score(Y_val_CSD, Y_predict_CNN_inside_CSD_Final) 
-            fowlkes_mallows_score_val=metrics.fowlkes_mallows_score(Y_val_CSD, Y_predict_CNN_inside_CSD_Final) 
-    t7=time.time()
-    print("Time for Clustering Based Semi-supervised Defense =",t7-t6)
-    print("                                                                   ")
-    #**************************************************************************************** 
-    X_train_Final_CSD= X_val_CSD.copy()  
-    Y_train_Final_CSD=Y_val_CSD.copy()
-    row_train_CSD_Final,col_train_CSD_Final=X_train_Final_CSD.get_shape()    
-    
-    model_main_CSD = Sequential()
-    model_main_CSD.add(Embedding(row_train_CSD_Final, 8, input_length=col_train_CSD_Final))
-    model_main_CSD.add(Conv1D(16,2, strides=2, padding='same'))
-    model_main_CSD.add(MaxPooling1D(pool_size = (4), strides=(2)))
-    model_main_CSD.add(Conv1D(32,2, strides=2, padding='same'))
-    model_main_CSD.add(MaxPooling1D(pool_size = (4), strides=(2)))
-    model_main_CSD.add(Conv1D(64,2, strides=2, padding='same'))
-    model_main_CSD.add(Flatten())
-    model_main_CSD.add(Dense(1, activation='sigmoid'))
-    model_main_CSD.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
-    model_main_CSD.fit(X_train_Final_CSD, Y_train_Final_CSD, epochs=200, verbose=0)
-       
-    Y_test_predict_CSD=model_main_CSD.predict(X_test, verbose=0)
-    
-    Y_test_predict_CSD_Final=[0]*len(Y_test_predict_CSD)
-    for i in range(len(Y_test_predict_CSD)):
-        if Y_test_predict_CSD[i]<0.5:
-              Y_test_predict_CSD[i]=0
-        else:
-              Y_test_predict_CSD[i]=1
-    
-    for i in range(len(Y_test_predict_CSD)):
-        Y_test_predict_CSD_Final[i]= int(Y_test_predict_CSD[i])
-        
-    #*****************************************************Result of Model after Clustering Based Semi-supervised Defense(CSD)**************
-    print("***********************Result of Model after Clustering Based Semi-supervised Defense(CSD)*************************************")  
-    print("                                                                                                                               ")
-
-    loss, accuracy = model_main_CSD.evaluate(X_train_Final_CSD, Y_train_Final_CSD, verbose=2)
-    print('Accuracy for New Train set: %f' % (accuracy*100))
-    print('Loss for New Train set: %f' % (loss))
-    print("                                                                   ")
-
-    loss, accuracy = model_main_CSD.evaluate(X_test, Y_test, verbose=2)
-    print('Accuracy for Test set: %f' % (accuracy*100))
-    print('Loss for Test set:: %f' % (loss))
-    print("                                                                   ")
-
-    TN_CSD, FP_CSD, FN_CSD, TP_CSD = confusion_matrix(Y_test,  Y_test_predict_CSD_Final).ravel()
-    print("TN_CSD=",TN_CSD)
-    print("FP_CSD=",FP_CSD)
-    print("FN_CSD=",FN_CSD)
-    print("TP_CSD=",TP_CSD)
-    print("                                                                   ")
-
-    if (FP_CSD+TN_CSD)>0:
-         FPR_CSD=FP_CSD/(FP_CSD+TN_CSD)
-         print("The FPR_CSD result=", FPR_CSD)
-                
-    if (FP_CSD+TN_CSD)>0:
-         TPR_CSD=TP_CSD/(TP_CSD+FN_CSD)
-         print("The TPR_CSD result=", TPR_CSD)
-                
-    if (TN_CSD+FP_CSD)>0:
-        TNR_CSD=TN_CSD/(TN_CSD+FP_CSD)
-        print("The TNR_CSD result=", TNR_CSD)
-                
-    if (FN_CSD+TP_CSD)>0:
-        FNR_CSD=FN_CSD/(FN_CSD+TP_CSD)
-        print("The FNR_CSD result=", FNR_CSD)
-                
-    if ((TN_CSD/(TN_CSD+FP_CSD))+(TP_CSD/(TP_CSD+FP_CSD)))>0:
-        AUC_CSD=1/(2*((TN_CSD/(TN_CSD+FP_CSD))+(TP_CSD/(TP_CSD+FP_CSD))))
-        print("The AUC_CSD result=", AUC_CSD)
-                
-    if  (TP_CSD+TN_CSD+FP_CSD+FN_CSD)>0:   
-        ACC_CSD=(TP_CSD+TN_CSD)/(TP_CSD+TN_CSD+FP_CSD+FN_CSD)
-        print("The ACC_CSD result=", ACC_CSD)
-                
-    if  ((TP_CSD+FP_CSD)*(TP_CSD+FN_CSD)*(TN_CSD+FP_CSD)*(TN_CSD+FN_CSD))>0:
-        MCC_CSD=(TP_CSD*TN_CSD-FP_CSD*FN_CSD)/math.sqrt((TP_CSD+FP_CSD)*(TP_CSD+FN_CSD)*(TN_CSD+FP_CSD)*(TN_CSD+FN_CSD))
-        print("The Matthews correlation coefficient result=", MCC_CSD)
-    print("                                                                   ")
-    print("************************************************End of Clustering Based Semi-supervised Defense(LSD)***************************")   
-    print("                                                                                                                               ")
-    print("                                                                                                                               ")
-    print("                                                                                                                               ")
-#******************************************************************************************************************************************
-if __name__ == "__main__":  # Script entry point: call main() only when this file is executed directly, not when imported.
-    main()
-#******************************************************************************  
\ No newline at end of file
diff --git a/Taheri2020NCAA-labelflipping_Sourcecode/README.txt b/Taheri2020NCAA-labelflipping_Sourcecode/README.txt
deleted file mode 100644
index d557dd50551a7e881f5c37d5a5064ffaf7628897..0000000000000000000000000000000000000000
--- a/Taheri2020NCAA-labelflipping_Sourcecode/README.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-
-README.txt
-
-Help file for running the project, which is written for Python 2.7 and 3.0.
-
-Dr. Rahim Taheri wrote this implementation. 
-Dr. Rahim Taheri, Dr. Mohammad Shojafar and Dr. Zahra Pooranian helped with brainstorming ideas and with the documentation. 
-Prof. Reza Javidan, Prof. Ali Miri and Prof. M. Conti helped with English correction and with leading the team.
-
-If you need any help with the code, feel free to send a message to
-
-Dr. Mohammad Shojafar <mohammad.shojafar@gmail.com> or <m.shojafar@ieee.org> or
-Dr. Rahim Taheri <taheri.rahim@gmail.com> 
-
-Steps to run the project:
-
-Label_Flipping_Paper_with_Feature_Selection(LSD_CSD_KDD).py contains the label flipping code with the feature selection method for LSD, CSD and KDD.
-
-Label_Flipping_Paper_without_Feature_Selection(LSD_CSD_KDD).py contains the label flipping code without the feature selection method for LSD, CSD and KDD.
-
-The comparisons are embedded in the code.
-
-We used three datasets, which can be obtained through the links in the paper. 
-
-Note: you need to preprocess and clean the datasets before running the code. 
-
-
-We would be glad if you cite our paper, using the following details, in your research papers:
-
-R. Taheri, R. Javidan, M. Shojafar, Z. Pooranian, A. Miri, M. Conti, "On Defending Against Label Flipping Attacks on Malware Detection Systems", Springer, Neural Computing and Applications (NCAA), Vol. 32, pp. 14781–14800, July 2020.
-
-DOI: https://doi.org/10.1007/s00521-020-04831-9
\ No newline at end of file
diff --git a/Taheri2020NCAA-labelflipping_Sourcecode/copyright notice.docx b/Taheri2020NCAA-labelflipping_Sourcecode/copyright notice.docx
deleted file mode 100644
index e238f77a424bf7fa46148840f9c8485e73d491ab..0000000000000000000000000000000000000000
Binary files a/Taheri2020NCAA-labelflipping_Sourcecode/copyright notice.docx and /dev/null differ