From b762c6dd8de1fb997474542e55807b009f516f9b Mon Sep 17 00:00:00 2001 From: Mohammad Shojafar <mshojafar@users.noreply.github.com> Date: Sat, 16 Jan 2021 13:23:16 +0000 Subject: [PATCH] source codes --- .../GAN_Based_defense.py | 598 +++++++++++++ ...per_with_Feature_Selection(LSD_CSD_KDD).py | 840 ++++++++++++++++++ ..._without_Feature_Selection(LSD_CSD_KDD).py | 833 +++++++++++++++++ .../README.txt | 4 +- .../copyright notice.docx | Bin 0 -> 16152 bytes 5 files changed, 2273 insertions(+), 2 deletions(-) create mode 100644 Taheri2020NCAA-labelflipping_Sourcecode/GAN_Based_defense.py create mode 100644 Taheri2020NCAA-labelflipping_Sourcecode/Label_Flipping_Paper_with_Feature_Selection(LSD_CSD_KDD).py create mode 100644 Taheri2020NCAA-labelflipping_Sourcecode/Label_Flipping_Paper_without_Feature_Selection(LSD_CSD_KDD).py create mode 100644 Taheri2020NCAA-labelflipping_Sourcecode/copyright notice.docx diff --git a/Taheri2020NCAA-labelflipping_Sourcecode/GAN_Based_defense.py b/Taheri2020NCAA-labelflipping_Sourcecode/GAN_Based_defense.py new file mode 100644 index 0000000..a2e3885 --- /dev/null +++ b/Taheri2020NCAA-labelflipping_Sourcecode/GAN_Based_defense.py @@ -0,0 +1,598 @@ +# -*- coding: utf-8 -*- + +""" +Created on Fri May 25 12:03:10 2018 + +@author: Rahim +#this approch use the distribution of Benign data to poison thet test data +""" +from __future__ import print_function +from sklearn.feature_selection import SelectFromModel +from sklearn.feature_selection import SelectKBest, f_regression +from sklearn.model_selection import KFold +from sklearn.model_selection import cross_val_score +from sklearn.ensemble import RandomForestClassifier +from sklearn.metrics import classification_report +from sklearn.model_selection import train_test_split +from sklearn.metrics import confusion_matrix +from sklearn import model_selection +from sklearn.feature_selection import RFE +from sklearn.linear_model import LogisticRegression +from sklearn.ensemble import 
ExtraTreesClassifier +from sklearn.ensemble import RandomForestRegressor +from scipy.sparse import csr_matrix, vstack, hstack +from scipy import sparse +import pandas as pd +import numpy as np +import random +import time +import argparse +import math +from numpy import * +import os.path as osp +import scipy.sparse as sp +import pickle +from sklearn import metrics +from sklearn.metrics import accuracy_score +#****************************************************************************** +CLASS = 'class' +CLASS_BEN = 'B' +CLASS_MAL = 'M' +DATA = 'data' +#********************************************Functions that will be used in this program******************************************************************************************* +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument('-i', '--input-tables', nargs='*', dest='input_tables') + + args = parser.parse_args() + + return args +#****************************************************************************** +def read_table(table_file): + + table = dict() + + with open(table_file, 'rb') as handle: + while True: + try: + table = pickle.load(handle) + except EOFError: + break + + f_set=set() + + for k,v in table.items(): + for feature in v[DATA]: + f_set.add(feature) + + return table , f_set +#****************************************************************************** +def build_table(tables): + full_table = dict() + + file_set = set() + + for table in tables: + file_set.update(table.keys()) + for key, val in table.items(): + full_table[key] = val + + files = list(file_set) + return full_table, files +#****************************************************************************** +def convert_to_matrix(table, features, files): + mat = sp.lil.lil_matrix((len(files), len(features)), dtype=np.int8) + + print("Input Data Size = ", mat.get_shape()) + # the response vector + + cl = [0]*len(files) + + for key, val in table.items(): + k = files.index(key) + + if val[CLASS] is CLASS_BEN: + cl[k] 
= 1 + + for v in val[DATA]: + try: + idx = features.index(v) + mat[k, idx] = 1 + except Exception as e: + print(e) + pass + + return mat, cl +#****************************************************************************** +def delete_row_lil(mat, i): + if not isinstance(mat, sp.lil.lil_matrix): + raise ValueError("works only for LIL format -- use .tolil() first") + mat.rows = np.delete(mat.rows, i) + mat.data = np.delete(mat.data, i) + mat._shape = (mat._shape[0] - 1, mat._shape[1]) +#****************************************************************************** +def relevant_features(data, response_vector, features): + rel_features = list() + ranked_index=list() + + model =RandomForestRegressor() + rfe = RFE(model, 1) + fit = rfe.fit(data, response_vector) + old_features=features + + for i in fit.ranking_: + if i<len(features): + rel_features.append(features[i]) + ranked_index=[old_features.index(x) for x in rel_features if x in old_features] + + return rel_features ,ranked_index +#*****************************************************************Main Function********************************************************************************************************* +def main(): + args = parse_args() + + tables = [] + f_set = set() + + #read the data + for t_files in args.input_tables: + table, features = read_table(t_files) + f_set = f_set.union(features) + tables.append(table) + print(" ") + print(" ") + print("*****************************************************************************************") + print("********Using Benign Distribution + Random Forest Classifier + GAN countermeasure********") + print("*****************************************************************************************") + + #*build table from data and convert to matrix + full_table, files = build_table(tables) + files.sort() + features = list(f_set) + features.sort() + mat, cl = convert_to_matrix(full_table, features, files) + + #Doing feature Ranking on all of the Data + 
print("************************Doing feature Ranking on all of the Data*************************") + t0=time.time() + r_features,ranked_index = relevant_features(mat, cl, features) + t1=time.time() + print("Time of Feature Ranking=",t1-t0) + print("******************************************************************************************") + + original_selected=ranked_index[1:301] + data = sparse.lil_matrix(sparse.csr_matrix(mat)[:,original_selected]) + seed = 10 + test_size = 0.2 + X_train, X_test, Y_train, Y_test= train_test_split(data, cl, test_size= test_size, random_state=seed) + test_size = 0.25 + X_train, X_val, Y_train, Y_val= train_test_split(X_train, Y_train, test_size= test_size, random_state=seed) + #************************************************************************** + num_trees = 100 + max_features = 3 + t0=time.time() + kfold = KFold(n_splits=10, random_state=10) + model = RandomForestClassifier(n_estimators=num_trees, max_features=max_features) + model.fit(X_train, Y_train) + t1=time.time() + print("Time for Clssification Algorithm is runing on 300 high-ranked features =",t1-t0) + print("************************************Result without attack *******************************************************************************************") + # compute Classification Accuracy in train and test and Validation + scoring = 'accuracy' + results = model_selection.cross_val_score(model, X_train,Y_train, cv=kfold, scoring=scoring) + print(("The accuracy of Classification in train: %.3f (%.3f)") % (results.mean(), results.std())) + #********************* compute Classification Accuracy in validation*************************** + scoring = 'accuracy' + results = model_selection.cross_val_score(model, X_val,Y_val, cv=kfold, scoring=scoring) + print(("The accuracy of Classification in validation: %.3f (%.3f)") % (results.mean(), results.std())) + #********************* compute Classification Accuracy in test********************************* + scoring = 
'accuracy' + results = model_selection.cross_val_score(model, X_test,Y_test, cv=kfold, scoring=scoring) + print(("The accuracy of Classification in test: %.3f (%.3f)") % (results.mean(), results.std())) + #********************* compute Classification Accuracy in Validation*************************** + predictions_val = model.predict(X_val) + print("classification_report by validation:") + print(classification_report(Y_val, predictions_val)) + #********************* compute Classification Accuracy in train******************************** + predictions = model.predict(X_test) + print("classification_report by test:") + print(classification_report(Y_test, predictions)) + #********************* compute Logarithmic Loss in Train********************************* + scoring = 'neg_log_loss' + results = model_selection.cross_val_score(model, X_train,Y_train, cv=kfold, scoring=scoring) + print(("The Loss of Classification in train data: %.3f (%.3f)") % (results.mean(), results.std())) + #********************* compute Logarithmic Loss in validation**************************** + scoring = 'neg_log_loss' + results = model_selection.cross_val_score(model, X_val,Y_val, cv=kfold, scoring=scoring) + print(("The Loss of Classification in validation data:: %.3f (%.3f)") % (results.mean(), results.std())) + #********************* compute Logarithmic Loss in Test*********************************** + scoring = 'neg_log_loss' + results = model_selection.cross_val_score(model, X_test,Y_test, cv=kfold, scoring=scoring) + print(("The Loss of Classification in test data:: %.3f (%.3f)") % (results.mean(), results.std())) + #********************* compute Area Under ROC Curve in Train****************************** + scoring = 'roc_auc' + results = model_selection.cross_val_score(model, X_train,Y_train, cv=kfold, scoring=scoring) + print(("The Area Under ROC Curve in Train: %.3f (%.3f)") % (results.mean(), results.std())) + #********************* compute Area Under ROC Curve in 
Validation************************* + scoring = 'roc_auc' + results = model_selection.cross_val_score(model, X_val,Y_val, cv=kfold, scoring=scoring) + print(("The Area Under ROC Curve in Validation: %.3f (%.3f)") % (results.mean(), results.std())) + #********************* compute Area Under ROC Curve in Test******************************* + scoring = 'roc_auc' + results = model_selection.cross_val_score(model, X_test,Y_test, cv=kfold, scoring=scoring) + print(("The Area Under ROC Curve in test: %.3f (%.3f)") % (results.mean(), results.std())) + #*****************************Compute FPR and TPR in Validation************************** + cm=confusion_matrix(Y_test, predictions) + print("confusion_matrix=") + print(cm) + TP=cm[0][0] + print("TP=",TP) + FP=cm[0][1] + print("FP=",FP) + FN=cm[1][0] + print("FN=",FN) + TN=cm[1][1] + print("TN=",TN) + FPR=FP/(FP+TN) + print("The FPR result=", FPR) + TPR=TP/(TP+FN) + print("The TPR result=", TPR) + + TNR=TN/(TN+FP) + print("The TNR result=", TNR) + + FNR=FN/(FN+TP) + print("The FNR result=", FNR) + + AUC=1/(2*((TN/(TN+FP))+(TP/(TP+FP)))) + print("The AUC result=", AUC) + + ACC=(TP+TN)/(TP+TN+FP+FN) + print("The ACC result=", ACC) + + MCC=(TP*TN-FP*FN)/math.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN)) + print("The Matthews correlation coefficient result=", MCC) + + print("*******************************End of Result without attack:*****************************************************************************************") + #************************************************************************************************************************************************************* + # finding Malware of test data + malware_test= sparse.lil_matrix(X_test) + cl_malware=list() + z_m=0 + count_m=0 + for i, j in enumerate(Y_test): + if j == 1: + delete_row_lil(malware_test, i-count_m) + count_m=count_m+1 + else: + cl_malware.insert(z_m, 1) + z_m=z_m+1 + + #************************** + #finding Benign of test data + benign_test = 
sparse.lil_matrix(X_test) + cl_benign=list() + z_b=0 + count_b=0 + for i, j in enumerate(Y_test): + if j == 0: + delete_row_lil(benign_test.tolil(), i-count_b) + count_b=count_b+1 + else: + cl_benign.insert(z_b, 1) + z_b=z_b+1 + #************************** + # finding Malware of Train data + malware_train= sparse.lil_matrix(X_train) + cl_malware=list() + z_m=0 + count_m=0 + for i, j in enumerate(Y_train): + if j == 1: + delete_row_lil(malware_train, i-count_m) + count_m=count_m+1 + else: + cl_malware.insert(z_m, 1) + z_m=z_m+1 + #*************************** + #Finding Benign of Train data + cl_X_train=list(Y_train) + benign_train=sparse.lil_matrix(X_train) + z_b=0 + count_b=0 + cl_benign_train=list() + for i, j in enumerate(cl_X_train): + if j == 0: + delete_row_lil(benign_train, i-count_b) + count_b=count_b+1 + else: + cl_benign_train.insert(z_b, 1) + z_b=z_b+1 + print("***********Size of Each Data Part:**********") + print("malware_train=", malware_train.get_shape()) + print("benign_train=", benign_train.get_shape()) + print("malware_test=", malware_test.get_shape()) + print("benign_test=", benign_test.get_shape()) + #*************************************************** + t0=time.time() + ranked_features_in_benign,ranked_index_of_benign = relevant_features(benign_train,cl_benign_train, features) + t1=time.time() + print("Time for Ranking benign_train to find important features =",t1-t0) + #*************************************************************************************************************************************************************** + numbers=list() + numbers=[3,6,9,12,15,18,21,24,27,30,60] + X_test = sp.lil.lil_matrix(X_test) + + for loop in range(10): + print("************************************************************************************************************************************************************************************") + print("Result related to loop number : ",loop) + + Malware_Test=sparse.lil_matrix(malware_test.copy()) + 
row_of_Malware,column_of_Malware=Malware_Test.get_shape() + index_of_row=list(range(row_of_Malware)) + random.shuffle(index_of_row) + + number_of_row_to_change=int(row_of_Malware/10) + selected_row=index_of_row[0:number_of_row_to_change] + + for i, v in enumerate(numbers): + print("*****************************************************************************************************************************************************") + print("*********************selected features :",int(v) ) + print("************************************Result after attack *************************") + max_index_of_column=int(v)+1 + t0=time.time() + rw_test,cl_test=X_test.get_shape() + poison_data=sp.lil.lil_matrix((0,cl_test),dtype=np.int8) + Malware_Test=sparse.lil_matrix(malware_test.copy()) + + counter_of_poisoned_point=0 + + for m,value in enumerate(selected_row): + flag=0 + for i, j in enumerate(ranked_index_of_benign[1:max_index_of_column]): + for k,l in enumerate(original_selected): + if j==l: + if Malware_Test[value,l]==0: + Malware_Test[value,l]=1 + flag=1 + if flag==1: + counter_of_poisoned_point=counter_of_poisoned_point+1 + + + Benign_Test=sparse.lil_matrix(benign_test.copy()) + poison_data = sp.lil.lil_matrix(sparse.csr_matrix(sparse.vstack((Benign_Test, Malware_Test)))) + r,w=poison_data.get_shape() + Y_test=Y_test[0:r] + + t1=time.time() + print("Time related to applying attack in this number of Features= ",t1-t0) + + print("Number of poisoned Malware= ",counter_of_poisoned_point) + #********************* compute Classification Accuracy in test********************************* + scoring = 'accuracy' + results = model_selection.cross_val_score(model, poison_data,Y_test, cv=kfold, scoring=scoring) + print(("The accuracy of Classification in test: %.3f (%.3f)") % (results.mean(), results.std())) + + #********************* compute Classification Accuracy in train******************************** + predictions = model.predict(poison_data) + print("classification_report by 
test:") + print(classification_report(Y_test, predictions)) + + #********************* compute Logarithmic Loss in Test*********************************** + scoring = 'neg_log_loss' + results = model_selection.cross_val_score(model, poison_data,Y_test, cv=kfold, scoring=scoring) + print(("The Loss of Classification in test data:: %.3f (%.3f)") % (results.mean(), results.std())) + + #********************* compute Area Under ROC Curve in Test******************************* + scoring = 'roc_auc' + results = model_selection.cross_val_score(model, poison_data,Y_test, cv=kfold, scoring=scoring) + print(("The Area Under ROC Curve in test: %.3f (%.3f)") % (results.mean(), results.std())) + #*****************************Compute FPR and TPR in Validation************************** + cm=confusion_matrix(Y_test, predictions) + print("confusion_matrix=") + print(cm) + TP=cm[0][0] + print("TP=",TP) + FP=cm[0][1] + print("FP=",FP) + FN=cm[1][0] + print("FN=",FN) + TN=cm[1][1] + print("TN=",TN) + FPR=FP/(FP+TN) + print("The FPR result=", FPR) + + TPR=TP/(TP+FN) + print("The TPR result=", TPR) + + TNR=TN/(TN+FP) + print("The TNR result=", TNR) + + FNR=FN/(FN+TP) + print("The FNR result=", FNR) + + AUC=1/(2*((TN/(TN+FP))+(TP/(TP+FP)))) + print("The AUC result=", AUC) + + ACC=(TP+TN)/(TP+TN+FP+FN) + print("The ACC result=", ACC) + + MCC=(TP*TN-FP*FN)/math.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN)) + print("The Matthews correlation coefficient result=", MCC) + + print("********************************************************************************") + print("*******************Result after applying GAN countermeasure**************") + t0=time.time() + + + model2 = ExtraTreesClassifier(n_estimators=250,random_state=0) + model2.fit(benign_train, cl_benign_train) + importances = model2.feature_importances_ + indices = np.argsort(importances)[::-1] + + + importance_of_Features_in_benign_train=list() + for f in range(60): + importance_of_Features_in_benign_train.append(indices[f]) + + 
#******************************Runing the Logistic Regression and finding Some Sampels Near to Hyperplain***************************** + poison_model = LogisticRegression() + poison_model.fit(X_train,Y_train) + print("Result related to Logistic Regression:") + scoring = 'accuracy' + poison_results = model_selection.cross_val_score(poison_model, X_train,Y_train, cv=kfold, scoring=scoring) + print(("The accuracy of Classification in train: %.3f (%.3f)") % (poison_results.mean(), poison_results.std())) + #********************* compute Logistic Regression Accuracy in validation without change *************************** + scoring = 'accuracy' + results = model_selection.cross_val_score(poison_model, X_val,Y_val, cv=kfold, scoring=scoring) + print(("The accuracy of Classification in validation: %.3f (%.3f)") % (poison_results.mean(), poison_results.std())) + #********************* compute Logistic Regression Accuracy in test without change ********************************* + scoring = 'accuracy' + results = model_selection.cross_val_score(poison_model, X_test,Y_test, cv=kfold, scoring=scoring) + print(("The accuracy of Classification in test: %.3f (%.3f)") % (poison_results.mean(), poison_results.std())) + #**********************Declration of Variables for finding desision value ************* + print("**************************************************************************************************") + temp=sparse.lil_matrix(X_train) + a,b=temp.get_shape() + decision_value=np.array([]) + selected_cl_malware_train=list() + selected_malware_train = sparse.lil_matrix(X_train) + #**********************finding malware_train and related desision value ********************************** + counter_of_malware_train=0 + count_deleted=0 + for j in range(a): + row=temp.getrow(j) + if Y_train[j]==0: + decision_value=np.append(decision_value,poison_model.decision_function(row)) + selected_cl_malware_train.insert(counter_of_malware_train, 0) + 
counter_of_malware_train=counter_of_malware_train+1 + else: + delete_row_lil(selected_malware_train.tolil(), j-count_deleted) + count_deleted=count_deleted+1 + #**********************sort the absolute value of decision value for malware_train************************* + decision_value=np.absolute(decision_value) + indices=decision_value.argsort() + + #************** Declration of Variables for selecting data************************************************* + number_of_row_malware_train,number_of_column_malware_train=malware_train.get_shape() + + number_of_row_selected_malware_train=int(number_of_row_malware_train/10) + + #****************Selecting index related to 10 percent of malware_train with minimum decision value******* + Selected_rows_as_less_likely=list() + Selected_rows_as_less_likely=indices[:number_of_row_selected_malware_train] + + Malware_less_likely=sp.lil.lil_matrix((0, number_of_column_malware_train), dtype=np.int8) + cl_less_likely=list() + counter_for_cl_less_likely=0 + for i,row_number in enumerate(Selected_rows_as_less_likely): + selected_row=malware_train.getrow(row_number) + Malware_less_likely= sp.lil.lil_matrix(sparse.csr_matrix(sparse.vstack((selected_row, Malware_less_likely)))) + cl_less_likely.insert(counter_for_cl_less_likely,0) + counter_for_cl_less_likely=counter_for_cl_less_likely+1 + + number_of_row_in_Malware_less_likely,number_of_column_in_Malware_less_likely=Malware_less_likely.get_shape() + #****************finding Benign like samples******************************************************************************** + poisoned_data=sp.lil.lil_matrix((0, number_of_column_malware_train), dtype=np.int8) + c=0 + for counter_of_Malware_less_likely in range(number_of_row_in_Malware_less_likely): + selected_sample=Malware_less_likely.getrow(counter_of_Malware_less_likely) + + + c=0 + for S in range(number_of_column_in_Malware_less_likely): + index_for_change=random.randint(0,number_of_column_in_Malware_less_likely-1) + if 
selected_sample[0,index_for_change]==0: + selected_sample[0,index_for_change]=1 + label=model.predict(selected_sample) + if label==int(1): + poisoned_data= sp.lil.lil_matrix(sparse.csr_matrix(sparse.vstack((selected_sample, poisoned_data)))) + c=c+1 + break + + Number_of_row_in_poisoned_data,Number_of_column_in_poison_data=poisoned_data.get_shape() + Y_poisoin=list() + for index in range(Number_of_row_in_poisoned_data): + Y_poisoin.append(0) + #*************************************************************************************************************************** + + poisoned_data_X=poisoned_data.copy() + poisoned_data_Y=Y_poisoin[:] + second_test_set=0.2 + X_poisoned_train, X_poisoned_test, Y_poisoned_train, Y_poisoned_test= train_test_split(poisoned_data_X, poisoned_data_Y, test_size= second_test_set, random_state=seed) + poison_data_for_retraining = sp.lil.lil_matrix(sparse.csr_matrix(sparse.vstack((X_train, X_poisoned_train)))) + poison_Class_for_retraining = Y_train + Y_poisoned_train + + num_trees = 100 + max_features = 3 + kfold = KFold(n_splits=10, random_state=10) + model_for_counter_measure = RandomForestClassifier(n_estimators=num_trees, max_features=max_features) + model_for_counter_measure.fit(poison_data_for_retraining,poison_Class_for_retraining) + + + poison_data_for_test_after_retraining = sp.lil.lil_matrix(sparse.csr_matrix(sparse.vstack((X_test, X_poisoned_test)))) + poison_Class_for_test_after_retraining= Y_test + Y_poisoned_test + + t1=time.time() + print("Time related to applying GAN countermeasure in this number of Features: ",t1-t0) + #********************* compute Classification Accuracy in test********************************* + scoring = 'accuracy' + results = model_selection.cross_val_score(model_for_counter_measure, poison_data_for_test_after_retraining,poison_Class_for_test_after_retraining, cv=kfold, scoring=scoring) + print(("The accuracy of Classification in test: %.3f (%.3f)") % (results.mean(), results.std())) + + 
#********************* compute Classification Accuracy in train******************************** + predictions = model.predict(poison_data_for_test_after_retraining) + print("classification_report by test:") + print(classification_report(poison_Class_for_test_after_retraining, predictions)) + + #********************* compute Logarithmic Loss in Test*********************************** + scoring = 'neg_log_loss' + results = model_selection.cross_val_score(model_for_counter_measure, poison_data_for_test_after_retraining , poison_Class_for_test_after_retraining, cv=kfold, scoring=scoring) + print(("The Loss of Classification in test data:: %.3f (%.3f)") % (results.mean(), results.std())) + + #********************* compute Area Under ROC Curve in Test******************************* + scoring = 'roc_auc' + results = model_selection.cross_val_score(model_for_counter_measure, poison_data_for_test_after_retraining , poison_Class_for_test_after_retraining, cv=kfold, scoring=scoring) + print(("The Area Under ROC Curve in test: %.3f (%.3f)") % (results.mean(), results.std())) + #*****************************Compute FPR and TPR in Validation************************** + cm=confusion_matrix(poison_Class_for_test_after_retraining, predictions) + print("confusion_matrix=") + print(cm) + TP=cm[0][0] + print("TP=",TP) + FP=cm[0][1] + print("FP=",FP) + FN=cm[1][0] + print("FN=",FN) + TN=cm[1][1] + print("TN=",TN) + FPR=FP/(FP+TN) + print("The FPR result=", FPR) + + TPR=TP/(TP+FN) + print("The TPR result=", TPR) + + TNR=TN/(TN+FP) + print("The TNR result=", TNR) + + FNR=FN/(FN+TP) + print("The FNR result=", FNR) + + AUC=1/(2*((TN/(TN+FP))+(TP/(TP+FP)))) + print("The AUC result=", AUC) + + ACC=(TP+TN)/(TP+TN+FP+FN) + print("The ACC result=", ACC) + + MCC=(TP*TN-FP*FN)/math.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN)) + print("The Matthews correlation coefficient result=", MCC) + + print("Result Related to this Numbers of features is finished:",int(v)) + 
Malware_Test=sparse.lil_matrix(malware_test.copy()) + selected_row=index_of_row[0:number_of_row_to_change] + original_selected=ranked_index[1:301] + print("End of loop number : ",loop) + print("************************************************************************************************************************************************************************************") +#******************************************************************************************************************************************************************** +if __name__ == "__main__": + main() +#****************************************************************************** diff --git a/Taheri2020NCAA-labelflipping_Sourcecode/Label_Flipping_Paper_with_Feature_Selection(LSD_CSD_KDD).py b/Taheri2020NCAA-labelflipping_Sourcecode/Label_Flipping_Paper_with_Feature_Selection(LSD_CSD_KDD).py new file mode 100644 index 0000000..a5397c5 --- /dev/null +++ b/Taheri2020NCAA-labelflipping_Sourcecode/Label_Flipping_Paper_with_Feature_Selection(LSD_CSD_KDD).py @@ -0,0 +1,840 @@ +# -*- coding: utf-8 -*- +""" +Created on Fri Jun 21 14:25:17 2019 + +@author: Rahim +""" +#*****************************************************************import Library***************************************************************************** +from __future__ import print_function +from sklearn.feature_selection import SelectFromModel +from sklearn.feature_selection import SelectKBest, f_regression +from sklearn.model_selection import KFold +from sklearn.model_selection import cross_val_score +from sklearn.ensemble import RandomForestClassifier +from sklearn.metrics import classification_report +from sklearn.model_selection import train_test_split +from sklearn.metrics import confusion_matrix +from sklearn import model_selection +from sklearn.feature_selection import RFE +from sklearn.linear_model import LogisticRegression +from sklearn.ensemble import ExtraTreesClassifier +from sklearn.ensemble import 
RandomForestRegressor +from scipy.sparse import csr_matrix, vstack, hstack +from scipy.sparse import coo_matrix +from keras.preprocessing.text import one_hot +from sklearn import metrics +from sklearn.metrics import silhouette_samples, silhouette_score +from sklearn.semi_supervised import LabelPropagation +from sklearn.semi_supervised import LabelSpreading +from sklearn.semi_supervised import label_propagation +from sklearn.metrics import roc_auc_score +from sklearn.metrics import f1_score +from sklearn.cluster import KMeans +import math +#import keras +from keras.models import Sequential +from keras.layers import Dense, Dropout, Activation , Flatten +from sklearn.metrics import log_loss +from keras.optimizers import SGD +from keras.layers.normalization import BatchNormalization +from keras.layers.convolutional import UpSampling2D +from keras.layers.convolutional import Conv2D, MaxPooling2D, MaxPooling1D +from keras.layers.embeddings import Embedding +from scipy import sparse +import pandas as pd +import numpy as np +#import random +import sklearn +from sklearn.metrics.pairwise import manhattan_distances +from keras.models import Model +from keras.layers import Conv1D, multiply, GlobalMaxPool1D, Input , Lambda +import time +import argparse +#import math +from numpy import * +import os.path as osp +import scipy.sparse as sp +import pickle +from sklearn.metrics import accuracy_score +from warnings import simplefilter +#********************************************************************************************************************************* +CLASS = 'class' +CLASS_BEN = 'B' +CLASS_MAL = 'M' +DATA = 'data' +#********************************************Functions that will be used in this program***************************************** +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument('-i', '--input-tables', nargs='*', dest='input_tables') + + args = parser.parse_args() + + return args 
+#********************************************************************************************************************************* +def read_table(table_file): + + table = dict() + + with open(table_file, 'rb') as handle: + while True: + try: + table = pickle.load(handle) + except EOFError: + break + + f_set=set() + + for k,v in table.items(): + for feature in v[DATA]: + f_set.add(feature) + + return table , f_set +#****************************************************************************** +def relevant_features(data, response_vector, features): + rel_features = list() + ranked_index=list() + + model =RandomForestRegressor() + rfe = RFE(model, 1) + fit = rfe.fit(data, response_vector) + old_features=features + + for i in fit.ranking_: + if i<len(features): + rel_features.append(features[i]) + ranked_index=[old_features.index(x) for x in rel_features if x in old_features] + + return rel_features ,ranked_index +#********************************************************************************************************************************* +def build_table(tables): + full_table = dict() + + file_set = set() + + for table in tables: + file_set.update(table.keys()) + for key, val in table.items(): + full_table[key] = val + + files = list(file_set) + return full_table, files +#********************************************************************************************************************************* +def convert_to_matrix(table, features, files): + mat = sp.lil.lil_matrix((len(files), len(features)), dtype=np.int8) + + print("Input Data Size = ", mat.get_shape()) + # the response vector + + cl = [0]*len(files) + + for key, val in table.items(): + k = files.index(key) + + if val[CLASS] is CLASS_BEN: + cl[k] = 1 + + for v in val[DATA]: + try: + idx = features.index(v) + mat[k, idx] = 1 + except Exception as e: + print(e) + pass + + return mat, cl +#****************************************************************************** +def delete_row_lil(mat, i): + if 
not isinstance(mat, sp.lil.lil_matrix): + raise ValueError("works only for LIL format -- use .tolil() first") + mat.rows = np.delete(mat.rows, i) + mat.data = np.delete(mat.data, i) + mat._shape = (mat._shape[0] - 1, mat._shape[1]) +#*****************************************************************Main Function******************************************************* +def main(): + simplefilter(action='ignore', category=FutureWarning) + args = parse_args() + tables = [] + f_set = set() + #read the data + for t_files in args.input_tables: + table, features = read_table(t_files) + f_set = f_set.union(features) + tables.append(table) + #************************************build table from data and convert to matrix*************************************************** + full_table, files = build_table(tables) + files.sort() + features = list(f_set) + features.sort() + mat, cl = convert_to_matrix(full_table, features, files) + print("************************Doing feature Ranking on all of the Data*************************") + r_features,ranked_index = relevant_features(mat, cl, features) + original_selected=ranked_index[1:301] + data = sparse.lil_matrix(sparse.csr_matrix(mat)[:,original_selected]) + + #******************************************Split data to train , test and validation********************************************** + seed = 10 + test_size = 0.2 + X_train, X_test, Y_train, Y_test= train_test_split(data, cl, test_size= test_size, random_state=seed) + test_size = 0.25 + X_train, X_val, Y_train, Y_val= train_test_split(X_train, Y_train, test_size= test_size, random_state=seed) + #*********************************************************************************************************************************** + print(" ") + print(" ") + print("*********Semi-Supervised Deep Learning Based Approach Against Label Flipping Attack in Malware Detection System*****************") + print(" ") + + X_train=sparse.csr_matrix(X_train) + print("row_train,column_train=", 
X_train.get_shape()) + print(" ") + X_val=sparse.csr_matrix(X_val) + row_val,column_val=X_val.get_shape() + print("row_val,column_val=",X_val.get_shape()) + print(" ") + X_test=sparse.csr_matrix(X_test) + row_test,column_test=X_test.get_shape() + print("row_test,column_test=",X_test.get_shape()) + print(" ") + print("********************************************************************") + #**************************************************Model Definition***************************************************************** + X_train_NoAttack=X_train.copy() + Y_train_NoAttack=Y_train[:] + + X_val_NoAttack=X_val.copy() + Y_val_NoAttack=Y_val[:] + + row_train_NoAttack,column_train_NoAttack=X_train_NoAttack.get_shape() + model_main = Sequential() + model_main.add(Embedding(row_train_NoAttack, 8, input_length=column_train_NoAttack)) + model_main.add(Conv1D(16,2, strides=2, padding='same')) + model_main.add(MaxPooling1D(pool_size = (4), strides=(2))) + model_main.add(Conv1D(32,2, strides=2, padding='same')) + model_main.add(MaxPooling1D(pool_size = (4), strides=(2))) + model_main.add(Conv1D(64,2, strides=2, padding='same')) + model_main.add(Flatten()) + model_main.add(Dense(1, activation='sigmoid')) + model_main.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc']) + model_main.fit(X_train_NoAttack, Y_train_NoAttack, epochs=200, verbose=0) + + Y_CNN_NoAttack=model_main.predict(X_test, verbose=0) + Y_predict_NoAttack=[0]*len(Y_CNN_NoAttack) + + for i in range(len(Y_CNN_NoAttack)): + if Y_CNN_NoAttack[i]<0.5: + Y_CNN_NoAttack[i]=0 + else: + Y_CNN_NoAttack[i]=1 + + for i in range(len(Y_CNN_NoAttack)): + Y_predict_NoAttack[i]= int(Y_CNN_NoAttack[i]) + #*****************************************************Result of Model without attack on X_test***************************************** + print("********************************Result of Model without attack******************************************************************") + loss, accuracy = 
model_main.evaluate(X_train_NoAttack, Y_train_NoAttack, verbose=2) + print('Accuracy for Train set: %f' % (accuracy*100)) + print('Loss for Train set: %f' % (loss)) + print(" ") + + loss, accuracy = model_main.evaluate(X_val_NoAttack, Y_val_NoAttack, verbose=2) + print('Accuracy for Validation set: %f' % (accuracy*100)) + print('Loss for Train Validation set: %f' % (loss)) + print(" ") + + loss, accuracy = model_main.evaluate(X_test, Y_test, verbose=2) + print('Accuracy for Test set: %f' % (accuracy*100)) + print('Loss for Test set:: %f' % (loss)) + print(" ") + + TN_NoAttack, FP_NoAttack, FN_NoAttack, TP_NoAttack = confusion_matrix(Y_test, Y_predict_NoAttack).ravel() + print("TN_NoAttack=",TN_NoAttack) + print("FP_NoAttack=",FP_NoAttack) + print("FN_NoAttack=",FN_NoAttack) + print("TP_NoAttack=",TP_NoAttack) + print(" ") + + if (FP_NoAttack+TN_NoAttack)>0: + FPR_NoAttack=FP_NoAttack/(FP_NoAttack+TN_NoAttack) + print("The FPR_NoAttack result=", FPR_NoAttack) + + if (FP_NoAttack+TN_NoAttack)>0: + TPR_NoAttack=TP_NoAttack/(TP_NoAttack+FN_NoAttack) + print("The TPR_NoAttack result=", TPR_NoAttack) + + if (TN_NoAttack+FP_NoAttack)>0: + TNR_NoAttack=TN_NoAttack/(TN_NoAttack+FP_NoAttack) + print("The TNR_NoAttack result=", TNR_NoAttack) + + if (FN_NoAttack+TP_NoAttack)>0: + FNR_NoAttack=FN_NoAttack/(FN_NoAttack+TP_NoAttack) + print("The FNR_NoAttack result=", FNR_NoAttack) + + if ((TN_NoAttack/(TN_NoAttack+FP_NoAttack))+(TP_NoAttack/(TP_NoAttack+FP_NoAttack)))>0: + AUC_NoAttack=1/(2*((TN_NoAttack/(TN_NoAttack+FP_NoAttack))+(TP_NoAttack/(TP_NoAttack+FP_NoAttack)))) + print("The AUC_NoAttack result=", AUC_NoAttack) + + if (TP_NoAttack+TN_NoAttack+FP_NoAttack+FN_NoAttack)>0: + ACC_NoAttack=(TP_NoAttack+TN_NoAttack)/(TP_NoAttack+TN_NoAttack+FP_NoAttack+FN_NoAttack) + print("The ACC_NoAttack result=", ACC_NoAttack) + + if ((TP_NoAttack+FP_NoAttack)*(TP_NoAttack+FN_NoAttack)*(TN_NoAttack+FP_NoAttack)*(TN_NoAttack+FN_NoAttack))>0: + 
MCC_NoAttack=(TP_NoAttack*TN_NoAttack-FP_NoAttack*FN_NoAttack)/math.sqrt((TP_NoAttack+FP_NoAttack)*(TP_NoAttack+FN_NoAttack)*(TN_NoAttack+FP_NoAttack)*(TN_NoAttack+FN_NoAttack)) + print("The Matthews correlation coefficient result=", MCC_NoAttack) + print(" ") + print("*****************************************************End of Without Attack part************************************************") + print(" ") + print(" ") + print(" ") + print("*****************************************************Label Flipping Attack*****************************************************") + print(" ") + #************************** + # finding Malware of Train data + malware_train= sparse.lil_matrix(X_train) + cl_malware=list() + z_m=0 + count_m=0 + for i, j in enumerate(Y_train): + if j == 1: + delete_row_lil(malware_train, i-count_m) + count_m=count_m+1 + else: + cl_malware.insert(z_m, 1) + z_m=z_m+1 + #*************************** + #Finding Benign of Train data + cl_X_train=list(Y_train) + benign_train=sparse.lil_matrix(X_train) + z_b=0 + count_b=0 + cl_benign=list() + for i, j in enumerate(cl_X_train): + if j == 0: + delete_row_lil(benign_train, i-count_b) + count_b=count_b+1 + else: + cl_benign.insert(z_b, 1) + z_b=z_b+1 + print("***********Size of Each Data Part:**********") + print("malware_train=", malware_train.get_shape()) + print("benign_train=", benign_train.get_shape()) + #*************************************************** + row_malware_train,column_malware_train=malware_train.get_shape() + #Number_of_flipped_label=int(row_malware_train) + + X_train_LFA=X_train.copy() + Y_train_LFA=Y_train[:] + + row_train_LFA,column_train_LFA=X_train_LFA.get_shape() + clusterer = KMeans(n_clusters=2, random_state=10) + X=X_train_LFA.toarray() + t0=time.time() + cluster_labels = clusterer.fit_predict(X) + sample_silhouette_values = silhouette_samples(X, cluster_labels) + #print("sample_silhouette_values=",sample_silhouette_values) + + flipped_Y_train=list(Y_train_LFA) + counter=0 + for 
new_index in range(row_train_LFA): + if (sample_silhouette_values[new_index]<0.1): #and (flipped_Y_train[new_index]==0) + flipped_Y_train[new_index]=abs(flipped_Y_train[new_index]-1) #flipped_Y_train[new_index]=1 + counter=counter+1 + + print("Flipped counter=", counter) + t1=time.time() + print("Time for Label Flipping Attack =",t1-t0) + print(" ") + + #************************************************************************** + model_main_LFA_Final = Sequential() + model_main_LFA_Final.add(Embedding(row_train_LFA, 8, input_length=column_train_LFA)) + model_main_LFA_Final.add(Conv1D(16,2, strides=2, padding='same')) + model_main_LFA_Final.add(MaxPooling1D(pool_size = (4), strides=(2))) + model_main_LFA_Final.add(Conv1D(32,2, strides=2, padding='same')) + model_main_LFA_Final.add(MaxPooling1D(pool_size = (4), strides=(2))) + model_main_LFA_Final.add(Conv1D(64,2, strides=2, padding='same')) + model_main_LFA_Final.add(Flatten()) + model_main_LFA_Final.add(Dense(1, activation='sigmoid')) + model_main_LFA_Final.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc']) + model_main_LFA_Final.fit(X_train_LFA, flipped_Y_train, epochs=200, verbose=0) + + + Y_predict_LFA=model_main_LFA_Final.predict(X_test, verbose=0) + Y_predict_LFA_Final=[0]*len(Y_predict_LFA) + + for i in range(len(Y_predict_LFA)): + if Y_predict_LFA[i]<0.5: + Y_predict_LFA[i]=0 + else: + Y_predict_LFA[i]=1 + + for i in range(len(Y_predict_LFA)): + Y_predict_LFA_Final[i]= int(Y_predict_LFA[i]) + #*****************************************************Result of Model with LFA ****************************************************** + print("********************************Result of Model with LFA attack **************************************************************") + print(" ") + loss, accuracy = model_main_LFA_Final.evaluate(X_train_LFA, flipped_Y_train, verbose=2) + print('Accuracy for Train set: %f' % (accuracy*100)) + print('Loss for Train set: %f' % (loss)) + print(" ") + + loss, accuracy 
= model_main_LFA_Final.evaluate(X_test, Y_test, verbose=2) + print('Accuracy for Test set: %f' % (accuracy*100)) + print('Loss for Test set:: %f' % (loss)) + print(" ") + + TN_LFA, FP_LFA, FN_LFA, TP_LFA = confusion_matrix(Y_test, Y_predict_LFA_Final).ravel() + print("TN_LFA=",TN_LFA) + print("FP_LFA=",FP_LFA) + print("FN_LFA=",FN_LFA) + print("TP_LFA=",TP_LFA) + print(" ") + + if (FP_LFA+TN_LFA)>0: + FPR_LFA=FP_LFA/(FP_LFA+TN_LFA) + print("The FPR_LFA result=", FPR_LFA) + + if (FP_LFA+TN_LFA)>0: + TPR_LFA=TP_LFA/(TP_LFA+FN_LFA) + print("The TPR_LFA result=", TPR_LFA) + + if (TN_LFA+FP_LFA)>0: + TNR_LFA=TN_LFA/(TN_LFA+FP_LFA) + print("The TNR_LFA result=", TNR_LFA) + + if (FN_LFA+TP_LFA)>0: + FNR_LFA=FN_LFA/(FN_LFA+TP_LFA) + print("The FNR_LFA result=", FNR_LFA) + + if ((TN_LFA/(TN_LFA+FP_LFA))+(TP_LFA/(TP_LFA+FP_LFA)))>0: + AUC_LFA=1/(2*((TN_LFA/(TN_LFA+FP_LFA))+(TP_LFA/(TP_LFA+FP_LFA)))) + print("The AUC_LFA result=", AUC_LFA) + + if (TP_LFA+TN_LFA+FP_LFA+FN_LFA)>0: + ACC_LFA=(TP_LFA+TN_LFA)/(TP_LFA+TN_LFA+FP_LFA+FN_LFA) + print("The ACC_LFAk result=", ACC_LFA) + + if ((TP_LFA+FP_LFA)*(TP_LFA+FN_LFA)*(TN_LFA+FP_LFA)*(TN_LFA+FN_LFA))>0: + MCC_LFA=(TP_LFA*TN_LFA-FP_LFA*FN_LFA)/math.sqrt((TP_LFA+FP_LFA)*(TP_LFA+FN_LFA)*(TN_LFA+FP_LFA)*(TN_LFA+FN_LFA)) + print("The Matthews correlation coefficient result=", MCC_LFA) + print(" ") + print("************************************************End of Label Flipping Attack part**********************************************") + print(" ") + print(" ") + print(" ") + print("*****************************************************KNN Based Semi-Supervised Defense(KSD)************************************") + print(" ") + + X_train_KNN=X_train.copy() + Y_train_KNN=flipped_Y_train[:] + + X_val_KNN=X_val.copy() + Y_val_KNN=Y_val[:] + + row_train_KNN,column_train_KNN=X_train_KNN.get_shape() + + Number_of_flipped_label=int(row_train_KNN/50) + Y_train_corrected_By_KNN=list(Y_train_KNN) + + c=0 + m=0 + t2=time.time() + + for i in 
list(range(Number_of_flipped_label)): + row_KNN=X_train_KNN.getrow(i) + distances = sklearn.metrics.pairwise.manhattan_distances(row_KNN,X_val_KNN) + indices = distances.argsort()[:10] + d=indices[0] + a=d[0:10] + + F=0 + for j in range(len(a)): + t=a[j] + F=F+Y_val_KNN[t] + fraction=F/10 + if fraction>=0.5: + Y_train_corrected_By_KNN[i]=1 + m=m+1 + else: + Y_train_corrected_By_KNN[i]=0 + c=c+1 + Y_train_corrected_By_KNN_Final=np.array(Y_train_corrected_By_KNN) + t3=time.time() + print("Time for KNN Based Semi-Supervised Defense(KSD) =",t3-t2) + print(" ") + + model_main_KNN = Sequential() + model_main_KNN.add(Embedding(row_train_NoAttack, 8, input_length=column_train_NoAttack)) + model_main_KNN.add(Conv1D(16,2, strides=2, padding='same')) + model_main_KNN.add(MaxPooling1D(pool_size = (4), strides=(2))) + model_main_KNN.add(Conv1D(32,2, strides=2, padding='same')) + model_main_KNN.add(MaxPooling1D(pool_size = (4), strides=(2))) + model_main_KNN.add(Conv1D(64,2, strides=2, padding='same')) + model_main_KNN.add(Flatten()) + model_main_KNN.add(Dense(1, activation='sigmoid')) + model_main_KNN.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc']) + model_main_KNN.fit(X_train_KNN,Y_train_corrected_By_KNN_Final, epochs=20, batch_size=32, verbose=0) + Y_predict_KNN=model_main_KNN.predict(X_test, verbose=0) + + Y_predict_KNN_Final=[0]*len(Y_predict_KNN) + for i in range(len(Y_predict_KNN)): + if Y_predict_KNN[i]<0.5: + Y_predict_KNN[i]=0 + else: + Y_predict_KNN[i]=1 + + for i in range(len(Y_predict_KNN)): + Y_predict_KNN_Final[i]= int(Y_predict_KNN[i]) + #*****************************************************Result of Model After KNN Based Defense***************************************** + print("************************Result After KNN_Based Defense************************************************************************") + print(" ") + + loss, accuracy = model_main_KNN.evaluate(X_train_KNN, Y_train_KNN, verbose=0) + print('Accuracy for Train set: %f' % 
(accuracy*100)) + print('Loss for Train set: %f' % (loss)) + print(" ") + + loss, accuracy = model_main_KNN.evaluate(X_test, Y_test, batch_size=32, verbose=0) + print('Accuracy After KNN-Based Defense: %f' % (accuracy*100)) + print('Loss After KNN-Based Defense: %f' % (loss)) + print(" ") + + TN_KNN, FP_KNN, FN_KNN, TP_KNN = confusion_matrix(Y_test, Y_predict_KNN_Final).ravel() + print("TN_KNN=",TN_KNN) + print("FP_KNN=",FP_KNN) + print("FN_KNN=",FN_KNN) + print("TP_KNN=",TP_KNN) + print(" ") + + if (FP_KNN+TN_KNN)>0: + FPR_KNN=FP_KNN/(FP_KNN+TN_KNN) + print("The FPR_KNN result=", FPR_KNN) + + if (FP_KNN+TN_KNN)>0: + TPR_KNN=TP_KNN/(TP_KNN+FN_KNN) + print("The TPR_KNN result=", TPR_KNN) + + if (TN_KNN+FP_KNN)>0: + TNR_KNN=TN_KNN/(TN_KNN+FP_KNN) + print("The TNR_KNN result=", TNR_KNN) + + if (FN_KNN+TP_KNN)>0: + FNR_KNN=FN_KNN/(FN_KNN+TP_KNN) + print("The FNR_KNN result=", FNR_KNN) + + if ((TN_KNN/(TN_KNN+FP_KNN))+(TP_KNN/(TP_KNN+FP_KNN)))>0: + AUC_KNN=1/(2*((TN_KNN/(TN_KNN+FP_KNN))+(TP_KNN/(TP_KNN+FP_KNN)))) + print("The AUC_KNN result=", AUC_KNN) + + if (TP_KNN+TN_KNN+FP_KNN+FN_KNN)>0: + ACC_KNN=(TP_KNN+TN_KNN)/(TP_KNN+TN_KNN+FP_KNN+FN_KNN) + print("The ACC_KNN result=", ACC_KNN) + + if ((TP_KNN+FP_KNN)*(TP_KNN+FN_KNN)*(TN_KNN+FP_KNN)*(TN_KNN+FN_KNN))>0: + MCC_KNN=(TP_KNN*TN_KNN-FP_KNN*FN_KNN)/math.sqrt((TP_KNN+FP_KNN)*(TP_KNN+FN_KNN)*(TN_KNN+FP_KNN)*(TN_KNN+FN_KNN)) + print("The Matthews correlation coefficient result=", MCC_KNN) + print(" ") + print("************************************************End of KNN Based Semi-Supervised Defense(KSD) part*****************************") + print(" ") + print(" ") + print(" ") + print("*****************************************************Label Based Semi-supervised Defense(LSD)**********************************") + print(" ") + #***********************label Propagation and Label Spreading for Using in Label Based Semi-supervised Defense(LSD) ******************* + X_train_LSD=X_train.copy() + Y_train_LSD=flipped_Y_train[:] 
+ + X_val_LSD=X_val.copy() + Y_val_LSD=Y_val[:] + row_val_LSD,column_val_LSD=X_val_LSD.get_shape() + row_train_LSD,column_train_LSD=X_train_LSD.get_shape() + + t4=time.time() + + labels = np.full(row_train_LSD, -1) + for i in range(row_val_LSD): + labels[i] = Y_val_LSD[i] + + X=X_train_LSD.toarray() + label_spread = label_propagation.LabelSpreading(kernel='knn', alpha=0.8) + label_propa=label_propagation.LabelPropagation(kernel='knn', gamma=20, n_neighbors=7, max_iter=1000, tol=0.001, n_jobs=None) + label_spread.fit(X, labels) + label_propa.fit(X, labels) + output_labels_spread = label_spread.transduction_ + output_labels_propa = label_propa.transduction_ + #*******************Convolutional Neural Network for Using in Label Based Semi-supervised Defense(LSD) ****************************** + CNN_model_for_LSD = Sequential() + CNN_model_for_LSD.add(Embedding(row_train_LSD, 8, input_length=column_train_LSD)) + CNN_model_for_LSD.add(Conv1D(16,2, strides=2, padding='same')) + CNN_model_for_LSD.add(MaxPooling1D(pool_size = (4), strides=(2))) + CNN_model_for_LSD.add(Conv1D(32,2, strides=2, padding='same')) + CNN_model_for_LSD.add(MaxPooling1D(pool_size = (4), strides=(2))) + CNN_model_for_LSD.add(Conv1D(64,2, strides=2, padding='same')) + CNN_model_for_LSD.add(Flatten()) + + CNN_model_for_LSD.add(Dense(1, activation='sigmoid')) + CNN_model_for_LSD.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc']) + CNN_model_for_LSD.fit(X_train_LSD, Y_train_LSD, epochs=200, verbose=0) + + Y_predict_CNN_for_LSD=CNN_model_for_LSD.predict(X_train_LSD, verbose=0) + + Y_predict_CNN_LSD_Final=[0]*len(Y_predict_CNN_for_LSD) + for i in range(len(Y_predict_CNN_for_LSD)): + if Y_predict_CNN_for_LSD[i]<0.5: + Y_predict_CNN_for_LSD[i]=0 + else: + Y_predict_CNN_for_LSD[i]=1 + + for i in range(len(Y_predict_CNN_for_LSD)): + Y_predict_CNN_LSD_Final[i]= int(Y_predict_CNN_for_LSD[i]) + #*******************************************Voting Between CNN , label Propagation and Label 
Spreading************************** + Y_predict_LSD_Final=[0]*len(Y_train) + for i in range(len(Y_train)): + c=Y_train_LSD[i]+Y_predict_CNN_LSD_Final[i]+output_labels_propa[i]+output_labels_spread[i] + if 2<=c: + Y_predict_LSD_Final[i]=1 + else: + Y_predict_LSD_Final[i]=0 + t5=time.time() + print("Time for Label Based Semi-supervised Defense =",t5-t4) + print(" ") + #********************************************************************************************************************************* + model_main_LSD = Sequential() + model_main_LSD.add(Embedding(row_train_LSD, 8, input_length=column_train_LSD)) + model_main_LSD.add(Conv1D(16,2, strides=2, padding='same')) + model_main_LSD.add(MaxPooling1D(pool_size = (4), strides=(2))) + model_main_LSD.add(Conv1D(32,2, strides=2, padding='same')) + model_main_LSD.add(MaxPooling1D(pool_size = (4), strides=(2))) + model_main_LSD.add(Conv1D(64,2, strides=2, padding='same')) + model_main_LSD.add(Flatten()) + model_main_LSD.add(Dense(1, activation='sigmoid')) + model_main_LSD.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc']) + model_main_LSD.fit(X_train_LSD, Y_predict_LSD_Final, epochs=200, verbose=0) + + Y_predict_LSD_Defense=model_main_LSD.predict(X_test, verbose=0) + Y_predict_LSD_Defense_Final=[0]*len(Y_predict_LSD_Defense) + + for i in range(len(Y_predict_LSD_Defense)): + if Y_predict_LSD_Defense[i]<0.5: + Y_predict_LSD_Defense[i]=0 + else: + Y_predict_LSD_Defense[i]=1 + + for i in range(len(Y_predict_LSD_Defense)): + Y_predict_LSD_Defense_Final[i]= int(Y_predict_LSD_Defense[i]) + #**************************************Result of Model after Label Based Semi-supervised Defense(LSD)********************************** + print("************************Result of Model after Label Based Semi-supervised Defense(LSD)*****************************************") + print(" ") + loss, accuracy = model_main.evaluate(X_train, Y_predict_LSD_Final, verbose=2) + print('Accuracy for Train set: %f' % (accuracy*100)) + 
print('Loss for Train set: %f' % (loss)) + print(" ") + + loss, accuracy = model_main.evaluate(X_test, Y_test, verbose=2) + print('Accuracy for Test set: %f' % (accuracy*100)) + print('Loss for Test set:: %f' % (loss)) + print(" ") + + TN_LSD, FP_LSD, FN_LSD, TP_LSD = confusion_matrix(Y_test, Y_predict_LSD_Defense_Final).ravel() + print("TN_LSD=",TN_LSD) + print("FP_LSD=",FP_LSD) + print("FN_LSD=",FN_LSD) + print("TP_LSD=",TP_LSD) + print(" ") + + if (FP_LSD+TN_LSD)>0: + FPR_LSD=FP_LSD/(FP_LSD+TN_LSD) + print("The FPR_LSD result=", FPR_LSD) + + if (FP_LSD+TN_LSD)>0: + TPR_LSD=TP_LSD/(TP_LSD+FN_LSD) + print("The TPR_LSD result=", TPR_LSD) + + if (TN_LSD+FP_LSD)>0: + TNR_LSD=TN_LSD/(TN_LSD+FP_LSD) + print("The TNR_LSD result=", TNR_LSD) + + if (FN_LSD+TP_LSD)>0: + FNR_LSD=FN_LSD/(FN_LSD+TP_LSD) + print("The FNR_LSD result=", FNR_LSD) + + if ((TN_LSD/(TN_LSD+FP_LSD))+(TP_LSD/(TP_LSD+FP_LSD)))>0: + AUC_LSD=1/(2*((TN_LSD/(TN_LSD+FP_LSD))+(TP_LSD/(TP_LSD+FP_LSD)))) + print("The AUC result=", AUC_LSD) + + if (TP_LSD+TN_LSD+FP_LSD+FN_LSD)>0: + ACC_LSD=(TP_LSD+TN_LSD)/(TP_LSD+TN_LSD+FP_LSD+FN_LSD) + print("The ACC result=", ACC_LSD) + + if ((TP_LSD+FP_LSD)*(TP_LSD+FN_LSD)*(TN_LSD+FP_LSD)*(TN_LSD+FN_LSD))>0: + MCC_LSD=(TP_LSD*TN_LSD-FP_LSD*FN_LSD)/math.sqrt((TP_LSD+FP_LSD)*(TP_LSD+FN_LSD)*(TN_LSD+FP_LSD)*(TN_LSD+FN_LSD)) + print("The Matthews correlation coefficient result=", MCC_LSD) + print(" ") + print("*****************************************************End of Label Based Semi-supervised Defense(LSD)***************************") + print(" ") + print(" ") + print(" ") + print("*****************************************************Clustering Based Semi-supervised Defense(CSD)*****************************") + print(" ") + + X_train_CSD=X_train.copy() + Y_train_CSD=flipped_Y_train[:] + + X_val_CSD=X_val.copy() + Y_val_CSD=Y_val[:] + row_train_CSD,column_train_CSD=X_train_CSD.get_shape() + + t6=time.time() + + Y_predict_val_from_CNN_Model=model_main.predict(X_val_CSD, 
verbose=0) + + Y_predict_val_from_CNN_Model_Final=[0]*len(Y_predict_val_from_CNN_Model) + for i in range(len(Y_predict_val_from_CNN_Model)): + if Y_predict_val_from_CNN_Model[i]<0.5: + Y_predict_val_from_CNN_Model[i]=0 + else: + Y_predict_val_from_CNN_Model[i]=1 + for i in range(len(Y_predict_val_from_CNN_Model)): + Y_predict_val_from_CNN_Model_Final[i]= int(Y_predict_val_from_CNN_Model[i]) + + adjusted_rand_score_val=metrics.adjusted_rand_score(Y_val_CSD, Y_predict_val_from_CNN_Model_Final) + adjusted_mutual_info_score_val=metrics.adjusted_mutual_info_score(Y_val_CSD, Y_predict_val_from_CNN_Model_Final) + homogeneity_score_val=metrics.homogeneity_score(Y_val_CSD, Y_predict_val_from_CNN_Model_Final) + fowlkes_mallows_score_val=metrics.fowlkes_mallows_score(Y_val_CSD, Y_predict_val_from_CNN_Model_Final) + + for i in range(20): #row_train + Y_temp=Y_val_CSD.copy() + + row=X_train_CSD.getrow(i) + X_temp = sp.lil.lil_matrix(sparse.csr_matrix(sparse.vstack((X_val_CSD, row)))) + Y_temp.append(Y_train_CSD[i]) + + Y_predict_CNN_compute_CSD=model_main.predict(X_temp, verbose=0) + + Y_predict_temp=[0]*len(Y_predict_CNN_compute_CSD) + + for n in range(len(Y_predict_CNN_compute_CSD)): + if Y_predict_CNN_compute_CSD[n]<0.5: + Y_predict_CNN_compute_CSD[n]=0 + else: + Y_predict_CNN_compute_CSD[n]=1 + + for m in range(len(Y_predict_CNN_compute_CSD)): + Y_predict_temp[m]= int(Y_predict_CNN_compute_CSD[m]) + + adjusted_rand_score_temp=metrics.adjusted_rand_score(Y_temp, Y_predict_temp) + adjusted_mutual_info_score_temp=metrics.adjusted_mutual_info_score(Y_temp, Y_predict_temp) + homogeneity_score_temp=metrics.homogeneity_score(Y_temp, Y_predict_temp) + fowlkes_mallows_score_temp=metrics.fowlkes_mallows_score(Y_temp, Y_predict_temp) + + landa1=abs(adjusted_rand_score_temp-adjusted_rand_score_val) + landa2=abs(adjusted_mutual_info_score_temp-adjusted_mutual_info_score_val) + landa3=abs(homogeneity_score_temp-homogeneity_score_val) + 
landa4=abs(fowlkes_mallows_score_temp-fowlkes_mallows_score_val) + + sum_of_diffrences=landa1+landa2+landa3+landa4 + + if sum_of_diffrences<0.1: + X_val_CSD = sp.lil.lil_matrix(sparse.csr_matrix(sparse.vstack((X_val_CSD, row)))) + Y_val_CSD.append(Y_train_CSD[i]) + Y_predict_CNN_inside_CSD=model_main.predict(X_val_CSD, verbose=0) + + Y_predict_CNN_inside_CSD_Final=[0]*len(Y_predict_CNN_inside_CSD) #Y_predict_CNN_inside + for j in range(len(Y_predict_CNN_inside_CSD)): #Y_predict_CNN_inside + if Y_predict_CNN_inside_CSD[j]<0.5: + Y_predict_CNN_inside_CSD[j]=0 + else: + Y_predict_CNN_inside_CSD[j]=1 + + for k in range(len(Y_predict_CNN_inside_CSD)): #Y_predict_CNN_inside + Y_predict_CNN_inside_CSD_Final[k]= int(Y_predict_CNN_inside_CSD[k]) + + adjusted_rand_score_val=metrics.adjusted_rand_score(Y_val_CSD, Y_predict_CNN_inside_CSD_Final) + adjusted_mutual_info_score_val=metrics.adjusted_mutual_info_score(Y_val_CSD, Y_predict_CNN_inside_CSD_Final) + homogeneity_score_val=metrics.homogeneity_score(Y_val_CSD, Y_predict_CNN_inside_CSD_Final) + fowlkes_mallows_score_val=metrics.fowlkes_mallows_score(Y_val_CSD, Y_predict_CNN_inside_CSD_Final) + t7=time.time() + print("Time for Clustering Based Semi-supervised Defense =",t7-t6) + print(" ") + #**************************************************************************************** + X_train_Final_CSD= X_val_CSD.copy() + Y_train_Final_CSD=Y_val_CSD.copy() + row_train_CSD_Final,col_train_CSD_Final=X_train_Final_CSD.get_shape() + + model_main_CSD = Sequential() + model_main_CSD.add(Embedding(row_train_CSD_Final, 8, input_length=col_train_CSD_Final)) + model_main_CSD.add(Conv1D(16,2, strides=2, padding='same')) + model_main_CSD.add(MaxPooling1D(pool_size = (4), strides=(2))) + model_main_CSD.add(Conv1D(32,2, strides=2, padding='same')) + model_main_CSD.add(MaxPooling1D(pool_size = (4), strides=(2))) + model_main_CSD.add(Conv1D(64,2, strides=2, padding='same')) + model_main_CSD.add(Flatten()) + model_main_CSD.add(Dense(1, 
activation='sigmoid')) + model_main_CSD.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc']) + model_main_CSD.fit(X_train_Final_CSD, Y_train_Final_CSD, epochs=200, verbose=0) + + Y_test_predict_CSD=model_main_CSD.predict(X_test, verbose=0) + + Y_test_predict_CSD_Final=[0]*len(Y_test_predict_CSD) + for i in range(len(Y_test_predict_CSD)): + if Y_test_predict_CSD[i]<0.5: + Y_test_predict_CSD[i]=0 + else: + Y_test_predict_CSD[i]=1 + + for i in range(len(Y_test_predict_CSD)): + Y_test_predict_CSD_Final[i]= int(Y_test_predict_CSD[i]) + + #*****************************************************Result of Model after Clustering Based Semi-supervised Defense(CSD)************** + print("***********************Result of Model after Clustering Based Semi-supervised Defense(CSD)*************************************") + print(" ") + + loss, accuracy = model_main_CSD.evaluate(X_train_Final_CSD, Y_train_Final_CSD, verbose=2) + print('Accuracy for New Train set: %f' % (accuracy*100)) + print('Loss for New Train set: %f' % (loss)) + print(" ") + + loss, accuracy = model_main_CSD.evaluate(X_test, Y_test, verbose=2) + print('Accuracy for Test set: %f' % (accuracy*100)) + print('Loss for Test set:: %f' % (loss)) + print(" ") + + TN_CSD, FP_CSD, FN_CSD, TP_CSD = confusion_matrix(Y_test, Y_test_predict_CSD_Final).ravel() + print("TN_CSD=",TN_CSD) + print("FP_CSD=",FP_CSD) + print("FN_CSD=",FN_CSD) + print("TP_CSD=",TP_CSD) + print(" ") + + if (FP_CSD+TN_CSD)>0: + FPR_CSD=FP_CSD/(FP_CSD+TN_CSD) + print("The FPR_CSD result=", FPR_CSD) + + if (FP_CSD+TN_CSD)>0: + TPR_CSD=TP_CSD/(TP_CSD+FN_CSD) + print("The TPR_CSD result=", TPR_CSD) + + if (TN_CSD+FP_CSD)>0: + TNR_CSD=TN_CSD/(TN_CSD+FP_CSD) + print("The TNR_CSD result=", TNR_CSD) + + if (FN_CSD+TP_CSD)>0: + FNR_CSD=FN_CSD/(FN_CSD+TP_CSD) + print("The FNR_CSD result=", FNR_CSD) + + if ((TN_CSD/(TN_CSD+FP_CSD))+(TP_CSD/(TP_CSD+FP_CSD)))>0: + AUC_CSD=1/(2*((TN_CSD/(TN_CSD+FP_CSD))+(TP_CSD/(TP_CSD+FP_CSD)))) + print("The AUC_CSD 
result=", AUC_CSD) + + if (TP_CSD+TN_CSD+FP_CSD+FN_CSD)>0: + ACC_CSD=(TP_CSD+TN_CSD)/(TP_CSD+TN_CSD+FP_CSD+FN_CSD) + print("The ACC_CSD result=", ACC_CSD) + + if ((TP_CSD+FP_CSD)*(TP_CSD+FN_CSD)*(TN_CSD+FP_CSD)*(TN_CSD+FN_CSD))>0: + MCC_CSD=(TP_CSD*TN_CSD-FP_CSD*FN_CSD)/math.sqrt((TP_CSD+FP_CSD)*(TP_CSD+FN_CSD)*(TN_CSD+FP_CSD)*(TN_CSD+FN_CSD)) + print("The Matthews correlation coefficient result=", MCC_CSD) + print(" ") + print("************************************************End of Clustering Based Semi-supervised Defense(LSD)***************************") + print(" ") + print(" ") + print(" ") +#****************************************************************************************************************************************** +if __name__ == "__main__": + main() +#****************************************************************************** \ No newline at end of file diff --git a/Taheri2020NCAA-labelflipping_Sourcecode/Label_Flipping_Paper_without_Feature_Selection(LSD_CSD_KDD).py b/Taheri2020NCAA-labelflipping_Sourcecode/Label_Flipping_Paper_without_Feature_Selection(LSD_CSD_KDD).py new file mode 100644 index 0000000..baf5038 --- /dev/null +++ b/Taheri2020NCAA-labelflipping_Sourcecode/Label_Flipping_Paper_without_Feature_Selection(LSD_CSD_KDD).py @@ -0,0 +1,833 @@ +# -*- coding: utf-8 -*- +""" +Created on Fri Jun 21 14:25:17 2019 + +@author: Rahim +""" +#*****************************************************************import Library***************************************************************************** +from __future__ import print_function +from sklearn.feature_selection import SelectFromModel +from sklearn.feature_selection import SelectKBest, f_regression +from sklearn.model_selection import KFold +from sklearn.model_selection import cross_val_score +from sklearn.ensemble import RandomForestClassifier +from sklearn.metrics import classification_report +from sklearn.model_selection import train_test_split +from sklearn.metrics import 
confusion_matrix +from sklearn import model_selection +from sklearn.feature_selection import RFE +from sklearn.linear_model import LogisticRegression +from sklearn.ensemble import ExtraTreesClassifier +from sklearn.ensemble import RandomForestRegressor +from scipy.sparse import csr_matrix, vstack, hstack +from scipy.sparse import coo_matrix +from keras.preprocessing.text import one_hot +from sklearn import metrics +from sklearn.metrics import silhouette_samples, silhouette_score +from sklearn.semi_supervised import LabelPropagation +from sklearn.semi_supervised import LabelSpreading +from sklearn.semi_supervised import label_propagation +from sklearn.metrics import roc_auc_score +from sklearn.metrics import f1_score +from sklearn.cluster import KMeans +import math +#import keras +from keras.models import Sequential +from keras.layers import Dense, Dropout, Activation , Flatten +from sklearn.metrics import log_loss +from keras.optimizers import SGD +from keras.layers.normalization import BatchNormalization +from keras.layers.convolutional import UpSampling2D +from keras.layers.convolutional import Conv2D, MaxPooling2D, MaxPooling1D +from keras.layers.embeddings import Embedding +from scipy import sparse +import pandas as pd +import numpy as np +#import random +import sklearn +from sklearn.metrics.pairwise import manhattan_distances +from keras.models import Model +from keras.layers import Conv1D, multiply, GlobalMaxPool1D, Input , Lambda +import time +import argparse +#import math +from numpy import * +import os.path as osp +import scipy.sparse as sp +import pickle +from sklearn.metrics import accuracy_score +#********************************************************************************************************************************* +CLASS = 'class' +CLASS_BEN = 'B' +CLASS_MAL = 'M' +DATA = 'data' +#********************************************Functions that will be used in this program***************************************** +def parse_args(): + parser = 
def parse_args():
    """Parse the command line.

    Returns:
        The argparse namespace; ``input_tables`` holds the paths given after
        ``-i`` / ``--input-tables`` (``None`` when the option is absent).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input-tables', nargs='*', dest='input_tables')

    args = parser.parse_args()

    return args
#******************************************************************************
def read_table(table_file):
    """Load a pickled sample table and collect every feature it mentions.

    The file may contain several pickled objects back to back; each object
    read replaces the previous one, so only the last object is returned.

    Args:
        table_file: Path of the pickle file to read.

    Returns:
        ``(table, f_set)``: the loaded dict (empty when the file holds no
        pickle) and the set of all items found under each value's ``DATA``
        key.
    """
    table = dict()

    # NOTE(security): pickle.load can execute arbitrary code; only pass files
    # that come from a trusted source.
    with open(table_file, 'rb') as handle:
        while True:
            try:
                table = pickle.load(handle)
            except EOFError:
                break

    f_set = set()
    for entry in table.values():
        f_set.update(entry[DATA])

    return table, f_set
#******************************************************************************
def build_table(tables):
    """Merge several sample tables into one.

    Args:
        tables: Iterable of dicts keyed by sample identifier.

    Returns:
        ``(full_table, files)``: the union of all tables (a later table
        overrides an earlier one on a shared key) and a list holding every
        distinct key exactly once.
    """
    full_table = dict()
    for table in tables:
        full_table.update(table)

    files = list(full_table)
    return full_table, files
#******************************************************************************
def convert_to_matrix(table, features, files):
    """Encode the sample table as a binary sample-by-feature sparse matrix.

    Args:
        table: Dict mapping a sample key to a record with ``CLASS`` and
            ``DATA`` entries.
        features: Sequence of feature names; position = matrix column.
        files: Sequence of sample keys; position = matrix row.

    Returns:
        ``(mat, cl)``: an int8 LIL matrix with 1 where a sample lists a
        feature, and the response vector with 1 for ``CLASS_BEN`` samples and
        0 for every other class.

    Raises:
        ValueError: If a key of *table* does not appear in *files*.
    """
    mat = sp.lil_matrix((len(files), len(features)), dtype=np.int8)

    print("Input Data Size = ", mat.shape)

    # Build the position lookups once instead of calling list.index() inside
    # the loops; setdefault keeps the first position of a repeated name,
    # which is what list.index() would return.
    row_of = {}
    for pos, name in enumerate(files):
        row_of.setdefault(name, pos)
    col_of = {}
    for pos, name in enumerate(features):
        col_of.setdefault(name, pos)

    # the response vector
    cl = [0] * len(files)

    for key, val in table.items():
        if key not in row_of:
            raise ValueError("%r is not in list" % (key,))
        k = row_of[key]

        # Compare by value: identity of equal strings is an interpreter detail.
        if val[CLASS] == CLASS_BEN:
            cl[k] = 1

        for v in val[DATA]:
            idx = col_of.get(v)
            if idx is None:
                # Best effort, as before: report the unknown feature and go on.
                print("%r is not in list" % (v,))
                continue
            mat[k, idx] = 1

    return mat, cl
#******************************************************************************
def delete_row_lil(mat, i):
    """Remove row *i* from a LIL sparse matrix in place.

    Args:
        mat: Sparse matrix in LIL format; it is modified, nothing is returned.
        i: Index of the row to drop (forwarded to ``numpy.delete``).

    Raises:
        ValueError: If *mat* is not a LIL-format sparse matrix.
    """
    if not (sp.issparse(mat) and mat.format == 'lil'):
        raise ValueError("works only for LIL format -- use .tolil() first")
    mat.rows = np.delete(mat.rows, i)
    mat.data = np.delete(mat.data, i)
    # No public API shrinks a LIL matrix in place, hence the private attribute.
    mat._shape = (mat._shape[0] - 1, mat._shape[1])
#******************************************************************************
def relevant_features(data, response_vector, features):
    """Order all features by relevance using recursive feature elimination.

    A random-forest regressor is wrapped in RFE with a single feature to
    select, so every feature receives a distinct rank (1 = best).

    Args:
        data: Sample-by-feature matrix.
        response_vector: Target value per sample.
        features: Feature names; position = column of *data*.

    Returns:
        ``(rel_features, ranked_index)``: feature names ordered best first and
        the matching column positions in *features*.
    """
    model = RandomForestRegressor()
    # n_features_to_select must be passed by keyword in current scikit-learn.
    rfe = RFE(model, n_features_to_select=1)
    fit = rfe.fit(data, response_vector)

    # ranking_[j] is the rank of column j, not a column index, so sort the
    # column positions by rank instead of indexing `features` with the ranks.
    order = np.argsort(fit.ranking_, kind='stable')
    ranked_index = [int(j) for j in order]
    rel_features = [features[j] for j in ranked_index]

    return rel_features, ranked_index
#******************************************************************************
def _build_cnn(vocab_size, input_length):
    """Create and compile the 1-D CNN used by every experiment in main()."""
    # NOTE(review): Sequential, Embedding, Conv1D, MaxPooling1D, Flatten and
    # Dense are not imported anywhere in this file; they look like Keras
    # classes and must be imported at module level before this can run.
    model = Sequential()
    model.add(Embedding(vocab_size, 8, input_length=input_length))
    model.add(Conv1D(16, 2, strides=2, padding='same'))
    model.add(MaxPooling1D(pool_size=(4), strides=(2)))
    model.add(Conv1D(32, 2, strides=2, padding='same'))
    model.add(MaxPooling1D(pool_size=(4), strides=(2)))
    model.add(Conv1D(64, 2, strides=2, padding='same'))
    model.add(Flatten())
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
    return model
#******************************************************************************
def _hard_labels(model, X):
    """Predict on X and threshold the outputs at 0.5 into a list of 0/1 ints."""
    scores = np.asarray(model.predict(X, verbose=0)).ravel()
    return [0 if s < 0.5 else 1 for s in scores]
#******************************************************************************
def _print_evaluation(model, X, y, acc_fmt, loss_fmt, **eval_kwargs):
    """Evaluate model on (X, y) and print accuracy (in percent) and loss."""
    loss, accuracy = model.evaluate(X, y, **eval_kwargs)
    print(acc_fmt % (accuracy*100))
    print(loss_fmt % (loss))
    print(" ")
#******************************************************************************
def _report_confusion(tag, y_true, y_pred, auc_label=None, acc_label=None):
    """Print the confusion-matrix counts and the rates derived from them.

    Each rate is printed only when its own denominator is positive.

    Args:
        tag: Suffix used in the printed names (e.g. ``"LFA"``).
        y_true: True 0/1 labels.
        y_pred: Predicted 0/1 labels.
        auc_label: Text printed before the AUC value; defaults to
            ``"The AUC_<tag> result="``.
        acc_label: Text printed before the accuracy value; defaults to
            ``"The ACC_<tag> result="``.
    """
    if auc_label is None:
        auc_label = "The AUC_" + tag + " result="
    if acc_label is None:
        acc_label = "The ACC_" + tag + " result="

    # labels=[0, 1] keeps the matrix 2x2 even if one class is absent; plain
    # ints keep the MCC product below from overflowing a fixed-width integer.
    tn, fp, fn, tp = (int(v) for v in
                      confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel())
    print("TN_" + tag + "=", tn)
    print("FP_" + tag + "=", fp)
    print("FN_" + tag + "=", fn)
    print("TP_" + tag + "=", tp)
    print(" ")

    if (fp + tn) > 0:
        print("The FPR_" + tag + " result=", fp/(fp + tn))

    # Guard on TPR's own denominator (it used to test FP+TN).
    if (tp + fn) > 0:
        print("The TPR_" + tag + " result=", tp/(tp + fn))

    if (tn + fp) > 0:
        print("The TNR_" + tag + " result=", tn/(tn + fp))

    if (fn + tp) > 0:
        print("The FNR_" + tag + " result=", fn/(fn + tp))

    # NOTE(review): formula kept exactly as published in this script,
    # 1 / (2 * (TN/(TN+FP) + TP/(TP+FP))); it is not the usual
    # (TPR + TNR) / 2 approximation -- confirm with the authors.
    if (tn + fp) > 0 and (tp + fp) > 0:
        auc_den = (tn/(tn + fp)) + (tp/(tp + fp))
        if auc_den > 0:
            print(auc_label, 1/(2*auc_den))

    total = tp + tn + fp + fn
    if total > 0:
        print(acc_label, (tp + tn)/total)

    mcc_den = (tp + fp)*(tp + fn)*(tn + fp)*(tn + fn)
    if mcc_den > 0:
        print("The Matthews correlation coefficient result=",
              (tp*tn - fp*fn)/math.sqrt(mcc_den))
    print(" ")
#******************************************************************************
def _clustering_scores(y_true, y_pred):
    """Return (ARI, AMI, homogeneity, Fowlkes-Mallows) for two labelings."""
    return (metrics.adjusted_rand_score(y_true, y_pred),
            metrics.adjusted_mutual_info_score(y_true, y_pred),
            metrics.homogeneity_score(y_true, y_pred),
            metrics.fowlkes_mallows_score(y_true, y_pred))
#******************************************************************************
def _close_section(end_banner, next_banner=None):
    """Print a section's closing banner, three spacer lines and the next banner."""
    print(end_banner)
    print(" ")
    print(" ")
    print(" ")
    if next_banner is not None:
        print(next_banner)
        print(" ")
#*****************************************************************Main Function*******************************************************
def main():
    """Run the baseline, the label-flipping attack and the three defenses.

    Steps, each printing its own timing and metrics:
      1. load the pickled tables and split 60/20/20 into train/val/test;
      2. train the CNN on clean labels;
      3. flip the labels of training samples whose silhouette value under a
         2-means clustering is below 0.1, retrain;
      4. KNN based defense (KSD): relabel the first 1/50 of the training rows
         by majority vote of their 10 nearest validation samples;
      5. label based defense (LSD): vote between the flipped labels, a CNN,
         label propagation and label spreading;
      6. clustering based defense (CSD): grow the validation set with
         training rows that barely change four clustering scores.
    """
    # scikit-learn is already a dependency of this file; these names were
    # used below without ever being imported.
    from sklearn.cluster import KMeans
    from sklearn.metrics import silhouette_samples
    from sklearn.metrics.pairwise import manhattan_distances
    from sklearn.semi_supervised import LabelPropagation, LabelSpreading

    args = parse_args()
    tables = []
    f_set = set()
    #read the data (no -i option leaves input_tables as None)
    for t_files in (args.input_tables or []):
        table, features = read_table(t_files)
        f_set = f_set.union(features)
        tables.append(table)
    print(" ")
    print(" ")
    print("*********Semi-Supervised Deep Learning Based Approach Against Label Flipping Attack in Malware Detection System*****************")
    print(" ")
    #************************************build table from data and convert to matrix***************************************************
    full_table, files = build_table(tables)
    files.sort()
    features = list(f_set)
    features.sort()
    mat, cl = convert_to_matrix(full_table, features, files)
    data = mat.tocsr()
    #******************************************Split data to train , test and validation**********************************************
    seed = 10
    X_train, X_test, Y_train, Y_test = train_test_split(data, cl, test_size=0.2, random_state=seed)
    X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.25, random_state=seed)
    X_train = sparse.csr_matrix(X_train)
    X_val = sparse.csr_matrix(X_val)
    X_test = sparse.csr_matrix(X_test)
    print("row_train,column_train=", X_train.shape)
    print(" ")
    print("row_val,column_val=", X_val.shape)
    print(" ")
    print("row_test,column_test=", X_test.shape)
    print(" ")
    print("********************************************************************")
    n_train, n_features = X_train.shape
    #**************************************************Model without attack************************************************************
    model_main = _build_cnn(n_train, n_features)
    model_main.fit(X_train, Y_train, epochs=200, verbose=0)
    Y_predict_NoAttack = _hard_labels(model_main, X_test)

    print("********************************Result of Model without attack******************************************************************")
    _print_evaluation(model_main, X_train, Y_train,
                      'Accuracy for Train set: %f', 'Loss for Train set: %f', verbose=2)
    _print_evaluation(model_main, X_val, Y_val,
                      'Accuracy for Validation set: %f', 'Loss for Train Validation set: %f', verbose=2)
    _print_evaluation(model_main, X_test, Y_test,
                      'Accuracy for Test set: %f', 'Loss for Test set:: %f', verbose=2)
    _report_confusion("NoAttack", Y_test, Y_predict_NoAttack)
    _close_section(
        "*****************************************************End of Without Attack part************************************************",
        "*****************************************************Label Flipping Attack*****************************************************")
    #**************************************************Label Flipping Attack************************************************************
    # Only the sizes of the two class subsets are reported, so select the rows
    # with a mask instead of deleting them one at a time.
    train_labels = np.asarray(Y_train)
    malware_train = X_train[train_labels != 1]
    benign_train = X_train[train_labels != 0]
    print("***********Size of Each Data Part:**********")
    print("malware_train=", malware_train.shape)
    print("benign_train=", benign_train.shape)

    clusterer = KMeans(n_clusters=2, random_state=10)
    X = X_train.toarray()
    t0 = time.time()
    cluster_labels = clusterer.fit_predict(X)
    sample_silhouette_values = silhouette_samples(X, cluster_labels)

    # Flip every sample that sits poorly in its cluster.
    flipped_Y_train = list(Y_train)
    counter = 0
    for idx, silhouette in enumerate(sample_silhouette_values):
        if silhouette < 0.1:
            flipped_Y_train[idx] = abs(flipped_Y_train[idx] - 1)
            counter = counter + 1

    print("Flipped counter=", counter)
    t1 = time.time()
    print("Time for Label Flipping Attack =", t1 - t0)
    print(" ")

    model_main_LFA_Final = _build_cnn(n_train, n_features)
    model_main_LFA_Final.fit(X_train, flipped_Y_train, epochs=200, verbose=0)
    Y_predict_LFA_Final = _hard_labels(model_main_LFA_Final, X_test)

    print("********************************Result of Model with LFA attack **************************************************************")
    print(" ")
    _print_evaluation(model_main_LFA_Final, X_train, flipped_Y_train,
                      'Accuracy for Train set: %f', 'Loss for Train set: %f', verbose=2)
    _print_evaluation(model_main_LFA_Final, X_test, Y_test,
                      'Accuracy for Test set: %f', 'Loss for Test set:: %f', verbose=2)
    _report_confusion("LFA", Y_test, Y_predict_LFA_Final,
                      acc_label="The ACC_LFAk result=")
    _close_section(
        "************************************************End of Label Flipping Attack part**********************************************",
        "*****************************************************KNN Based Semi-Supervised Defense(KSD)************************************")
    #**************************************************KNN Based Defense****************************************************************
    Number_of_flipped_label = int(n_train/50)
    Y_train_corrected_By_KNN = list(flipped_Y_train)

    t2 = time.time()
    for i in range(Number_of_flipped_label):
        distances = manhattan_distances(X_train.getrow(i), X_val)
        nearest = distances.argsort()[0][:10]
        votes = sum(Y_val[t] for t in nearest)
        # The vote is always divided by 10, as in the original script.
        Y_train_corrected_By_KNN[i] = 1 if votes/10 >= 0.5 else 0
    Y_train_corrected_By_KNN_Final = np.array(Y_train_corrected_By_KNN)
    t3 = time.time()
    print("Time for KNN Based Semi-Supervised Defense(KSD) =", t3 - t2)
    print(" ")

    model_main_KNN = _build_cnn(n_train, n_features)
    model_main_KNN.fit(X_train, Y_train_corrected_By_KNN_Final, epochs=20, batch_size=32, verbose=0)
    Y_predict_KNN_Final = _hard_labels(model_main_KNN, X_test)

    print("************************Result After KNN_Based Defense************************************************************************")
    print(" ")
    # NOTE(review): train accuracy is measured against the flipped labels, not
    # the KNN-corrected labels the model was fitted on -- confirm intent.
    _print_evaluation(model_main_KNN, X_train, flipped_Y_train,
                      'Accuracy for Train set: %f', 'Loss for Train set: %f', verbose=0)
    _print_evaluation(model_main_KNN, X_test, Y_test,
                      'Accuracy After KNN-Based Defense: %f', 'Loss After KNN-Based Defense: %f',
                      batch_size=32, verbose=0)
    _report_confusion("KNN", Y_test, Y_predict_KNN_Final)
    _close_section(
        "************************************************End of KNN Based Semi-Supervised Defense(KSD) part*****************************",
        "*****************************************************Label Based Semi-supervised Defense(LSD)**********************************")
    #***********************label Propagation and Label Spreading for Using in Label Based Semi-supervised Defense(LSD) ****************
    n_val = X_val.shape[0]
    t4 = time.time()

    # NOTE(review): the first n_val *training* rows receive the *validation*
    # labels and every other row is marked unlabeled (-1); kept as written --
    # confirm this pairing is intended.
    labels = np.full(n_train, -1)
    labels[:n_val] = Y_val

    X = X_train.toarray()
    label_spread = LabelSpreading(kernel='knn', alpha=0.8)
    label_propa = LabelPropagation(kernel='knn', gamma=20, n_neighbors=7, max_iter=1000, tol=0.001, n_jobs=None)
    label_spread.fit(X, labels)
    label_propa.fit(X, labels)
    output_labels_spread = label_spread.transduction_
    output_labels_propa = label_propa.transduction_

    CNN_model_for_LSD = _build_cnn(n_train, n_features)
    CNN_model_for_LSD.fit(X_train, flipped_Y_train, epochs=200, verbose=0)
    Y_predict_CNN_LSD_Final = _hard_labels(CNN_model_for_LSD, X_train)

    # A sample is labeled 1 when at least two of the four sources say so.
    Y_predict_LSD_Final = [
        1 if (flipped + cnn + propa + spread) >= 2 else 0
        for flipped, cnn, propa, spread in zip(
            flipped_Y_train, Y_predict_CNN_LSD_Final,
            output_labels_propa, output_labels_spread)
    ]
    t5 = time.time()
    print("Time for Label Based Semi-supervised Defense =", t5 - t4)
    print(" ")

    model_main_LSD = _build_cnn(n_train, n_features)
    model_main_LSD.fit(X_train, Y_predict_LSD_Final, epochs=200, verbose=0)
    Y_predict_LSD_Defense_Final = _hard_labels(model_main_LSD, X_test)

    print("************************Result of Model after Label Based Semi-supervised Defense(LSD)*****************************************")
    print(" ")
    # Evaluate the LSD model itself (the clean baseline model was used here).
    _print_evaluation(model_main_LSD, X_train, Y_predict_LSD_Final,
                      'Accuracy for Train set: %f', 'Loss for Train set: %f', verbose=2)
    _print_evaluation(model_main_LSD, X_test, Y_test,
                      'Accuracy for Test set: %f', 'Loss for Test set:: %f', verbose=2)
    _report_confusion("LSD", Y_test, Y_predict_LSD_Defense_Final,
                      auc_label="The AUC result=", acc_label="The ACC result=")
    _close_section(
        "*****************************************************End of Label Based Semi-supervised Defense(LSD)***************************",
        "*****************************************************Clustering Based Semi-supervised Defense(CSD)*****************************")
    #**************************************************Clustering Based Defense*********************************************************
    X_val_CSD = X_val.copy()
    Y_val_CSD = list(Y_val)

    t6 = time.time()
    val_scores = _clustering_scores(Y_val_CSD, _hard_labels(model_main, X_val_CSD))

    # Only the first 20 training rows are tried (fewer if the set is smaller).
    for i in range(min(20, n_train)):
        row = X_train.getrow(i)
        X_temp = sparse.vstack((X_val_CSD, row)).tolil()
        Y_temp = Y_val_CSD + [flipped_Y_train[i]]

        temp_scores = _clustering_scores(Y_temp, _hard_labels(model_main, X_temp))
        sum_of_diffrences = sum(abs(new - old) for new, old in zip(temp_scores, val_scores))

        if sum_of_diffrences < 0.1:
            # Accept the row: the candidate set and its scores become the new
            # reference (same data the original re-predicted on).
            X_val_CSD = X_temp
            Y_val_CSD = Y_temp
            val_scores = temp_scores
    t7 = time.time()
    print("Time for Clustering Based Semi-supervised Defense =", t7 - t6)
    print(" ")

    X_train_Final_CSD = X_val_CSD.copy()
    Y_train_Final_CSD = Y_val_CSD.copy()
    row_train_CSD_Final, col_train_CSD_Final = X_train_Final_CSD.shape

    model_main_CSD = _build_cnn(row_train_CSD_Final, col_train_CSD_Final)
    model_main_CSD.fit(X_train_Final_CSD, Y_train_Final_CSD, epochs=200, verbose=0)
    Y_test_predict_CSD_Final = _hard_labels(model_main_CSD, X_test)

    print("***********************Result of Model after Clustering Based Semi-supervised Defense(CSD)*************************************")
    print(" ")
    _print_evaluation(model_main_CSD, X_train_Final_CSD, Y_train_Final_CSD,
                      'Accuracy for New Train set: %f', 'Loss for New Train set: %f', verbose=2)
    _print_evaluation(model_main_CSD, X_test, Y_test,
                      'Accuracy for Test set: %f', 'Loss for Test set:: %f', verbose=2)
    _report_confusion("CSD", Y_test, Y_test_predict_CSD_Final)
    _close_section(
        "************************************************End of Clustering Based Semi-supervised Defense(LSD)***************************")
#******************************************************************************************************************************************
if __name__ == "__main__":
    main()
#******************************************************************************
- -DOI: https://doi.org/10.1007/s00521-020-04831-9 + +DOI: https://doi.org/10.1007/s00521-020-04831-9 \ No newline at end of file diff --git a/Taheri2020NCAA-labelflipping_Sourcecode/copyright notice.docx b/Taheri2020NCAA-labelflipping_Sourcecode/copyright notice.docx new file mode 100644 index 0000000000000000000000000000000000000000..e238f77a424bf7fa46148840f9c8485e73d491ab GIT binary patch literal 16152 zcmeIZgL`Gmx;-4*wryLTj&0kvZQEvdI=0<$I!PxT+qRA0+WQ{t-RFGYKXC6{&w8KC zsv5OwRz|(EYSdc_(x6~yKoCGsKtMpmKvJC`j<&!+KvWPwK&U`aAljep?OaUlT=Z2u z9Za2d89Z!lh>E~Ks0x5U0OkMh`d{1wjVWVR{Y*%r525eDznT}N+Nr({j26Y4;#xcd zN3qF?m`X|{FKoEu@s^~JG01i?FS0(Yvl?goGSSX5fhV>ycjg#}paZ7OopwuXh+mTS znoNr5qcO}0%{e2gAANkbVq5WyfG)EgUqbmM=s>WCB8AL)@LlPvLm`?(jp6sMgnT35 zS2w2CooP+5XlgYA8$Pcf;Yv7)N$qEyWH6>g@_1L{)Ku23u3t(-b5G0Vgw&SF35iY{ zsOihYxK;+Q7Q(9B_)~}L7VMvBB}zy(E2zq%?R6A%xZ7wXGjm(G`;MZ7r7iAKvk~Ql zV;XO!O>DoSEfYM-JBh4fM|{XDzPJs8l0ZKLS>*5{b1?Z|#BOJMQc7&$X@&c~#?H5+ zf>a(yZf(9s^D7i(;2s(y#=261O_K*bb9ZJK+dDHP@qGLKB#}=4YIxdzR6a`$e@a{+ z&~wC*Sjm`&SYA;ogg?6f!YlX=7-k<Ipg;<L8>Iw^I4u_d>d66eVF9C5-^tX*nUUcS z`+uYJfANs~%cEB&eFuzAIH8N+x8Pr0ip$@ziscxMr&h3*AfdD+Wl&cZtrp(j_!bvI zbdL<h$EIhJr@WkV#9TL1bT4tzRN<q#py!|T`n6v=J%Ca}JHIk}%sXtuW$)jcz5O7S zr2Y{ct%)8ri35@N7?Cm2i@HZ6a=-Tt){=yJa@Lrvp*YV`b>ShykF_9^d2ZfHLfad% zoL{s%48Q#+wjYd{U|l*ZeL{TSdcE#|%5iclORN?39UYDh6C-&}qERIb&SmGXW3S?# zZ-bGcbkK#+k&G`sW;!TZ?*~!)*n%7v#F~xzTlTm)x}R<U@2dYCA6>gEE5d-BD=;7+ zJRm4wcY7xj#{U^4CicdzwtyGfA8)dM4j5p->kLr(fA>|HGA8$@JC}I<yz6Vk>l>8i z0xF?f8+Z+D$LCoZiN6He=pRolm7RuCcj;uuW*%2vNtf19f*{blC{+=18IcI2Qeyfw zDx|Y2zmFT54u)K`3Z2XZF^z6!C3_Hd7Gy1nzG5CCuE<7MYT!OkDLZuv3ik*j<t52_ z2Fj(F+Jd&eiYj0zMz-vTRl;isPK|}wjC}3wt~>Oj)-_KZoA2<n3pi9=PQj&PGoTMG z0Gc~SkT@PMX6giiG#??q))QPry_P@owvRY5Q$A6LRwr6^jdm9cUUEQy{IrIq-9#eL zTea60-Z!wH&&BWf#^*?)q<Z$=<TAD?j@H#RzyZT?$ZVE{Bhquk2&4YWXQ02s?m$!e z+fD0dCaKpW{;MN+DVo@~?RJyW_PPzs%B#LDELW7<Q}I;4|H?rSL0Q2jXds~T&p<%P z0BZfo!N0QaRA<epm<ywOquduVwWj?W-pxUe2W4ho0BQ-|3T!Lu5-<2}hNVzSucLSM 
zJ^nrXgXBF_ZLoh5#ZMb`^W`=sqC$C|1D^wl`na1LM*Th@ew4IFw)Rp<__{EOU8Tu* z0tPE(bBFn=LbAcca8R7Y=;bJ~CVhsF_m=#0{`mIj1QBYG2&yA`<A)*+`I%Nqs7PK) zVmX|`3zn=Y&1DBms;Q-M_sH5W<ESQzC=DGMis@v~x8Jr7ktv%%e!eNePZ)bZ$8sAe zI~-(u-5o;>yR}M4b6{|J2;3zN5E9m04p-8HpTl@8y${F|2mW4qpP)5NjE_|9i<i_H zKg*3Exg>r}5Z);M%^`o2jRINaCl#zCO#JC5DCU>Wh)E!>p)&sqNWXRvnYym9?|!#W z5y+n^;Dt*)YQzqqi0>o{Jgm{Y6t!zh)f)v@8V^2S?43oAwS!lX_^*pF27RVz>y-d% zFHmH8*oh4fA1?Z|=L3KJn1mE>SnFqxEsegm*LL~kpxg(flI2zS!!RNK)l@{X(f}Eu z&Qj47YQ?u&90de6pGQE7oqSIU9draoXdTp|<BL(RzOf4|+;`t=g}5DcmsoZ=JM_36 zL29*ftYJ4uWe|yx6C$k>N3x1qGR4<Rj%7XO_K5tc*OtOe`A-(3>7X^eG>7_bqr0}j zbq8Ewi)iiqH^fD78<0#SM64hJMdKi^sN+8vm*}5@Jcvql`>yx$ylDMm3rm)wnfoc2 z#HTxcv>E0s)GI-~Q#0DK?{ybO)N*Ov8<g#8VzMjR{D`u-<rGDAD^Q?h?Sz_s%YtT{ zCYKZ-G%1dxjM4XVFMLOa#BB10`6Rb#N>>Bpa9dkV_G@W&ze!k;-Uey$`<2R|=6(I1 z4r62;?2j01Nhoi!Ch6EA+I_)s<=gLX5sxpH>sA%=>&0}@QRUqw!_t~5g9k((I?Tu3 zpZzDe><{t6iSHBmzxC{3n`s-YLcD}n9p#oR+B+VnK+RFV`+*7TmVXLhIVUS+3Rx&E zXfqp5a4lYW4rDh+o|ISG-(Y+^hW^e0M^9A#ge-L!)J8_z_CPwW7BTrGiBJFL5=S*w z?ULBNTSlI_v8yr}7F@g+mRgQjD5%Xkk8B1u7!vB2g@X9$)&G{iRq}u<6cHo|5_pf6 zdUtx{rXiHV{#MSE(y}JVy-_>&llHN*AeE=Ma^g!84S!uXnKirkp;bMDdpD~%SLI8- zUQW1FP-j{CN*f`P1Mklomvc%X7DjhIgQ6A(Pfy{`ea5(@O>t^2fqkM$B&Uu^X-`kn z9@$D2OR9RJ=nL|!n4>yd+J_{l;lgy`3H=XE5kK$wkWxUUye!!cqAE{nw24hVNA$kY zZgww$R>;UnwQR7ZU_6x9$jZ?-k+wVr)$>*6_9e;NEtm)_*|$t~Je4<HQrTfwpPM31 zD)k|I2KOFVJGHfW)?)Zb#$ZxUtlfjFn}Ond2ZIlYJn3inWGaBb^;0J{>AE4ilCXaN z;ptnNiF!JO&-v&}LPO7M({8+MSu4XY5BC*QXM5dU2(X4qK?F@G;CNQSfaOp3>eS0< z&#$nN&z0U<vAR9MNTTKtJ3BJ!%$+#{WKgGQ4UM($hy|6>kdx3iv}HpPorL1dlV^!? zd_3dD`plJljxHohsw8V{9NK$4&+4GERHj_l%yh?d5bal)`CY|<Ae+@3adMr!1H2me z2@!;&xf$xQo|HKFMTyS=Z&XWbR5=p2c#&a`d0><@_(ju1mphMW3FXd`vdX7^a!CBL zj*!r<!NGg)>TZ+*xfHGHRmn=**Ng*lY~>Q9-l<>6WURaoj3|X2T^E^yCF5-JZ*m`R z>GI{+cqEFEHJR{oBvhEGEZG1Z_u-YDUtj8ohbA5f{meA2a%YV1Tl5K(+6jNq5ma(Q zPW}0!=Yy2JqsXIZ^ApT_JCqtFh#DEuq`0Mxr8JiZ6d8{0*Oh6vq|kTya3ONg&NHM# zMk4{cq&wFQwllMq8ten!;hiD|?1Q(GW>cKD4U|duc=$U%X=~S>Cd)YPd{QpSxq)F! 
z_F+WHATg6jI-Kp&Ao>by%a2}{3T;7t*4)o&OVPxkN*`b-e&LFr3A4M_+RPCgQv(YH zN#}Pw%f@e)GhO_Wl$rs4zm4UG%c|jeLsr9#s?fZWP7N;&m-82`o>7$nWr5^8B9GZg zloawh;z%QLiQYt$BRHdr%*9ih>7^$-sP)nti?464hxbnB(&E$bd0Y=MJT!&^tOwL$ z4eUTqJpu=1pmk-ZaYmGq#b_EW`ja5m)XKYP8a+IFt|@6W_idQ3rB6wMg=aCNA>X9Z zu1Rk?>BO$b>67*tw!EJu5;{ih<1CIlzT_QvfddUC=lqBa{bi?yd3y+f#ntEG-f8zX z)5^ySs|AH5AVhm977^Ion+#@)g58iZQK@NpsH)>QrKUnEegBg69e$BCvIBy#zHhzZ zd#?42*uBT*YI$1Vrk_!)U}k$^@jJd#HT;)YO7}ER*Rz6?D+NwE+%i&pDe*Yd`)@z7 zV64jF>8%+DXD&FkQ>2UI&J(Uy23TxxW9d!SWx^;dfK-giNQc=@d051k9m=zdD|kT4 zep#Fr6nYx-TxPLWJIi*-Up<(L?f|zl)1BcE%yo0K<#vqkg3e$&g2M}arpYQzGq##3 zv??5(&5Aj5rx7&ovRhtOV7G_s0(OH~A=yc0c1rNALI~%TW9r7o#-JQcgS_1+yZx+u zh8Vm0WfQdB5<9DZ^xfs#Cng{gxRa4QtgKP{m<>3CSgW<UPskU_4z+o<ug)+@fg%#> z_OrAG@8LBb+yi2^R@j_5cst);QdaZsZHHY-;sUFOjQL?bnhkwf*!$lhapF9o@CwRo zXFt)${g55sgSZH{YQ2!RO&$(($2k{^Gt``kG>#J|G!*Lqmxt6>`=loTe`vw?gAX{z z&t}b0)&X-g$N+y$4_5)<rvGp)X}wP6e4ZnOA=cTO6juX-BvaW5n2n`6Y(JHG8cvUQ zzi1<?`Rd9Srbeq|;wy_Y=tjB08nyclU2pK)&C0Y@X`U?T2p*B@hlx|rE~7u>b<g_& zR63LBiWzfmFy6gN2gA0WaRp<c1~uh?ETwjRS0mUy{WfH_{c;*6yCly+&wRKn!D>r~ z6sP{m5F%#=X$f1{bxBH1&Qo3>r()JCvRWTCe10A%3+>!b==;aKCQ>tf)p4qc=GwuX zeiC}OMoo7H_{XriDSCfO+OVoY^c_LViTgHGL07{RDPC)`%Dta<H-xP)@*kx;*ZzPR z+<&e!m?^Dk@&Rjb4p<-{lz-0g%<S!5?Cf1ko&Q*3%%-h4Wiug#UGUDh8??HEJqAn+ z{3Mf|9@V9=BospKGYdi>g*NW-RmWS~G-GNjX9U8(pE<9Y(^}w!Oyl-0ccFP)zN_I| zD9nlA+FNOQd0E&oIpSG1AlQ*{Xm^WO>EO)1*r<-CLhf@gEOiU`?y4eD@8&)<0!1wA zmSX5Ts8ke!5!pr(M4U)QU08}lBNlkzy_c>Mq)=t}oiO8~&dnN+8?JZ|2GhVMkX6J@ z1KaBm>wPeRTdUBN5G@AiQ%K;3xGbX>eZpNiWL7LlpgRAw58jcCL$u?U0(qF>#1h|W zC%m$Bxyt-aVI_p%NCh}ti*kA+jjjVH5Q9kB=QOi2Zjc#mHmscM2V{akRcV8nM<d~g zj0j7PSllR$j<#(@TS3dSsuM+1{T>`xcCz;$yLt&ZigrIn5{s}ch+Egc*7I-K(7{8v z!<&3)B6+8=-@qIp@-ckS$Umf};5$UTG`-VR@O|VQdV{(RkZ45{GrV@0?LY_9onjD> zPeiu9_kv6V1rZp0I_wSFU`k^LO*{|LlH<9wV9T*mr=vUzxr3aqi~pGe#MTAJZ2vkX z?uL@M>PK(uPm?$nm*y9HKZ{=2w+_i*Fn8qdMRvE4<GE%C-4vO4XgL<zXOHaxHhL-~ zz_W+2NNH2yo0@;SxhTA8CwI;$hn!gqCM`>@wg|N+oIX($sV|pez!QDCi0a~1ak<6d 
zlhJ0%_{xj5f;%s04U=B>2u|VkuKS4fbEVolYm<-lEzx`h9c=yfzFF&1b16M7Lnws_ znleoI?5^@tn$}vd=8DIt?IIUvpJ=xAY?gWqMTwP!D(Vk&)DKPih)s$;Prw>k@OV2n zk-QT=_{qu?{1MTF8+Y9tC711XAMwA^8HvR$Fc&~)CIFrPwPZ21Gx-afN9|VrU^DnL z0>}jdxo0dA@5nS-8@S{P5S-|W$N)Im-rgDStWjH{^~jGh0SUCxkDYM`uKkHKIg{^U z*kh?V#fLN^z{x6Kb*lFdA8NjRhXbES&P<Y#<Pi5Z`*pVc(vof4XF`)|%mEbAII1?o zt;PEdHHKc7_0wtU<}6i3+IbXB;GEMab-%W$5?;8XMq{I5iYDv{KT;bphALP4iI_C} zQqUr(ct&-kqs;<SQ8cl!PprAE!YU{j!H^;@dnXqA_w{lNi&<DOwI+!jzb~jiWuFtb zIV_js*=&-{-XyAAP|m>Avm~)}Xh<W1<s15fGU9a*fhr?+F8Frr5XT*qjG@vA`RPFB z0*S(o21BSZXiS%ws;I5voJf<U1i22ojwH)!^qTryu6k1VXtH1_bho%U=z=w<EEjYL z<ysG=gTK4NFXcEJk=A>ytBK>skM;bRF~<G@u{iij^G-rbFDM_=hrqjy3@M6<`mWc| zEFy%#0s8<mq3BQ?2mS?OfW9}RJ`TLcY@7z7WCHpQZ~e+*HpfYwFPt+lmS?_hatIHY zt^<y_`mEO5aW_%V7r<ql)RVBG9x-pbsHu5<0GCyd{C&vI7GgZLQQ?~;r*=KbQ;9FI zHNZykgajZ45gq|t_D7d(hV>y9ir#-lu|aKFAZnCB{hY%B)$W4=Uu2XAu}5`fJUPXk zGl=M(>=6^C_vq}<?XLCKiB_$2bb=RZ=pyKw^&DxsK&qX@=X{okxq&{sb$MQ`b2<2! zJ{J;{%oIt7A3Tlg^=Zg=ap%|84*P>%@+x%*Cg&p&BX*PgUIH~G+T@Qn<-`Ih9)_DD zqaNQlDGw5=UmmZd?SAbaf<aExU97-X%WhYY|IIaP1z&?80X8w@ae#o(|LK}tEKF@p z8UM7*e|YCpP1$H%PONUsXTpJ1eE*3@<4Dd$V_wUK?2uCma&b?wJViBDmZZ)O3NR@r zIy9-7>Cd2DcM8i8D2L>4>?<XW)Vqu-(~EhD^bKDe4XLmCv<N3(j(@#K#k1v&smq~) z#3Aowl;D5!D%%E1fnXc3oOL|MLy)G2bO4?~LdhzMWBt6^R{ydEnZ6Ylp%@g<DF1|T z%2kJyoa>YcS&w}!?GmK4#92ZYD6FQbf`HAJ=!dFQtx6N?1mbucfJQNWHgSao357?K z{`0)=DPY9EY_pu0=v1gpEG%>=OkDMD(0r8=4)61{<1QC5Kex!YK435>VqP9OQU9Fl z^S$fohzR&*@Q%Xw@w0u1Ek)UtI*Ud&7u`u)m-Kh`fC$&s0&WX99BF?XpkSe&Y&%K= zUR`cA7ZY{h3Tlmy=bSHh<$;WkH8ooudC#nuI4S6B?5_@o2h4N<^+o29k`4PE5PHG& zM#k_Pl`b!KGriUdG7esiVbvb7?L8T1C@dwrS?KHctA-66;RmHohOVafWs8mTC?5Is zg<yxf!hy;}W~gjofdfwoo5=kJh~jk${k`~Luy2~my$e~h5dn~J@*=}e9w2+&BgFQ4 z?~nU?&JOp7gG<HwKM4c{yl)!@k8xy*i{?%)dLCbnzHQbVY;^fur{YgWKW_LxpSI}t zG=hpEKOUc$`*(WWA7@W~ylzE;ubVokgb;3nIXZym2CT3pqBg(<h@GC%As9zQW2~`l zGll*b3L=PD6xzu61VZ0Rz(VSgE=gPqTqAdZ8-i%WfxZaRNk9qVkgqgdJkIm&#H!K= zt;EuVFh@4Jbp?rFW$0f0nm0pkH)@C@-7f|%t0h8)J4PW2Ou_dSia2_SKPI;Id?!Fr 
zZWv^>zW}n1oh;nLq_Sl75J%2L&x|98Wkby7{vwEgoa4dQ#fD_BI$&TQ1(`HPeM_Zx z;0G4Pmx$G1lIBFdjJjSZE=-mgWD+zu;eob^eTr;sml|o{x^KSu<0z=3U;_2T4@6%@ zP4Wqzm}Mx`esaY|FUCJ6>J%ZZ7WGL5W$}v4nGzVZGq1TbvrPNGuXmF+&2$T?Or|U~ zgB9)j<s>qTEPHy2)}iL)(QYh3t)SXbr`B})TB*;&QZMO-Re77KmU3>X?Ja$y8^F}h zeR0TI7k2urQE)N%)W!92G3Yttx86;5KvT)M9evX5mTc4M9nVY*X(Tp9u<$&wciUHJ zGmCE9*m*2(q?v<gzDWY%ip@kL@SQ{;VoL-;d5%=X=bUHTK)B7PcWOdyqdZt9*%!yG zYVLUZNC`(Mu|>Vpw1dxHWPaIVKVDJ#p&jFBcnuub^o4^ACODJq7z1TObcdVlcD%W- z$ky<e(m7}Sl>SZS$QxwAvtgeztC&;Qry$|fZ{vQ6YgOi6%1jfM3v@HqwACx4jh~8Y zS7lH_O|klssyOVq7tZ*;Lhz$Ux#CRB`zKnHlh7^;DK$XI?dDZ@Ps;*P;+xZ5_|<2~ zd=ad6K~17M;|k1xeBPiM3vv7v&iB)xoi^J)2;3-OQ>6ON2uC8zxne%cDh@M6r^D%r z<Xcaaq}UEK%s^OLC(R2B<@~xkSOrY3)HI}{2Z)%-54+FUR5hBaz;Mi%V-{f<=osz< zgXnEp_M$uLH9pb8ERkEUBGgfy5DR<cxqW<WGjp1mcTXl_iBi+N`Rxa|WpP0kc3eWU zWtI!^ALj{-wLe@p3{oW@%IBLfwOWo+JRU#`h3E;#3reuGOZTcMn&|bQVEXCP_e)rf zR=#~dE?0#d*w!nvwywcEJnYta^7YisMfuLI>Ozx`{5pu(pNVofAKIP+!(`gJ!&mB| zyHMllS2leOLEdt%CPiPP&FIQsqlfP?C)K0wO%+VC{pHxy7;*0gE^}fd@9@5flWKH3 z%a^)9GV|EmK@<0clWJ~xpOXq`Y=qBZHtfu+V}YvaWX*8&>_O{m^TuK7Gknhy;k|Fo z9D#sKZyiL(TKfeDM3qwU@e6s2qT;RgnbJ(vT)SPv3Qp@<_>IO`CSy0|)?BCC$A49D z^H3-coWOyA_L2TF9db5xaj~>B|8sKRsG)7Y!hz;TSo>k{=o#nw)uhmlSaUk7NHdzd zKcMRuimh{^NZ^;9_QCg;I4Wi*yk-flTXKp?cRN1JYpR-cpYn=P{;i;TGw!CE%A*iA za9Ya-ZMl#33G*!%5;8j}ow_i(Z<Hh@_Ie-BV-?0^5h=YC6a_`din?^)ys1|#1TY8l zW;Jx`(%69Puxsi<=r<;;$1f1hDh*tt$=9Z$e$1@a(Xrvr*6!W*W?LvQS6F_}G|*m( z6tU^}3Xbj92<9KRrblKvSV$48f^m?evI%NEkk>41UD2bC8{l2F`>q|{T0=YG-dJUh z_b}q|M0ADsz!R9(5CjDbT~2#OWGmLcjw*`ST+@5+Ao3Mvz*f5IC;x+fNv~%S5c=-S zm`g}-9JfT?N#WkuZ2=^|3tVMv_hcq3cu=QIXw4haH!r@#(!Asy0xx+^yqoUeFt7zt zmo96p;cS@IKoNN&b<{oG2h6zDw^H&&ANbhU%sMZp7_C0oQnOZYVYXa%pjo7qYVtH9 z?8r2g)OL)hC<7*!7&p9ysi~|Y6RG;aaH;j}39WjJjC1B3%Gy4H_55XiOO=&3P|T>I z+^MrGqj+uxvaJ&gOat3NoKelMPg3`g$+WF}T(fSBb*3^LGD-2TK9C@~s^h1_GN4Px zr8m-=(<$;Cm|g_RO<CeWZ*v^g^Kb0XB=FV5r190=xN1oZNsw~+`aHP-QDJBBpIC8> z9IBkPl2)ow=C4%}@q*e)@+DVOE)ay3rb=0^KnZ-tfJaz0-hrCDy{#nWYWakg{gRkf 
zu{}jsaDQ;`EXYDPy*9+)gFWG96o#&8Q!R9aNRA%27~6gntUY;Re_zpo>3T$^QESjj zONUfraghs;i`Xk5!u#nE*g7H|2GJvwVDAl!YSjORZ!R`~=(E5{;%LzJQyr=CN2~<O zZlGfJj)ol`F7jk7W;tp;QKUWnI7?ZNgA(d?AYvYKQQn#Nb6UF)-ohvf>$Z00Qrrba z+AmIf(<&oljP5*JK?J-Fs$f3GCQ#BK8%%8DSSR;A>;`-=w<Mej8n{z7Rx4OgNYA0y zkiZMxp_>q9DNHllNZbQ6L^Zi%EcteaTsHP09t4mSoEy||zfGJDGCgiTqZrp5?YuTj zlwCTOVws>R61?gwX|meaRAgvSv<y=Ws^%D$GFx6)<H@gquoDI%!W&{khhHLluER0k zKr}V6S@tO>_-L8Ea+e6unvo@gQf<eD(}Z1Onv!`Du4CPt7V24zU7R+?P(sJOq0U!$ zjwzncWiQm%cDT#DBhjllbGAoiId3L;<KTOwEsC$;0~KR)NKg93f*UjuBXHDJ5hlYS zRFY>EB}KE7s<hNOGcLrsKp{Dm46nT&3M#U9p+0gOKCcvu$F008&eAn6w3y=m66^lf zR1)29R`nGJi(#_hv$9|70garoM_gjauq-_0MV%KJI%8_X$;!DVsm6$gLTAo<wE2aA z+ZTBHE!zQ~W`gDV(#Ljhl2LfOmG`e}r>A?3MIBv@yvWcUC;|j?W)02|<BAzG##X`6 z=cm2=bYfY>mgYXyC2gb=Gi>%>p0f7i*}hno7JoO9=7Jk>zdSrK72l{YHxgIs#=oNc zA^>F-5{Z~}1pGisKRfb8p>CL7^5qEUNAg{W;VMS*ON39*Qf`V$AEvkT7*78}D!jAM zH=oVw%SilP(shKBjFn*=DY`lWF#_lcjJC_K+0U>rdz}Jv*^U}L$MLF)z@RFy&Rr%t z#y!;I_nT9L>@FmgX!wI}85)NEgiG(Brw^CuVC0%v5KH5FWar~Ioi=H72Dl}jd5ra| zv8)F@g^729RC*tP1?zvxFP^JO!WjV94Ga2*gN6bI?5-&|**iEh8rwUW{v%ZB|MqqP ziyxmPS$kAwz`9-H6>^L+VR2DTLPagR3hXN-2{Aj=z{Fzmz}fTX`^J@5@6L|VFi};; zd7sW355Aq_=OvC^Xd{;Os<I46sb1=u*p-^2_%mBaQAy#WMw`xHc7EZag}W8bRG6>V z$lc<EL`8ujETdnQLfQF=!jkH~AQ+ftnTC#y=EkcTx$WeEml5m|Eau#xHv}k2r&NMs z7?Mq+Dpeq36By5cHX2O@fQLs$W*T793pXAO1_|86Pvh`Gcqn?w#T_4*h#9UlH@N+h zcK!%@TlVQV$=AYKn2-XlqOFo*TNKi*i_WaNeLNsQL)iK?+8jmnYnTWBwQF^B5%%YI zyH!L78{b*#bh%EtnFJL!;2RXkOR|KySee#Q?KSMF{P?@Iqc}v99mHv^vQb&Gi*Akr z33r3rpC&VB8xQGk>g&j)e3|Dnvty!+*l)*;L51#9WbxH_5WYWOb$r^E)~CIG4%TlL zZ4$O@E4QJMAM|wGQ{G_~@Vj2pI5O}QY1j@H?N(4<!C_Hwy>;K||5y1XG0rH>3qa>d zfCPi|S2`OyIQ$=*0bTZwD@W<iNQF+)D@4WfS<5n*^5$v<<n3N*tAGYmdlcB=W}D!{ zjRp#&pw`5h^GbI<R%V+GZ{<$Ih~*_&?tE3#KsBDPwjI=jCl$#z^%0U9<S`Ac)QDJP z^nwCG;hTJ6UzjmO40lRSm9DLaicV`W9mR~LlZQg}$gDn(>C9L}rptBTTcB#I%tdW^ z3s)%QJ|@=UVJ+An^?dm;TFEFd2PyA^XLZ_z4E?0L&c?c@glHpD=q8z=^)Pba+&7Fs zAQGL3%7J!y8WiPIrvr>DwdhI$l}RNmpsyxvfC|LW$*C13a&)at9~@mRIr#Q%mjo3k 
zn*g}JPW%bSh367xu`v|awgGLLMdb^91P9cHs9zR%9RBH7rq*IG4^kNs13{ouor_R% z0S>i+2NXizGX`fYecGPykrU>*?Om8G(VFy=C1I4Li>OgicriF)K-EVrV9AUHx+_J` z0ypjnq&_(7l)9W1hcDx&xTHHf{ab{9p8rqxTW`n?xd!0t89;zR1C;nP!ol6tNck^Q z<kQp@fMN(2b^&>c54XlCu&c0Op+rQ>1BVr6ctRkQwx^LnhLeQC>HDlG@a4Qd+N0NP z4mY}JWKvKPw!6z)iG|C-82peu(i|DO)l31ZOFNk`9#Nedy0W;v0EJCInY3ls?&f{X zHkqC8<7aNa1TK;2icB4QSx!X*Q)A_J)H3$0&W<6pm}0ng11vhb;KLm{sH!rh6LBbS zm?T=2L_Bl4LIEB4&O`5gl=Z@Xc$hR<P;+YOnUFO@A-h4;GSJSv5x<lhpJ=Fn;r1Z5 z1hiSN@R6B@Oiih%p%-3LS`uTx9-EL8ga-3FTWsQe3=-=wo}$c7r5v7O43|Hp8Cb0k zjuH1^t@s|T65UJVjDZD1VhAft?ZhY8dwG?U^!b5Hc)zAlu0=oitnmk8x#T$#$(fT< z_>n@c^_Fl*nyZwM^=QH~bG`QOxVfDh1J%#LFUZHv2N*e^ciDuYHn2PN4@t#W^~kG> z=7a~aSuE=-aAceKh2D_(!f`3GW8?bNS!vcosq&KW!=f*qKBXaadWhta`!}MUosu1e zolPGf&Hl*xl#V!S;~VWC7wsFF{X8*SBsJ+kO{h|QH;_!)ps{-;s$yS5_D|B|*E+-m zk0u!ZD;*FiiPn<gfPhRrfPntcm;a=Lvx}$A|LlaE=1My6v?C8+l>KlwI7-3eYc|oo zm*JnJjZa^^8)Ee%mEVU;ObE>&Lu1gpJ^~F$0V{)!f_L-zF_<QDk0|{>DCGQh_OL*P zkms2Oo0$h6Mu-3bEdLoh^7hK+@@C8H!Ubs;T$&VPEF~X)-9zB<?MDCOdS&cer$@9W z0hLgezf@nt_f9H|{QHL+ho7iv{t2*o<*}@p<dq8g4a09Crt*76j47i*=teX~ik)Zi z2na*LILWSsBTF(NuS#L2eKUQF7v_9wBANTnbtCDG=?smSnWE7$VSJOS#mTMP<T77q z$1kA*P>{`7(i^setz>jbgO&FCL2sxd7Xpm&48VYP5_-0=zOftYYcAs<Z*lFQi;G|k z*RWs^9dHd?)e}5v7ip)2<l7M{XVzI(O8gKHJl^jOxYJkFew*}B>F`$r#vQ#(n$*C? 
zY<tgIGrwDS&VF*IpCLii6R6`u+x|2s{T;RNM;o`iGqZv=*I4w8PVG>v^IM^#bOz@9 zq`n9l#7q0YtDw~mYxE>U@vrN2Sl!^*4Wl#x27lDS^|N5z<OX2*2S2%6dF4TV+*%uZ zz((rBNto(K)jmUWPi7D%9a;$<2W{*0wu4VWVcpa;A-=zk-;?68fVnC0i7;Hjb5GxS zFo8OXYEZ7qPT2AfbA8jtfnv!*jvROy&Ew(xmWRxQOdQ;w5QMUs$HUO_1F;b#d0M<B z&d68#&Ux?So{jHslh(x-&*2JbWzRkyEF6kR2fya5M=-q_6||L0e6MA32fQwnDq%VI zKlJ*-nkchQ4|0x(S+VR$W$qLs5}QY=<~dl$s+zOA6u|Mj)jx?}s7_sZqqF(d1j(&e z6wEx(j2OJ9gGaJ$*Q6pDG6%Fr?<kOF?I7W1?M$I$?Igft?I?g`?LfJ(4I>X*#F5G% z=Lm;YQ#A&?O(CD1t*$wT5V}94Y!0vSgFZUCBR=}AuI+RHk{r$fNnS62q<Ig!rdb%f z)bZ(}mLFzH7-e)G3YQD-Y-u!UqokG|Uei0+lBCc~b0cc<16`KHtQ+p)5y)j9K6-~> zj0p_}6a-^22VJy3XUqx4z!5djs0oOfc0@C|y#^>tSOs>l<v+_|FS;n?LPJ<)%0L@A ze<v8#;ERpuu$LsmMpFZ>x;|*bh$MnK4hhXX)n^|a849l<x!6$0LGhv9$v~~^{uPp8 zcsB_TSK@DF!1Trn8hn(Gr6Ev!T$Cj?tIQr-+HZxU*rR$SIcE>J+4$pT4)9O8CyH8x zySgtoUVK|z6Q#RL{8+!sh!{>O0~h)@DiJWlS#5~nH?v_zyj46ht6Vd);np{~#o$vI zx1>dxYhMo;3AJjH7<<QW|5S{!5G77lgB~woK&$5;xmIRcuUD`(wac39q{S9BpTHC8 z*0)`+HFd9!OZzOos0Fk@GB#>56+<4eY462du_noUU1h!0V`Q!7uW1`E^<}Oi5v(c3 z^ihNrZljalqhz}N+xi!4?BVNBahCzf#8H}go6-0fX&%XU8qIM{2AS%{z(avK#u{v> z^Ti_f;B#XNC7d4JGjf>$;5C@-{jGEBOCn63iCO&5psys+!W1Us&k~y(epg2umNXvK zNt?H^rA-GS;o)$n!bmbK=#pY0WKH8}Wki{u;V%&1Ia2AOEi4|eYZzvzxsV<El0=&h ziKfzUF!g62KiA^0bo$rJ9PS9K6nPN^YKFt#I6X5G+!G#mj|S|HOg+>y4HHf%EojVa zJu{+mIJ~7p4P{$-O<hS(EyC-34B2g#;&>xB@L~Q`E?ws<-)HcgJv+;Po#VqEvr!vz z!EXgiy4ysNm=9%PNM`$HQ@4f8v3Oa!B8|loChj2d^YT40GGZJXLTnEjwAd*)Q1Pj^ zR|0?P0|p;<{)6QYqm>II)(BQ>pyDXT^M;zcR|ojZ>@Qz85A4e&8Tz235-eCrbZ3kt zyCO;0JUWx1dQOsE)p0`X@4(YhZuz5`-Tt3T=0sP)Bz$9dD0FPpxo9Xqww3G4Eip{H zpyDFxxzdsj7ni;InA5Tss?S(h-JNc$`+UcR1=1Y{SjDqfn`!j(R9Z}J)HhU$XACE6 zwjK4rlAn$(dSbB&PaAb8PI1jTU~P_XuCD#kC>nSPB3U%m(AH&`1y1d3NWA(VEn2R; z$m<L}SAU_Taj)C9>5<!O<heA4nC7`Pvhl3d+RXHmkLBTjZhfKmQ{&DfeUw`+vM^oM zu+b)}qRSS({6Zha@dYI(yih<JsB1=+)$YY^+i}HMWw*p&celi4J{9*DMJ?g0=3yp2 zt3}qen!0>|;|5(3Z)}c#T0M_lk7Hlii>z*o)mF+rpT#boo1kM8^N-e-x*LZbF&6^& z)rg;<3An#3$(*u^!^k+l-Yo6{lE17%-CBT~gRNSCpE(wzH$Grj5e=F*9@oJ*kn%7> 
zu0k~`5QEUq63~Kkf{?2ioRh0ae1b(MnGy~K<ZOY!QL0$PqtY}!Ak%z#ghE&PpL761 zB|_-WVs|<#yc1m^Fw_=>!eCmZNP!sTnvC~IG>ya1=*uqvgwJ2|XhaG^VUYhQDplIM zDqWfogz?WLwHBObSoGzc5STMx07O7I6z$KZ{qu+ZXb<s!HgRIKk3cx|o~uyUo3Kzg zDqevY{9kSRAIKlK(C8W7(En<E5xu~Pa!U2sY4~Gt-cQ>5j{Np=+PnVl)D<y&H*tFF zJu}xH%&q7Y`A>gDc~Wc~LLkp*NH|9<J8G@%XGxPNPzv<uH*Rbh&v*;CjEd;G{y?MK ziCexk$0mT(idmT-w!Lns9IiNl1}-!nlwS?u*wYL#RG46EdKF?MY=SSt81}V!E43AA zf;#_vB5n&>ekR5VyeCfyaiPHpb7si-V{&fOf@pazA;6WL%s>|&TCW-c%U}I-N=pUt zA9)@q|DSmdH2v*3D7DpLV3if#awBpvJpjnc9Max_Rl-aWpehaZ&mi-237dOX0axzq zBAy*MMXx&W^(2iI#K0$361@O)rw%;RdvzF<z4~XB><ZcW&Mm8yaC+xel5v&+*^xA2 zJBMaC{(ADUWmfHasJtbV+qvn3R+~|`tn8^dZ#!Xi&UR=je;#I!;dVs}#12Kj>LF8U z+3I`D@@&BKFlalq<D}C!{mZoJ#*U?B>HKNV1i3l@4+|(?^)F+mzEl5|;xFbQCm|Qm z8-P?bfFW<Ga9mPK{+mgfnQOHvoBO93J+snjp56VM*>wT5#Ojac{At>iDS|c=1(QQe zMTH)GYH`yh0gel>*E|z*z~Z;BD6}$LQ>u2}W7Byq(J%o!Gy5KT%=$K9eR7{t>a}eS z&&InhR<VF$X{?)E;0LVo)==i)>s3wj3#+fFuZ@PbGGBF<s@!k?@VEr~X5iOZ5I!OW zJGv(1rqe~c^un9!S<5f&syhak-j>-VsG_ELnu%7L$2tNqw>gD5a~$=W5hX&q83|y$ z<Au+v?6G>9p-L-^9(WsdR`zWU7dSVXinjO!8QQZg&GHrBs<_WsS!H;EU$Rrny60<- zoNnFkUhkfL>R8F{G-jvP_P0a3`PZ1k`dQR<jZ>O*1)DVnd>B)XSSMi!&W_x(92M5~ zyUOj%%V{>69?udsW<}>d+NJ^x?s>Sly0CQP#s}u>;45%y!l%x(s#`X#_Rl}Urq->7 zMlF0)<gM;#uY~#6D(|@n2=y`uIPDu)m47W6bZh={YHRVUb|M!%5p{|CxrBwdkgLq^ z=+<F=jA_m08qLpjOjE@(lc7U}d3*ll*Wu>nYv*#CpKBgY`8Zb1JLAKQv-93@==fdx zQRBI~HDU4A90MU@#n@(}nl*t3dH$PlckvwN0qm1k`RFklQjVE&?fKNtE$FWLG1BVu zVX+z4JCfA1lGzLO?;^^g&;D$MMWe@!wwNl}&tl4F`qvg#1g#6S6MMqW9jfrwiaI;b zm72|v_gj0azN!{8y#fMU-8|i;OE;;8Sk7u~O*lug3Z*ry$Q4!U{bMI%WBXUWPt+f; z|6P6|^xN7`2S_hp0YUqJ3FfkMQ86^K`BP3@P2mSDX3+v3K)Qkp_>sv!a2-J?W1~T2 z!{+&cWEC+K6VW7i;ucIUatir{e-(na?{YxY@5A_hAsY@T<~U9OX#&pl+I_iwe2Rn= zPv^T~D4mvxN+did-6#$k$%uR!`C!UDz4MjFr8CNr{3QM!W%Y4*xfdP0$CTJ-fp{HC z;KXvRY<z8qWRav4ch@~$7BDJ869>Oq?%7-S+&c0QvCGY2GsI%1_9;YRfsy%`8ZDD7 zW=cJYjthNg4>HP3C`I@i-2%Gp6WX#;IhT48AP8)dkJbZ^weEY!89tJ^h$KB~17bEc ztrL|5Vj}L<De#mfM1&iVt}tUZ^D{YVz_=|!bhcwVE<$t*m%sWrhR}>zgq&))o?{28 
zT$ug7Fb&^W0N6(&WEMYUvwG?Ldn+-WAl=KHHUjv5f<q81hw`}Wu7-r{=Zuctr4e!D ztO5j`vzAz``(sEqV#&JcS>dSAQYgxd1{+n;yCh3pnceY(#wdM*L7Te46f@@xjYDjl zr*BY&7JQkHqGTb*L@WYJA|x+#57;S*^AtfgAzX1C@X>VJT-ofG;{{B$#zt_artAc9 zikNU{6gjc}n}rMZEqVR@DURh`&h5ABl5wK3iBK_(1-#4EpS5CRsD^EAc3l=E4j#E< za=IS$q?1{;bvDOyKDW|5zu%v{ow?il-WmJeIs5u{bw8`wo&93KDIkd9Db-&QeG|`h z#8y_7LY<bG3zAdkUQ5lWmBzWip=-ewzzRX}O3k$mHbgID#j+L6v#8}W?QF5h%UW(2 z%43>zosY2{ol-&+Shln7T8n#5MpWRiSXVTcNl<w&DL%IoSo3{XT9Cef!);|9y;oX$ zdC<RA;+_ic`PVWW7=#|6!2I_!ynsS~yZ+0GUIppDSMc}ag?~kt0F?H>95(zN`1f;H ze+3Q!LUI28PGS9C)9<He{#}<8V0iw=S(@MBzXxXi8_t0G5BT50G=Inc9trnv{2L&c z=+B@3Yh2v#@ZW=v{tX|+`Um{?P^90h_<i&F-&Mc?)VTlAR{o;1{SN>8*2urXKtNSw zKtTVoQSv+d?^5(%;Xagqf&W98{vG|hF!*mg9nC-5{+mqrdkw#9N&l{am+qfW|1V|f pcku6y{NLagu77~d0H@mj<<=FX!2#?90)hv8ga8EZ<Nf3A{{h@6CKmtz literal 0 HcmV?d00001 -- GitLab