#!/usr/bin/env python3
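# characteristic_flows.py
# For each attack category, plots "characteristic" flows: the flow with the
# highest summed confidence ("best confidence") and the one with the lowest
# ("worst confidence"), with packet length and inter-arrival time mapped back
# to their original scale and packet direction indicated by ">"/"<" markers.
# Usage (assumed from the sys.argv handling below):
#   ./characteristic_flows.py <results_pickle>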
import matplotlib.pyplot as plt
import matplotlib
import matplotlib.colors as mc
import colorsys
import numpy as np
import sys
import os
import json
import pickle
from learn import numpy_sigmoid
DIR_NAME = "plots/characteristic_flows"
with open("categories_mapping.json", "r") as f:
    categories_mapping_content = json.load(f)
categories_mapping, mapping = categories_mapping_content["categories_mapping"], categories_mapping_content["mapping"]
reverse_mapping = {v: k for k, v in mapping.items()}
# print("reverse_mapping", reverse_mapping)
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
FEATURE_NAMES = ["packet length", "iat"]
LINESTYLES = ["solid", "dashed", "dashdot", "dotted"]
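# Per-feature means and standard deviations, used below to map normalized feature values back to their original scale.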
with open("flows_full_no_ttl_normalization_data.pickle", "rb") as f:
    means, stds = pickle.load(f)
file_name = sys.argv[1]
with open(file_name, "rb") as f:
    loaded = pickle.load(f)
results_by_attack_number = loaded["results_by_attack_number"]
flows_by_attack_number = loaded["flows_by_attack_number"]
result_ranges_by_attack_number = loaded["result_ranges_by_attack_number"]
sample_indices_by_attack_number = loaded["sample_indices_by_attack_number"]
features = loaded["features"]
# print("features", features)
for attack_type, (results_by_attack_number_item, flows_by_attack_number_item, result_ranges_by_attack_number_item, sample_indices_by_attack_number_item) in enumerate(zip(results_by_attack_number, flows_by_attack_number, result_ranges_by_attack_number, sample_indices_by_attack_number)):
    assert len(results_by_attack_number_item) == len(flows_by_attack_number_item) == len(result_ranges_by_attack_number_item) == len(sample_indices_by_attack_number_item)
    if len(results_by_attack_number_item) <= 0:
        continue
    sorted_seq_indices = [item[0] for item in sorted(enumerate(results_by_attack_number_item), key=lambda x: x[1].shape[0], reverse=True)]
    max_length = results_by_attack_number_item[sorted_seq_indices[0]].shape[0]
    print("n_flows", len(result_ranges_by_attack_number_item), "max_length", max_length)
# print("result shapes", [item.shape for item in results_by_attack_number_item])
padded_results = [np.concatenate((item, np.tile(item[-1], max_length-item.shape[0]))) for item in results_by_attack_number_item]
# print("flow shapes", [item.shape for item in flows_by_attack_number_item])
padded_flows = [np.concatenate((item, np.tile(item[-1:,:], (max_length-item.shape[0], 1)))) for item in flows_by_attack_number_item]
# print("padded result shapes", [item.shape for item in padded_results])
    best_flow = max(enumerate(padded_results), key=lambda x: sum(x[1]))[0]
    worst_flow = min(enumerate(padded_results), key=lambda x: sum(x[1]))[0]
    stacked = np.stack(padded_results)
    # print("stacked.shape", stacked.shape)
    mean_flow = np.mean(stacked, axis=0)
    mean_features_flow = np.mean(np.stack(padded_flows), 0)
    # print("mean_flow.shape", mean_flow.shape)
    # print("mean_features_flow.shape", mean_features_flow.shape)
    average_flow = min(enumerate(padded_results), key=lambda x: np.linalg.norm(mean_flow - x[1]))[0]
    # print("best_flow", best_flow, "worst_flow", worst_flow, "average_flow", average_flow)
    all_legends = []
    fig, ax1 = plt.subplots()
    ax2 = ax1.twinx()
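    # Feature columns 3 and 4 are assumed to hold packet length and inter-arrival time (see FEATURE_NAMES); values are
    # de-normalized with the stored means/stds. Only the first two flow variants (best and worst confidence) are plotted,
    # since the inner loop breaks at flow_index 2.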
    for feature_index_from_zero, (feature_name, feature_index, ax) in enumerate(zip(FEATURE_NAMES, (3, 4), (ax1, ax2))):
        ax.set_ylabel(feature_name, color=colors[feature_index_from_zero])
        for flow_index, (flow_name, flow) in enumerate(zip(("best confidence", "worst confidence", "average confidence", "average flow"), (flows_by_attack_number_item[best_flow], flows_by_attack_number_item[worst_flow], flows_by_attack_number_item[average_flow], mean_features_flow))):
            if flow_index >= 2:
                break
            legend = "{}, {}".format(flow_name, feature_name)
            # print("flow_dataset_index", flow_dataset_index)
            # print("flow.shape", flow.shape)
            ret = ax.plot(range(flow.shape[0]), flow[:,feature_index]*stds[feature_index]+means[feature_index], label=legend, linestyle=LINESTYLES[flow_index], color=colors[feature_index_from_zero])
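            # Column 5 appears to encode packet direction: ">" marks packets going the same way as the flow's first packet, "<" the opposite direction.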
            forward_direction = flow[0,5]
            forward_points = flow[:,5] == forward_direction
            ret2 = ax.scatter(np.arange(flow.shape[0])[forward_points], (flow[:,feature_index]*stds[feature_index]+means[feature_index])[forward_points], color=colors[feature_index_from_zero], marker=">")
            ret3 = ax.scatter(np.arange(flow.shape[0])[~forward_points], (flow[:,feature_index]*stds[feature_index]+means[feature_index])[~forward_points], color=colors[feature_index_from_zero], marker="<")
            # print("flow[:,5]", flow[:,5])
            all_legends += ret
    plt.title(reverse_mapping[attack_type])
    all_labels = [item.get_label() for item in all_legends]
    ax1.legend(all_legends, all_labels)
    plt.xlabel('Sequence index')
    plt.tight_layout()
    # plt.show()
    os.makedirs(DIR_NAME, exist_ok=True)
    plt.savefig(DIR_NAME+'/{}_{}_{}.pdf'.format(file_name.split("/")[-1], attack_type, reverse_mapping[attack_type].replace("/", "-").replace(":", "-")))
    plt.clf()