Skip to content

Commit

Permalink
Merge pull request cisagov#45 from cisagov/feature/inventory-report
Browse files Browse the repository at this point in the history
feature: inventory report
  • Loading branch information
Dbones202 authored Aug 16, 2023
2 parents 7fb2221 + ff55011 commit 427b3a5
Show file tree
Hide file tree
Showing 7 changed files with 227 additions and 74 deletions.
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ install_requires =
lxml>=4.3.2
netaddr>=0.8.0
openpyxl>=3.1.2
pandas>=2.0.3
tqdm>=4.57.0

[options.packages.find]
Expand Down
102 changes: 102 additions & 0 deletions src/navv/bll.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import os
from ipaddress import IPv4Address, IPv6Address
import pandas as pd

from navv.zeek import perform_zeekcut
from navv.utilities import get_mac_vendor
from navv.validators import is_ipv4_address, is_ipv6_address


def get_zeek_data(zeek_logs):
    """Return the selected conn.log fields as a list of text lines.

    ``zeek_logs`` is the directory that holds ``conn.log``. The log is passed
    through ``perform_zeekcut`` for the fields listed below, and the bytes it
    returns are decoded as UTF-8 and split on newlines.
    """
    conn_fields = [
        "id.orig_h",
        "id.resp_h",
        "id.resp_p",
        "proto",
        "conn_state",
        "orig_l2_addr",
        "resp_l2_addr",
    ]
    conn_log_path = os.path.join(zeek_logs, "conn.log")

    raw_output = perform_zeekcut(fields=conn_fields, log_file=conn_log_path)
    lines = raw_output.decode("utf-8").split("\n")

    # The final piece of the split (whatever follows the last newline) is
    # always discarded.
    return lines[:-1]


def get_zeek_df(zeek_data: list):
    """Build a pandas dataframe from conn.log rows.

    Each entry of ``zeek_data`` is split on tab characters and the pieces
    become, in order, the ``src_ip``, ``dst_ip``, ``port``, ``proto``,
    ``conn``, ``src_mac`` and ``dst_mac`` columns.
    """
    column_names = [
        "src_ip",
        "dst_ip",
        "port",
        "proto",
        "conn",
        "src_mac",
        "dst_mac",
    ]
    records = [line.split("\t") for line in zeek_data]

    return pd.DataFrame(data=records, columns=column_names)


def get_inventory_report_df(zeek_df: pd.DataFrame):
    """Return a pandas dataframe of the inventory report data.

    The result has one row per MAC address that appears as either the source
    or the destination of a connection, with these columns:

    * ``index`` - positional index added by ``reset_index()``
    * ``mac`` - the MAC address
    * ``port_and_proto`` - list of ``"<port>/<proto>"`` strings (may repeat)
    * ``vendor`` - result of ``get_mac_vendor`` for the MAC address
    * ``ipv4`` / ``ipv6`` - de-duplicated lists, in first-seen order, of the
      source and destination addresses of the connections the MAC address
      took part in; a list may contain ``None`` where an address was not of
      that family

    Args:
        zeek_df: Dataframe with the ``src_ip``, ``dst_ip``, ``port``,
            ``proto``, ``src_mac`` and ``dst_mac`` columns. It is left
            unmodified.
    """
    ip_columns = ["src_ipv4", "src_ipv6", "dst_ipv4", "dst_ipv6"]
    shared_columns = ip_columns + ["port_and_proto"]

    # Work on a copy so the helper columns do not leak into the caller's frame.
    conn_df = zeek_df.copy()
    conn_df["port_and_proto"] = conn_df["port"] + "/" + conn_df["proto"]

    # Split each endpoint address into an IPv4 and an IPv6 column; the column
    # of the family that does not apply holds None.
    for side in ("src", "dst"):
        addresses = conn_df[f"{side}_ip"]
        conn_df[f"{side}_ipv4"] = addresses.apply(
            lambda ip: ip if is_ipv4_address(ip) else None
        )
        conn_df[f"{side}_ipv6"] = addresses.apply(
            lambda ip: ip if is_ipv6_address(ip) else None
        )

    # One copy of every connection per endpoint, keyed by that endpoint's MAC.
    per_mac_frames = [
        conn_df[shared_columns].assign(mac=conn_df[mac_column])
        for mac_column in ("src_mac", "dst_mac")
    ]

    # "mac" must be part of the duplicate key: the source and destination
    # copies of a connection are identical in every other column, so leaving
    # it out discards every destination row.
    df = pd.concat(per_mac_frames, ignore_index=True).drop_duplicates(
        subset=["mac"] + shared_columns
    )

    # NOTE: reset_index() (without drop=True) adds an "index" column; it is
    # kept so the returned columns stay the same for existing callers.
    grouped_df = (
        df.groupby("mac", as_index=False)
        .agg({column: list for column in shared_columns})
        .reset_index()
    )

    # Look the vendor up once per MAC address rather than once per connection.
    grouped_df["vendor"] = grouped_df["mac"].apply(get_mac_vendor)

    # NOTE(review): each MAC collects the addresses of BOTH endpoints of its
    # connections, not only its own - confirm this is the intended report.
    # dict.fromkeys() de-duplicates while keeping a stable first-seen order.
    grouped_df["ipv4"] = (grouped_df["src_ipv4"] + grouped_df["dst_ipv4"]).apply(
        lambda ips: list(dict.fromkeys(ips))
    )
    grouped_df["ipv6"] = (grouped_df["src_ipv6"] + grouped_df["dst_ipv6"]).apply(
        lambda ips: list(dict.fromkeys(ips))
    )

    return grouped_df.drop(columns=ip_columns)
33 changes: 13 additions & 20 deletions src/navv/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

# Third-Party Libraries
import click
from navv.bll import get_inventory_report_df, get_zeek_data, get_zeek_df

# cisagov Libraries
from navv.gui import app
Expand All @@ -25,7 +26,8 @@
write_stats_sheet,
write_unknown_internals_sheet,
)
from navv.utilities import pushd, run_zeek, perform_zeekcut, trim_dns_data
from navv.zeek import run_zeek, perform_zeekcut
from navv.utilities import pushd, trim_dns_data


@click.command("generate")
Expand Down Expand Up @@ -68,27 +70,18 @@ def generate(customer_name, output_dir, pcap, zeek_logs):
run_zeek(os.path.abspath(pcap), zeek_logs, timer=timer_data)
else:
timer_data["run_zeek"] = "NOT RAN"
zeek_data = (
perform_zeekcut(
fields=[
"id.orig_h",
"id.resp_h",
"id.resp_p",
"proto",
"conn_state",
"orig_l2_addr",
"resp_l2_addr",
],
log_file=os.path.join(zeek_logs, "conn.log"),
)
.decode("utf-8")
.split("\n")[:-1]
)

# turn zeekcut data into rows for spreadsheet
# Get zeek data
zeek_data = get_zeek_data(zeek_logs)
zeek_df = get_zeek_df(zeek_data)

# Get inventory report dataframe
inventory_df = get_inventory_report_df(zeek_df)

# Turn zeekcut data into rows for spreadsheet
rows, mac_dict = create_analysis_array(zeek_data, timer=timer_data)

# get dns data for resolution
# Get dns data for resolution
json_path = os.path.join(output_dir, f"{customer_name}_dns_data.json")

if os.path.exists(json_path):
Expand Down Expand Up @@ -117,7 +110,7 @@ def generate(customer_name, output_dir, pcap, zeek_logs):
timer=timer_data,
)

write_inventory_report_sheet(mac_dict, wb)
write_inventory_report_sheet(inventory_df, wb)

write_macs_sheet(mac_dict, wb)

Expand Down
61 changes: 43 additions & 18 deletions src/navv/spreadsheet_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,27 +347,52 @@ def write_conn_states_sheet(conn_states, wb):
auto_adjust_width(new_ws)


def write_inventory_report_sheet(mac_dict, wb):
def write_inventory_report_sheet(inventory_df, wb):
"""Get Mac Addresses with their associated IP addresses and manufacturer."""
ir_sheet = make_sheet(wb, "Inventory Report", idx=4)
ir_sheet.append(["MAC", "Vendor", "IPs"])
for row_index, mac in enumerate(mac_dict, start=2):
ir_sheet[f"A{row_index}"].value = mac
orgs = utilities.get_mac_vendor(mac)
ir_sheet.append(["MAC", "Vendor", "IPv4", "IPv6", "Port and Proto"])

inventory_data = inventory_df.to_dict(orient="records")
for index, row in enumerate(inventory_data, start=2):
# Mac Address column
ir_sheet[f"A{index}"].value = row["mac"]

# Vendor column
ir_sheet[f"B{index}"].value = row["vendor"]

# IPv4 Address column
ipv4_list_cell = ir_sheet[f"C{index}"]
ipv4_list_cell.alignment = openpyxl.styles.Alignment(wrap_text=True)

ipv4 = ""
if row["ipv4"]:
ipv4 = ", ".join(each for each in row["ipv4"] if each)
ipv4_list_cell.value = ipv4

# IPv6 Address column
ipv6_list_cell = ir_sheet[f"D{index}"]
ipv6_list_cell.alignment = openpyxl.styles.Alignment(wrap_text=True)

ipv6 = ""
if row["ipv6"]:
ipv6 = ", ".join(each for each in row["ipv6"] if each)
ipv6_list_cell.value = ipv6

# Port and Protocol column
pnp_sheet = ir_sheet[f"E{index}"]
pnp_sheet.alignment = openpyxl.styles.Alignment(wrap_text=True)

port_and_proto = ""
if row["port_and_proto"]:
port_and_proto = ", ".join(
list(set(each for each in row["port_and_proto"] if each))[:10]
)

ir_sheet[f"B{row_index}"].value = "\n".join(orgs)
ip_list_cell = ir_sheet[f"C{row_index}"]
ip_list_cell.alignment = openpyxl.styles.Alignment(wrap_text=True)
num_ips = len(mac_dict[mac])
if num_ips > 10:
display_list = mac_dict[mac][:10]
display_list.append(f"Displaying 10 IPs of {num_ips}")
ip_list_cell.value = "\n".join(display_list)
else:
ip_list_cell.value = "\n".join(mac_dict[mac][:10])
ir_sheet.row_dimensions[row_index].height = min(num_ips, 11) * 15
if row_index % 2 == 0:
for cell in ir_sheet[f"{row_index}:{row_index}"]:
pnp_sheet.value = port_and_proto

# Add styling to every other row
if index % 2 == 0:
for cell in ir_sheet[f"{index}:{index}"]:
cell.fill = openpyxl.styles.PatternFill("solid", fgColor="AAAAAA")
auto_adjust_width(ir_sheet)
ir_sheet.column_dimensions["C"].width = 39 * 1.2
Expand Down
48 changes: 12 additions & 36 deletions src/navv/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,12 @@
import os
import contextlib
import json
from subprocess import Popen, PIPE, STDOUT, check_call
import time

from netaddr import EUI, core as netaddr_core
from tqdm import tqdm

from navv.message_handler import info_msg, error_msg
from navv.validators import is_mac_address


MAC_VENDORS_JSON_FILE = os.path.abspath(__file__ + "/../" + "data/mac-vendors.json")
Expand Down Expand Up @@ -45,27 +44,6 @@ def timed(*args, **kw):
return timed


@timeit
def run_zeek(pcap_path, zeek_logs_path, **kwargs):
with pushd(zeek_logs_path):
# can we add Site::local_nets to the zeek call here?
err = check_call(["zeek", "-C", "-r", pcap_path, "local.zeek"])
error_msg(f"Zeek returned with code: {err}")


def perform_zeekcut(fields, log_file):
"""Perform the call to zeek-cut with the identified fields on the specified log file"""
try:
with open(log_file, "rb") as f:
zeekcut = Popen(
["zeek-cut"] + fields, stdout=PIPE, stdin=PIPE, stderr=STDOUT
)
return zeekcut.communicate(input=f.read())[0]
except OSError as e:
# probably "file does not exist"
return b""


def trim_dns_data(data):
"""Find entries in dns log that contain no_error and return a dict of {ip: hostname,}"""
ret_data = {}
Expand All @@ -80,27 +58,25 @@ def trim_dns_data(data):
return ret_data


def get_mac_vendor(mac_address: str) -> list:
def get_mac_vendor(mac_address: str) -> str:
"""Return the vendor of the MAC address."""
mac_address = mac_address.upper()

try:
EUI(mac_address)
except netaddr_core.AddrFormatError:
if not is_mac_address(mac_address):
error_msg(f"Invalid MAC address: {mac_address}")
return [f"Bad MAC address {mac_address}"]
return f"Bad MAC address {mac_address}"

with open(MAC_VENDORS_JSON_FILE) as f:
mac_vendors = json.load(f)

vendor = [
vendor["vendorName"]
for vendor in mac_vendors
if mac_address.startswith(vendor["macPrefix"])
]

if not vendor:
try:
vendor = [
vendor["vendorName"]
for vendor in mac_vendors
if mac_address.startswith(vendor["macPrefix"])
][0]
except IndexError:
error_msg(f"Unknown vendor for MAC address: {mac_address}")
return [f"Unknown vendor for MAC address {mac_address}"]
return "Unknown Vendor"

return vendor
29 changes: 29 additions & 0 deletions src/navv/validators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from ipaddress import IPv4Address, IPv6Address
import re


def is_ipv4_address(ip_address: str) -> bool:
    """Tell whether ``ip_address`` can be parsed by ``IPv4Address``.

    Returns True when the constructor accepts the value and False when it
    raises ``ValueError``.
    """
    try:
        IPv4Address(ip_address)
    except ValueError:
        is_valid = False
    else:
        is_valid = True
    return is_valid


def is_ipv6_address(ip_address: str) -> bool:
    """Tell whether ``ip_address`` can be parsed by ``IPv6Address``.

    Returns True when the constructor accepts the value and False when it
    raises ``ValueError``.
    """
    try:
        IPv6Address(ip_address)
    except ValueError:
        # The constructor rejected the value, so it is not an IPv6 address.
        return False
    else:
        return True


def is_mac_address(mac_address: str) -> bool:
    """Return True if address is a valid MAC address.

    A valid address is six two-digit hexadecimal groups (case-insensitive)
    joined by one separator, either ``:`` or ``-``, used consistently.
    Anything else, including a value that is not a string, returns False.
    """
    if not isinstance(mac_address, str):
        return False

    # fullmatch() is used because "$" with match() also matches just before a
    # trailing newline, which would let "aa:bb:cc:dd:ee:ff\n" through.
    # The backreference \1 forces the separator of the first pair to be
    # reused for the remaining groups.
    return (
        re.fullmatch(
            r"[0-9a-f]{2}([-:])[0-9a-f]{2}(?:\1[0-9a-f]{2}){4}",
            mac_address.lower(),
        )
        is not None
    )
27 changes: 27 additions & 0 deletions src/navv/zeek.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from subprocess import Popen, PIPE, STDOUT, check_call

from navv.message_handler import error_msg
from navv.utilities import pushd, timeit


@timeit
def run_zeek(pcap_path, zeek_logs_path, **kwargs):
    """Run ``zeek -C -r <pcap_path> local.zeek`` inside ``pushd(zeek_logs_path)``.

    Any exception raised by the call is passed to ``error_msg`` instead of
    propagating. ``kwargs`` is not read here; presumably it is consumed by
    the ``timeit`` decorator - confirm against its definition.
    """
    zeek_command = ["zeek", "-C", "-r", pcap_path, "local.zeek"]

    with pushd(zeek_logs_path):
        # can we add Site::local_nets to the zeek call here?
        try:
            check_call(zeek_command)
        except Exception as e:
            error_msg(e)


def perform_zeekcut(fields, log_file):
    """Run ``zeek-cut`` for ``fields`` over the contents of ``log_file``.

    The whole log file is read and written to the ``zeek-cut`` process on its
    standard input. The bytes the process writes to standard output are
    returned; standard error is redirected into the same stream. If an
    ``OSError`` is raised along the way, ``b""`` is returned instead.
    """
    command = ["zeek-cut"] + fields
    try:
        with open(log_file, "rb") as log_handle:
            process = Popen(command, stdout=PIPE, stdin=PIPE, stderr=STDOUT)
            output, _ = process.communicate(input=log_handle.read())
            return output
    except OSError:
        # probably "file does not exist"
        return b""

0 comments on commit 427b3a5

Please sign in to comment.