diff --git a/CHANGELOG.md b/CHANGELOG.md index f8f1ecda9a..57d01727a0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,10 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +* Added support for listing `%SystemDrive%\Users` as a supplementary mechanism + for collecting user profiles on Windows (in addition to using data from the + registry). + ### Removed * Removed the `ListFlowApplicableParsers` API method. * Removed the `ListParsedFlowResults` API method. +* Removed support for the `GREP` artifact source (these were internal to GRR and + not part of the [official specification](https://artifacts.readthedocs.io/en/latest/sources/Format-specification.html)). ## [3.4.7.4] - 2024-05-28 diff --git a/Dockerfile b/Dockerfile index 02ea7612a7..30ab1edbfc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,6 @@ # A Docker image capable of running all GRR components. # -# See https://hub.docker.com/r/grrdocker/grr/ +# See https://github.com/google/grr/pkgs/container/grr # # We have configured Github Actions to trigger an image build every # time a new PUSH happens in the GRR github repository. diff --git a/api_client/python/grr_api_client/client.py b/api_client/python/grr_api_client/client.py index 03a735db23..34e87292d2 100644 --- a/api_client/python/grr_api_client/client.py +++ b/api_client/python/grr_api_client/client.py @@ -2,12 +2,12 @@ """Clients-related part of GRR API client library.""" from collections import abc +import time from typing import Sequence from grr_api_client import flow from grr_api_client import utils from grr_api_client import vfs -from grr_response_core.lib import rdfvalue from grr_response_proto.api import client_pb2 from grr_response_proto.api import flow_pb2 from grr_response_proto.api import user_pb2 @@ -209,10 +209,9 @@ def CreateApproval( expiration_time_us = 0 if expiration_duration_days != 0: - expiration_time_us = ( - rdfvalue.RDFDatetime.Now() - + rdfvalue.Duration.From(expiration_duration_days, rdfvalue.DAYS) - ).AsMicrosecondsSinceEpoch() + expiration_time_us = int( + (time.time() + expiration_duration_days * 24 * 3600) * 1e6 + ) approval = user_pb2.ApiClientApproval( reason=reason, diff --git a/api_client/python/grr_api_client/flow.py b/api_client/python/grr_api_client/flow.py index a04538f2f1..9898048a45 100644 --- a/api_client/python/grr_api_client/flow.py +++ b/api_client/python/grr_api_client/flow.py @@ -11,7 +11,6 @@ from grr_api_client import context as api_context from grr_api_client import errors from grr_api_client import utils -from grr_response_core.lib.util import aead from grr_response_proto.api import flow_pb2 from grr_response_proto.api import osquery_pb2 from grr_response_proto.api import timeline_pb2 @@ -268,5 +267,5 @@ def DecryptLargeFile( with input_context as input_stream: with output_context as output_stream: - decrypted_stream = aead.Decrypt(input_stream, encryption_key) + decrypted_stream = utils.AEADDecrypt(input_stream, encryption_key) shutil.copyfileobj(decrypted_stream, output_stream) diff --git a/api_client/python/grr_api_client/utils.py b/api_client/python/grr_api_client/utils.py index 3a30cf9be3..9e1f1dca63 100644 --- a/api_client/python/grr_api_client/utils.py +++ b/api_client/python/grr_api_client/utils.py @@ -1,6 +1,9 @@ #!/usr/bin/env python """Utility functions and classes for GRR API client library.""" +import io +import itertools +import struct import time from typing import Any from typing import Callable @@ -11,6 +14,8 @@ from typing import TypeVar from typing import
Union +from cryptography.hazmat.primitives.ciphers import aead + from google.protobuf import any_pb2 from google.protobuf import wrappers_pb2 from google.protobuf import descriptor @@ -307,6 +312,97 @@ def Xor(bytestr: bytes, key: int) -> bytes: return bytes([byte ^ key for byte in bytestr]) +class _Unchunked(io.RawIOBase, IO[bytes]):  # pytype: disable=signature-mismatch  # overriding-return-type-checks +  """A raw file-like object that reads a chunk stream on demand.""" + +  def __init__(self, chunks: Iterator[bytes]) -> None: +    """Initializes the object.""" +    super().__init__() +    self._chunks = chunks +    self._buf = io.BytesIO() + +  def readable(self) -> bool: +    return True + +  def readall(self) -> bytes: +    return b"".join(self._chunks) + +  def readinto(self, buf: bytearray) -> int: +    if self._buf.tell() == len(self._buf.getbuffer()): +      self._buf.seek(0, io.SEEK_SET) +      self._buf.truncate() +      self._buf.write(next(self._chunks, b"")) +      self._buf.seek(0, io.SEEK_SET) + +    return self._buf.readinto(buf) + + +def AEADDecrypt(stream: IO[bytes], key: bytes) -> IO[bytes]: +  """Decrypts the given file-like object using the AES algorithm in GCM mode. + +  Refer to the encryption documentation to learn about the details of the format +  that this function is able to decode. + +  Args: +    stream: A file-like object to decrypt. +    key: A secret key used for decrypting the data. + +  Returns: +    A file-like object with decrypted data. +  """ +  aesgcm = aead.AESGCM(key) + +  def Generate() -> Iterator[bytes]: +    # Buffered reader should accept `IO[bytes]` but for now it accepts only +    # `RawIOBase` (which is a concrete base class for all I/O implementations). +    reader = io.BufferedReader(stream)  # pytype: disable=wrong-arg-types + +    # We abort early if there is no data in the stream. Otherwise we would try +    # to read the nonce and fail. +    if not reader.peek(): +      return + +    for idx in itertools.count(): +      nonce = reader.read(_AEAD_NONCE_SIZE) + +      # As long as there is some data in the buffer (and there should be because +      # of the initial check) there should be a fixed-size nonce prepended to +      # each chunk. +      if len(nonce) != _AEAD_NONCE_SIZE: +        raise EOFError(f"Incorrect nonce length: {len(nonce)}") + +      chunk = reader.read(_AEAD_CHUNK_SIZE + 16) + +      # `BufferedReader#peek` will return a non-empty byte string if there is +      # more data available in the stream. +      is_last = reader.peek() == b""  # pylint: disable=g-explicit-bool-comparison + +      adata = _AEAD_ADATA_FORMAT.pack(idx, is_last) + +      yield aesgcm.decrypt(nonce, chunk, adata) + +      if is_last: +        break + +  return io.BufferedReader(_Unchunked(Generate())) + + +# We use 12 bytes (96 bits) as it is the recommended IV length by NIST for best +# performance [1]. See AESGCM documentation for more details. +# +# [1]: https://csrc.nist.gov/publications/detail/sp/800-38d/final +_AEAD_NONCE_SIZE = 12 + +# Because chunk size is crucial to the security of the whole procedure, we don't +# let users pick their own chunk size. Instead, we use fixed-size chunks of +# 4 mebibytes. +_AEAD_CHUNK_SIZE = 4 * 1024 * 1024 + +# As associated data for each encrypted chunk we use an integer denoting the +# chunk id followed by a byte indicating whether this is the last chunk.
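+# For example, the first of three chunks is authenticated with adata +# `_AEAD_ADATA_FORMAT.pack(0, False)` and the final one with +# `_AEAD_ADATA_FORMAT.pack(2, True)`, so chunks cannot be reordered or +# truncated without the authentication check failing.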
+_AEAD_ADATA_FORMAT = struct.Struct("!Q?") + + def RegisterProtoDescriptors( db: symbol_database.SymbolDatabase, *additional_descriptors: descriptor.FileDescriptor, diff --git a/api_client/python/grr_api_client/utils_test.py b/api_client/python/grr_api_client/utils_test.py index 5544af6e67..b3b89d1e70 100644 --- a/api_client/python/grr_api_client/utils_test.py +++ b/api_client/python/grr_api_client/utils_test.py @@ -1,8 +1,11 @@ #!/usr/bin/env python import io +import os import struct from absl.testing import absltest +from cryptography import exceptions +from cryptography.hazmat.primitives.ciphers import aead from google.protobuf import empty_pb2 from google.protobuf import timestamp_pb2 @@ -96,5 +99,69 @@ def testDecodeSeveralChunks(self): self.assertEqual(b"".join(decoded), content) +class AEADDecryptTest(absltest.TestCase): + + def testReadExact(self): + key = os.urandom(32) + + aesgcm = aead.AESGCM(key) + nonce = os.urandom(utils._AEAD_NONCE_SIZE) + adata = utils._AEAD_ADATA_FORMAT.pack(0, True) + encrypted = io.BytesIO( + nonce + aesgcm.encrypt(nonce, b"foobarbazquxnorf", adata) + ) + + decrypted = utils.AEADDecrypt(encrypted, key) + self.assertEqual(decrypted.read(3), b"foo") + self.assertEqual(decrypted.read(3), b"bar") + self.assertEqual(decrypted.read(3), b"baz") + self.assertEqual(decrypted.read(3), b"qux") + self.assertEqual(decrypted.read(4), b"norf") + + self.assertEqual(decrypted.read(), b"") + + def testIncorrectNonceLength(self): + key = os.urandom(32) + + buf = io.BytesIO() + + nonce = os.urandom(utils._AEAD_NONCE_SIZE - 1) + buf.write(nonce) + buf.seek(0, io.SEEK_SET) + + with self.assertRaisesRegex(EOFError, "nonce length"): + utils.AEADDecrypt(buf, key).read() + + def testIncorrectTag(self): + key = os.urandom(32) + aesgcm = aead.AESGCM(key) + + buf = io.BytesIO() + + nonce = os.urandom(utils._AEAD_NONCE_SIZE) + buf.write(nonce) + buf.write(aesgcm.encrypt(nonce, b"foo", b"QUUX")) + buf.seek(0, io.SEEK_SET) + + with self.assertRaises(exceptions.InvalidTag): + utils.AEADDecrypt(buf, key).read() + + def testIncorrectData(self): + key = os.urandom(32) + aesgcm = aead.AESGCM(key) + + buf = io.BytesIO() + + nonce = os.urandom(utils._AEAD_NONCE_SIZE) + adata = utils._AEAD_ADATA_FORMAT.pack(0, True) + buf.write(nonce) + buf.write(aesgcm.encrypt(nonce, b"foo", adata)) + buf.getbuffer()[-1] ^= 0b10101010 # Corrupt last byte. + buf.seek(0, io.SEEK_SET) + + with self.assertRaises(exceptions.InvalidTag): + utils.AEADDecrypt(buf, key).read() + + if __name__ == "__main__": absltest.main() diff --git a/appveyor/windows_templates/build_windows_templates.py b/appveyor/windows_templates/build_windows_templates.py index 456ae19f3e..59f1ea5a75 100644 --- a/appveyor/windows_templates/build_windows_templates.py +++ b/appveyor/windows_templates/build_windows_templates.py @@ -11,54 +11,67 @@ import subprocess import sys import time - from typing import Callable parser = argparse.ArgumentParser(description="Build windows templates.") parser.add_argument( - "--build_dir", default=r"C:\grrbuild", help="GRR build directory.") + "--build_dir", default=r"C:\grrbuild", help="GRR build directory." +) parser.add_argument( "--grr_src", default=r"C:\grrbuild\grr", - help="Location of the grr src code. If it doesn't exist " - " at this path we'll try to check it out from github.") + help=( + "Location of the grr src code. If it doesn't exist " + " at this path we'll try to check it out from github." 
+ ), +) parser.add_argument( "--output_dir", default=r"C:\grrbuild\output", - help="Destination directory for the templates.") + help="Destination directory for the templates.", +) parser.add_argument( "--test_repack_install", action="store_true", default=False, - help="Test repacking by calling repack on the template after building," - "then try and install the result. For use by integration tests. If you use " - "this option you must run as admin.") + help=( + "Test repacking by calling repack on the template after building,then" + " try and install the result. For use by integration tests. If you use" + " this option you must run as admin." + ), +) parser.add_argument( "--wheel_dir", default=None, - help="A directory that will be passed to pip as the wheel-dir parameter.") + help="A directory that will be passed to pip as the wheel-dir parameter.", +) parser.add_argument( "--expect_service_running", dest="expect_service_running", action="store_true", - help="Triggers whether after installation the GRR service should be " - "running or not. Used for testing the installation.") + help=( + "Triggers whether after installation the GRR service should be " + "running or not. Used for testing the installation." + ), +) parser.add_argument( "--noexpect_service_running", dest="expect_service_running", - action="store_false") + action="store_false", +) parser.set_defaults(expect_service_running=True) parser.add_argument( "--config", default="", - help="Path to the config file to be used when building templates.") + help="Path to the config file to be used when building templates.", +) args = parser.parse_args() @@ -79,13 +92,16 @@ def _FileRetryLoop(path: str, f: Callable[[], None]) -> None: return except OSError as e: attempts += 1 - if (e.errno == errno.EACCES and - attempts < _FILE_RETRY_LOOP_RETRY_TIME_SECS): + if ( + e.errno == errno.EACCES + and attempts < _FILE_RETRY_LOOP_RETRY_TIME_SECS + ): # The currently installed GRR process may stick around for a few # seconds after the service is terminated (keeping the contents of # the installation directory locked). - logging.info("Permission-denied error while trying to process %s.", - path) + logging.info( + "Permission-denied error while trying to process %s.", path + ) time.sleep(1) else: raise @@ -99,27 +115,6 @@ def _Rename(src: str, dst: str) -> None: _FileRetryLoop(src, lambda: os.rename(src, dst)) -def _RmTreePseudoTransactional(path: str) -> None: - """Removes `path`. - - Makes sure that either `path` is gone or that it is still present as - it was. - - Args: - path: The path to remove. - """ - temp_path = f"{path}_orphaned_{int(time.time())}" - logging.info("Trying to rename %s -> %s.", path, temp_path) - - _Rename(path, temp_path) - - try: - logging.info("Trying to remove %s.", temp_path) - _RmTree(temp_path) - except: # pylint: disable=bare-except - logging.info("Failed to remove %s. 
Ignoring.", temp_path, exc_info=True) - - def _VerboseCheckCall(params): logging.info("Running: %s", params) @@ -141,8 +136,9 @@ def SetupVars(self): self.virtualenv64 = os.path.join(args.build_dir, "python_64") self.grr_client_build64 = "grr_client_build" - self.virtualenv_python64 = os.path.join(self.virtualenv64, - r"Scripts\python.exe") + self.virtualenv_python64 = os.path.join( + self.virtualenv64, r"Scripts\python.exe" + ) self.git = r"git" @@ -180,13 +176,17 @@ def Clean(self): def GitCheckoutGRR(self): os.chdir(args.build_dir) subprocess.check_call( - [self.git, "clone", "https://github.com/google/grr.git"]) + [self.git, "clone", "https://github.com/google/grr.git"] + ) def MakeProtoSdist(self): os.chdir(os.path.join(args.grr_src, "grr/proto")) subprocess.check_call([ - self.virtualenv_python64, "setup.py", "sdist", "--formats=zip", - "--dist-dir=%s" % args.build_dir + self.virtualenv_python64, + "setup.py", + "sdist", + "--formats=zip", + "--dist-dir=%s" % args.build_dir, ]) return glob.glob( os.path.join(args.build_dir, "grr_response_proto-*.zip") @@ -195,8 +195,12 @@ def MakeProtoSdist(self): def MakeCoreSdist(self): os.chdir(os.path.join(args.grr_src, "grr/core")) subprocess.check_call([ - self.virtualenv_python64, "setup.py", "sdist", "--formats=zip", - "--dist-dir=%s" % args.build_dir, "--no-sync-artifacts" + self.virtualenv_python64, + "setup.py", + "sdist", + "--formats=zip", + "--dist-dir=%s" % args.build_dir, + "--no-sync-artifacts", ]) return glob.glob( os.path.join(args.build_dir, "grr_response_core-*.zip") @@ -205,8 +209,11 @@ def MakeCoreSdist(self): def MakeClientSdist(self): os.chdir(os.path.join(args.grr_src, "grr/client/")) subprocess.check_call([ - self.virtualenv_python64, "setup.py", "sdist", "--formats=zip", - "--dist-dir=%s" % args.build_dir + self.virtualenv_python64, + "setup.py", + "sdist", + "--formats=zip", + "--dist-dir=%s" % args.build_dir, ]) return glob.glob( os.path.join(args.build_dir, "grr_response_client-*.zip") @@ -215,8 +222,11 @@ def MakeClientSdist(self): def MakeClientBuilderSdist(self): os.chdir(os.path.join(args.grr_src, "grr/client_builder/")) subprocess.check_call([ - self.virtualenv_python64, "setup.py", "sdist", "--formats=zip", - "--dist-dir=%s" % args.build_dir + self.virtualenv_python64, + "setup.py", + "sdist", + "--formats=zip", + "--dist-dir=%s" % args.build_dir, ]) return glob.glob( os.path.join(args.build_dir, "grr_response_client_builder-*.zip") @@ -242,8 +252,12 @@ def BuildTemplates(self): """ if args.config: build_args = [ - "--verbose", "--config", args.config, "build", "--output", - args.output_dir + "--verbose", + "--config", + args.config, + "build", + "--output", + args.output_dir, ] else: build_args = ["--verbose", "build", "--output", args.output_dir] @@ -268,9 +282,11 @@ def _WixToolsPath(self) -> str: def _RepackTemplates(self): """Repack templates with a dummy config.""" dummy_config = os.path.join( - args.grr_src, "grr/test/grr_response_test/test_data/dummyconfig.yaml") - template_amd64 = glob.glob(os.path.join(args.output_dir, - "*_amd64*.zip")).pop() + args.grr_src, "grr/test/grr_response_test/test_data/dummyconfig.yaml" + ) + template_amd64 = glob.glob( + os.path.join(args.output_dir, "*_amd64*.zip") + ).pop() fleetspeak_config = os.path.join( args.grr_src, @@ -345,8 +361,9 @@ def _CheckInstallSuccess(self): raise RuntimeError("Install failed, no files at: %s" % self.install_path) try: - output = subprocess.check_output(["sc", "query", self.service_name], - encoding="utf-8") + output = subprocess.check_output( 
+ ["sc", "query", self.service_name], encoding="utf-8" + ) service_running = "RUNNING" in output except subprocess.CalledProcessError as e: output = e.output @@ -370,13 +387,15 @@ def _CheckInstallSuccess(self): if self.expect_service_running: if not service_running: raise RuntimeError( - "GRR service not running after install, sc query output: %s" % - output) + "GRR service not running after install, sc query output: %s" + % output + ) else: if service_running: raise RuntimeError( "GRR service running after install with expect_service_running == " - "False, sc query output: %s" % output) + "False, sc query output: %s" % output + ) def _InstallInstallers(self): """Install the installer built by RepackTemplates.""" diff --git a/colab/grr_colab/__init__.py b/colab/grr_colab/__init__.py index dc4a3567c7..df2f808875 100644 --- a/colab/grr_colab/__init__.py +++ b/colab/grr_colab/__init__.py @@ -424,7 +424,6 @@ def collect( args = flows_pb2.ArtifactCollectorFlowArgs() args.artifact_list.append(artifact) - args.apply_parsers = True try: ac = self._client.CreateFlow(name='ArtifactCollectorFlow', args=args) diff --git a/colab/grr_colab/client_test.py b/colab/grr_colab/client_test.py index 3550505643..a261ad1fdb 100644 --- a/colab/grr_colab/client_test.py +++ b/colab/grr_colab/client_test.py @@ -22,7 +22,6 @@ from grr_response_server import data_store from grr_response_server.rdfvalues import mig_objects from grr.test_lib import osquery_test_lib -from grr.test_lib import parser_test_lib from grr.test_lib import test_lib @@ -88,7 +87,6 @@ def testWithHostname_MultipleClients(self): self.assertEqual(context.exception.hostname, hostname) self.assertItemsEqual([client_id1, client_id2], context.exception.clients) - @parser_test_lib.WithAllParsers def testWithHostname_NoClients(self): hostname = 'noclients.loc.group.example.com' @@ -217,7 +215,6 @@ def testHostname(self): client = grr_colab.Client.with_id(ClientTest.FAKE_CLIENT_ID) self.assertEqual(client.hostname, hostname) - @parser_test_lib.WithAllParsers def testHostname_AfterInterrogate(self): data_store.REL_DB.WriteClientMetadata(client_id=ClientTest.FAKE_CLIENT_ID) @@ -237,7 +234,6 @@ def testIfaces(self): self.assertLen(client.ifaces, 1) self.assertEqual(client.ifaces[0].ifname, ifname) - @parser_test_lib.WithAllParsers def testIfaces_AfterInterrogate(self): data_store.REL_DB.WriteClientMetadata(client_id=ClientTest.FAKE_CLIENT_ID) @@ -281,7 +277,6 @@ def testArch(self): client = grr_colab.Client.with_id(ClientTest.FAKE_CLIENT_ID) self.assertEqual(client.arch, arch) - @parser_test_lib.WithAllParsers def testArch_AfterInterrogate(self): data_store.REL_DB.WriteClientMetadata(client_id=ClientTest.FAKE_CLIENT_ID) @@ -300,7 +295,6 @@ def testKernel(self): client = grr_colab.Client.with_id(ClientTest.FAKE_CLIENT_ID) self.assertEqual(client.kernel, kernel) - @parser_test_lib.WithAllParsers def testKernel_AfterInterrogate(self): data_store.REL_DB.WriteClientMetadata(client_id=ClientTest.FAKE_CLIENT_ID) @@ -389,7 +383,6 @@ def ProcessApproval(): finally: thread.join() - @parser_test_lib.WithAllParsers def testInterrogate(self): data_store.REL_DB.WriteClientMetadata(client_id=ClientTest.FAKE_CLIENT_ID) client = grr_colab.Client.with_id(ClientTest.FAKE_CLIENT_ID) @@ -497,7 +490,6 @@ def testOsquery_WithoutApproval(self): self.assertEqual(context.exception.client_id, ClientTest.FAKE_CLIENT_ID) - @parser_test_lib.WithAllParsers def testCollect(self): data_store.REL_DB.WriteClientMetadata(client_id=ClientTest.FAKE_CLIENT_ID) diff --git 
a/grr/client/grr_response_client/client_actions/timeline.py b/grr/client/grr_response_client/client_actions/timeline.py index aaa712f78f..6f01aa6004 100644 --- a/grr/client/grr_response_client/client_actions/timeline.py +++ b/grr/client/grr_response_client/client_actions/timeline.py @@ -4,13 +4,13 @@ import hashlib import os import stat as stat_mode -from typing import Iterator -from typing import Optional +from typing import Iterator, Optional import psutil from grr_response_client import actions from grr_response_core.lib import rdfvalue +from grr_response_core.lib.rdfvalues import mig_timeline from grr_response_core.lib.rdfvalues import protodict as rdf_protodict from grr_response_core.lib.rdfvalues import timeline as rdf_timeline from grr_response_core.lib.util import iterator @@ -33,7 +33,10 @@ def Run(self, args: rdf_timeline.TimelineArgs) -> None: """Executes the client action.""" fstype = GetFilesystemType(args.root) entries = iterator.Counted(Walk(args.root)) - for entry_batch in rdf_timeline.TimelineEntry.SerializeStream(entries): + proto_entries = ( + mig_timeline.ToProtoTimelineEntry(entry) for entry in entries + ) + for entry_batch in rdf_timeline.SerializeTimelineEntryStream(proto_entries): entry_batch_blob = rdf_protodict.DataBlob(data=entry_batch) self.SendReply(entry_batch_blob, session_id=self._TRANSFER_STORE_ID) diff --git a/grr/client/grr_response_client/client_startup.py b/grr/client/grr_response_client/client_startup.py index 01fded0c63..c9944d0148 100644 --- a/grr/client/grr_response_client/client_startup.py +++ b/grr/client/grr_response_client/client_startup.py @@ -6,7 +6,6 @@ from grr_response_core import config from grr_response_core.config import contexts from grr_response_core.lib import config_lib -from grr_response_core.lib.parsers import all as all_parsers def ClientInit(): @@ -17,7 +16,6 @@ def ClientInit(): config_lib.ParseConfigCommandLine() client_logging.LogInit() - all_parsers.Register() if not config.CONFIG.ContextApplied(contexts.CLIENT_BUILD_CONTEXT): config.CONFIG.Persist("Client.labels") diff --git a/grr/core/grr_response_core/artifacts/README.md b/grr/core/grr_response_core/artifacts/README.md index 24704fffb3..6808d0b728 100644 --- a/grr/core/grr_response_core/artifacts/README.md +++ b/grr/core/grr_response_core/artifacts/README.md @@ -1,13 +1,12 @@ # This directory is reserved for external artifacts -The Makefile removes ``*.yaml`` from this directory when syncing the external -repo located [here] (https://github.com/ForensicArtifacts/artifacts). +The Makefile removes `*.yaml` from this directory when syncing the external repo +located [here](https://github.com/ForensicArtifacts/artifacts). ## Where artifacts go -- Private artifacts should go in ``artifacts/local`` -- Public artifacts that are non GRR specific should be submitted to the external -repo. -- Public artifacts that call GRR functions with ``LIST_FILES``, - ``GRR_CLIENT_ACTION``, ``GREP`` etc. should live in - ``artifacts/flow_templates`` +- Private artifacts should go in `artifacts/local`. +- Public artifacts that are non GRR specific should be submitted to the + external repo. +- Public artifacts that call GRR functions with `LIST_FILES`, + `GRR_CLIENT_ACTION` etc. should live in `artifacts/flow_templates`. 
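For reference, the chunked AEAD stream consumed by the new `AEADDecrypt` in `api_client/python/grr_api_client/utils.py` above can be produced as sketched below. This is a minimal illustrative sketch and not part of the patch: `AEADEncrypt` is a hypothetical helper name, and the module-level constants simply mirror `_AEAD_NONCE_SIZE`, `_AEAD_CHUNK_SIZE` and `_AEAD_ADATA_FORMAT` from the patched module.

import io
import os
import struct
from typing import IO

from cryptography.hazmat.primitives.ciphers import aead

_NONCE_SIZE = 12  # Mirrors _AEAD_NONCE_SIZE.
_CHUNK_SIZE = 4 * 1024 * 1024  # Mirrors _AEAD_CHUNK_SIZE.
_ADATA_FORMAT = struct.Struct("!Q?")  # Mirrors _AEAD_ADATA_FORMAT.


def AEADEncrypt(stream: IO[bytes], key: bytes) -> IO[bytes]:
  """Encrypts a stream into the chunked format that `AEADDecrypt` reads."""
  aesgcm = aead.AESGCM(key)
  out = io.BytesIO()

  idx = 0
  chunk = stream.read(_CHUNK_SIZE)
  while True:
    next_chunk = stream.read(_CHUNK_SIZE)
    is_last = not next_chunk

    # Each chunk is prefixed with a fresh 12-byte nonce and authenticated
    # together with its index and an is-last flag; AES-GCM appends a 16-byte
    # tag to every encrypted chunk.
    nonce = os.urandom(_NONCE_SIZE)
    adata = _ADATA_FORMAT.pack(idx, is_last)
    out.write(nonce)
    out.write(aesgcm.encrypt(nonce, chunk, adata))

    if is_last:
      break
    chunk = next_chunk
    idx += 1

  out.seek(0, io.SEEK_SET)
  return out


# Round trip against the patched client library (illustrative usage):
#
#   from grr_api_client import utils
#
#   key = os.urandom(32)
#   encrypted = AEADEncrypt(io.BytesIO(b"foobarbazquxnorf"), key)
#   assert utils.AEADDecrypt(encrypted, key).read() == b"foobarbazquxnorf"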
diff --git a/grr/core/grr_response_core/artifacts/flow_templates/linux.yaml b/grr/core/grr_response_core/artifacts/flow_templates/linux.yaml index b958309724..0d55447ea1 100644 --- a/grr/core/grr_response_core/artifacts/flow_templates/linux.yaml +++ b/grr/core/grr_response_core/artifacts/flow_templates/linux.yaml @@ -13,17 +13,6 @@ labels: [Users] provides: [users.homedir, users.username, users.last_logon, users.full_name] supported_os: [Linux] --- -name: LinuxPasswdHomedirs -doc: Grep passwd file for user homedirs. -sources: -- type: GREP - attributes: - paths: ['/etc/passwd'] - content_regex_list: ["^%%users.username%%:[^:]*:[^:]*:[^:]*:[^:]*:[^:]+:[^:]*\n"] -provides: [users.homedir, users.full_name] -labels: [Authentication] -supported_os: [Linux] ---- name: RedhatYumPackagesList doc: Linux output of yum list installed. sources: diff --git a/grr/core/grr_response_core/config/gui.py b/grr/core/grr_response_core/config/gui.py index e529767d5e..005301e838 100644 --- a/grr/core/grr_response_core/config/gui.py +++ b/grr/core/grr_response_core/config/gui.py @@ -8,9 +8,12 @@ config_lib.DEFINE_integer("AdminUI.port", 8000, "port to listen on") config_lib.DEFINE_integer( - "AdminUI.port_max", None, "If set and AdminUI.port is in use, attempt to " + "AdminUI.port_max", + None, + "If set and AdminUI.port is in use, attempt to " "use ports between AdminUI.port and " - "AdminUI.port_max.") + "AdminUI.port_max.", +) # Override this if you want to access admin ui extenally. Make sure it is # secured (i.e. AdminUI.webauth_manager is not NullWebAuthManager)! @@ -19,86 +22,124 @@ config_lib.DEFINE_string( "AdminUI.document_root", "%(grr_response_server/gui/static@grr-response-server|resource)", - "The main path to the static HTML pages.") + "The main path to the static HTML pages.", +) config_lib.DEFINE_string( "AdminUI.template_root", "%(grr_response_server/gui/templates@grr-response-server|resource)", - "The main path to the templates.") + "The main path to the templates.", +) config_lib.DEFINE_string( - "AdminUI.webauth_manager", "NullWebAuthManager", - "The web auth manager for controlling access to the UI.") + "AdminUI.webauth_manager", + "NullWebAuthManager", + "The web auth manager for controlling access to the UI.", +) config_lib.DEFINE_string( - "AdminUI.remote_user_header", "X-Remote-User", + "AdminUI.remote_user_header", + "X-Remote-User", "Header containing authenticated user's username. " - "Used by RemoteUserWebAuthManager.") + "Used by RemoteUserWebAuthManager.", +) config_lib.DEFINE_string( - "AdminUI.remote_email_header", "X-Remote-Extra-Email", + "AdminUI.remote_email_header", + "X-Remote-Extra-Email", "Header containing authenticated user's e-mail address. " "If present, the e-mail address of a newly created GRR user will be set " "to the header's value. " - "Used by RemoteUserWebAuthManager.") + "Used by RemoteUserWebAuthManager.", +) config_lib.DEFINE_list( - "AdminUI.remote_user_trusted_ips", ["127.0.0.1"], + "AdminUI.remote_user_trusted_ips", + ["127.0.0.1"], "Only requests coming from these IPs will be processed " - "by RemoteUserWebAuthManager.") + "by RemoteUserWebAuthManager.", +) -config_lib.DEFINE_string("AdminUI.firebase_api_key", None, - "Firebase API key. Used by FirebaseWebAuthManager.") -config_lib.DEFINE_string("AdminUI.firebase_auth_domain", None, - "Firebase API key. Used by FirebaseWebAuthManager.") config_lib.DEFINE_string( - "AdminUI.firebase_auth_provider", "GoogleAuthProvider", + "AdminUI.firebase_api_key", + None, + "Firebase API key. 
Used by FirebaseWebAuthManager.", +) +config_lib.DEFINE_string( + "AdminUI.firebase_auth_domain", + None, + "Firebase API key. Used by FirebaseWebAuthManager.", +) +config_lib.DEFINE_string( + "AdminUI.firebase_auth_provider", + "GoogleAuthProvider", "Firebase auth provider (see " "https://firebase.google.com/docs/auth/web/start). Used by " - "FirebaseWebAuthManager.") + "FirebaseWebAuthManager.", +) config_lib.DEFINE_string( - "AdminUI.csrf_secret_key", "CHANGE_ME", + "AdminUI.csrf_secret_key", + "CHANGE_ME", "This is a secret key that should be set in the server " - "config. It is used in CSRF protection.") + "config. It is used in CSRF protection.", +) -config_lib.DEFINE_bool("AdminUI.enable_ssl", False, - "Turn on SSL. This needs AdminUI.ssl_cert to be set.") +config_lib.DEFINE_bool( + "AdminUI.enable_ssl", + False, + "Turn on SSL. This needs AdminUI.ssl_cert to be set.", +) -config_lib.DEFINE_string("AdminUI.ssl_cert_file", "", - "The SSL certificate to use.") +config_lib.DEFINE_string( + "AdminUI.ssl_cert_file", "", "The SSL certificate to use." +) config_lib.DEFINE_string( - "AdminUI.ssl_key_file", None, + "AdminUI.ssl_key_file", + None, "The SSL key to use. The key may also be part of the cert file, in which " - "case this can be omitted.") + "case this can be omitted.", +) -config_lib.DEFINE_string("AdminUI.url", "http://localhost:8000/", - "The direct external URL for the user interface.") +config_lib.DEFINE_string( + "AdminUI.url", + "http://localhost:8000/", + "The direct external URL for the user interface.", +) config_lib.DEFINE_bool( - "AdminUI.use_precompiled_js", False, + "AdminUI.use_precompiled_js", + False, "If True - use Closure-compiled JS bundle. This flag " - "is experimental and is not properly supported yet.") + "is experimental and is not properly supported yet.", +) config_lib.DEFINE_string( - "AdminUI.export_command", "/usr/bin/grr_api_shell " - "'%(AdminUI.url)'", "Command to show in the fileview for downloading the " - "files from the command line.") + "AdminUI.export_command", + "/usr/bin/grr_api_shell '%(AdminUI.url)'", + "Command to show in the fileview for downloading the " + "files from the command line.", +) -config_lib.DEFINE_string("AdminUI.heading", "", - "Dashboard heading displayed in the Admin UI.") +config_lib.DEFINE_string( + "AdminUI.heading", "", "Dashboard heading displayed in the Admin UI." +) -config_lib.DEFINE_string("AdminUI.report_url", - "https://github.com/google/grr/issues", - "URL of the 'Report a problem' link.") +config_lib.DEFINE_string( + "AdminUI.report_url", + "https://github.com/google/grr/issues", + "URL of the 'Report a problem' link.", +) -config_lib.DEFINE_string("AdminUI.help_url", "/help/index.html", - "URL of the 'Help' link.") +config_lib.DEFINE_string( + "AdminUI.help_url", "/help/index.html", "URL of the 'Help' link." +) config_lib.DEFINE_string( "AdminUI.docs_location", "https://grr-doc.readthedocs.io/en/v%(Source.version_major)." "%(Source.version_minor).%(Source.version_revision)", - "Base path for GRR documentation. ") + "Base path for GRR documentation. ", +) # This accepts a comma-separated list of multiple plugins. 
Ideally, we'd use @@ -123,102 +164,133 @@ ) config_lib.DEFINE_semantic_struct( - rdf_config.AdminUIClientWarningsConfigOption, "AdminUI.client_warnings", - None, "List of per-client-label warning messages to be shown.") + rdf_config.AdminUIHuntConfig, + "AdminUI.hunt_config", + None, + "List of labels to include or exclude by default when hunts are created," + " and warning message to be shown.", +) + +config_lib.DEFINE_semantic_struct( + rdf_config.AdminUIClientWarningsConfigOption, + "AdminUI.client_warnings", + None, + "List of per-client-label warning messages to be shown.", +) config_lib.DEFINE_string( - "AdminUI.analytics_id", None, + "AdminUI.analytics_id", + None, "The Google Analytics ID to use for logging interactions when users access " "the web UI. If None (default), no Analytics script will be included and " - "no events will be logged.") + "no events will be logged.", +) config_lib.DEFINE_bool( - "AdminUI.rapid_hunts_enabled", True, + "AdminUI.rapid_hunts_enabled", + True, "If True, enabled 'rapid hunts' feature in the Hunts Wizard. Rapid hunts " "support will automatically set client rate to 0 in FileFinder hunts " - "matching certain criteria (no recursive globs, no file downloads, etc).") - -# Temporary option that allows limiting access to legacy UI renderers. Useful -# when giving access to GRR AdminUI to parties that have to use the HTTP API -# only. -# TODO(user): remove as soon as legacy rendering system is removed. -config_lib.DEFINE_list( - "AdminUI.legacy_renderers_allowed_groups", [], - "Users belonging to these groups can access legacy GRR renderers, " - "which are still used for some GRR features (manage binaries, legacy " - "browse virtual filesystem pane, etc). If this option is not set, then " - "no additional checks are performed when legacy renderers are used.") + "matching certain criteria (no recursive globs, no file downloads, etc).", +) config_lib.DEFINE_string( - "AdminUI.debug_impersonate_user", None, + "AdminUI.debug_impersonate_user", + None, "NOTE: for debugging purposes only! If set, every request AdminUI gets " "will be attributed to the specified user. Useful for checking how AdminUI " - "looks like for an access-restricted user.") + "looks like for an access-restricted user.", +) config_lib.DEFINE_bool( - "AdminUI.headless", False, + "AdminUI.headless", + False, "When running in headless mode, AdminUI ignores checks for JS/CSS compiled " "bundles being present. AdminUI.headless=True should be used to run " - "the AdminUI as an API endpoint only.") + "the AdminUI as an API endpoint only.", +) # Configuration requirements for Cloud IAP Setup. config_lib.DEFINE_string( - "AdminUI.google_cloud_project_id", None, + "AdminUI.google_cloud_project_id", + None, "Cloud Project ID for IAP. This must be set if " - "the IAPWebAuthManager is used.") + "the IAPWebAuthManager is used.", +) config_lib.DEFINE_string( - "AdminUI.google_cloud_backend_service_id", None, + "AdminUI.google_cloud_backend_service_id", + None, "GCP Cloud Backend Service ID for IAP. This must be set if " - "the IAPWebAuthManager is used.") + "the IAPWebAuthManager is used.", +) config_lib.DEFINE_string( - "AdminUI.profile_image_url", None, + "AdminUI.profile_image_url", + None, "URL to user's profile images. The placeholder {username} is replaced with " - "the actual value. E.g. https://avatars.example.com/{username}.jpg") + "the actual value. E.g. 
https://avatars.example.com/{username}.jpg", +) -config_lib.DEFINE_bool("AdminUI.csp_enabled", False, - "If True, enable the Content Security Policy header.") +config_lib.DEFINE_bool( + "AdminUI.csp_enabled", + False, + "If True, enable the Content Security Policy header.", +) config_lib.DEFINE_string( - "AdminUI.csp_policy", "{}", + "AdminUI.csp_policy", + "{}", "A JSON string of keys to lists of values to include in the Content " - "Security Policy header. E.g. {\"default-src\": [\"https:\"]}") + 'Security Policy header. E.g. {"default-src": ["https:"]}', +) config_lib.DEFINE_bool( - "AdminUI.csp_report_only", True, + "AdminUI.csp_report_only", + True, "If True, set the Content Security Policy header to 'report only' mode. " - "This flag has no effect if AdminUI.csp_enabled is False.") + "This flag has no effect if AdminUI.csp_enabled is False.", +) config_lib.DEFINE_bool( - "AdminUI.trusted_types_enabled", True, + "AdminUI.trusted_types_enabled", + True, "If True, enable the Trusted Types feature of the Content Security Policy " "header. Combined with setting 'AdminUI.trusted_types_report_only' to " "True, this setting will have no effect on the behavior of GRR - it will " "only report Trusted Types violations in your browser developer console. " "Trusted Types can prevent most common XSS attacks, see " - "https://web.dev/trusted-types/ for more information.") + "https://web.dev/trusted-types/ for more information.", +) config_lib.DEFINE_bool( - "AdminUI.trusted_types_report_only", True, + "AdminUI.trusted_types_report_only", + True, "If True, set the Trusted Types Content Security Policy header to 'report " "only' mode. When in 'report only' mode, Trusted Types violations will be " "logged to the browser developer console, but the behavior of GRR will " "not change. When this flag is set to False, Trusted Types rules will be " "enforced. This flag has no effect if AdminUI.trusted_types_enabled is " - "False. See https://web.dev/trusted-types/ for more information.") + "False. See https://web.dev/trusted-types/ for more information.", +) config_lib.DEFINE_string( - "AdminUI.csp_report_uri", None, - "URL to report Content Security Policy violations to.") + "AdminUI.csp_report_uri", + None, + "URL to report Content Security Policy violations to.", +) config_lib.DEFINE_list( - "AdminUI.csp_include_url_prefixes", ["/v2"], + "AdminUI.csp_include_url_prefixes", + ["/v2"], "Only requests for URLs with these prefixes will have a Content Security " - "Policy header added. Leave empty to include all URLs.") + "Policy header added. Leave empty to include all URLs.", +) config_lib.DEFINE_list( - "AdminUI.csp_exclude_url_prefixes", [], + "AdminUI.csp_exclude_url_prefixes", + [], "Requests for URLs with these prefixes will not have a Content Security " "Policy header added. 
This is applied to URLs after applying " - "AdminUI.csp_include_url_prefixes.") + "AdminUI.csp_include_url_prefixes.", +) diff --git a/grr/core/grr_response_core/lib/artifact_utils.py b/grr/core/grr_response_core/lib/artifact_utils.py index 50bc335ff6..fd09685403 100644 --- a/grr/core/grr_response_core/lib/artifact_utils.py +++ b/grr/core/grr_response_core/lib/artifact_utils.py @@ -6,9 +6,8 @@ """ import re -from typing import Iterable, Optional +from typing import Sequence -from grr_response_core.lib import interpolation from grr_response_proto import knowledge_base_pb2 @@ -24,28 +23,6 @@ class ArtifactProcessingError(Error): """Unable to process artifact.""" -class KbInterpolationMissingAttributesError(Error): - """An exception class for missing knowledgebase attributes.""" - - def __init__(self, attrs: Iterable[str]) -> None: - message = "Some attributes could not be located in the knowledgebase: {}" - message = message.format(", ".join(attrs)) - super().__init__(message) - - self.attrs = list(attrs) - - -class KbInterpolationUnknownAttributesError(Error): - """An exception class for non-existing knowledgebase attributes.""" - - def __init__(self, attrs: Iterable[str]) -> None: - message = "Some attributes are not part of the knowledgebase: {}" - message = message.format(", ".join(attrs)) - super().__init__(message) - - self.attrs = list(attrs) - - class KnowledgeBaseUninitializedError(Error): """Attempt to process artifact without a valid Knowledge Base.""" @@ -57,110 +34,6 @@ class KnowledgeBaseAttributesMissingError(Error): INTERPOLATED_REGEX = re.compile(r"%%([^%]+?)%%") -def InterpolateKbAttributes( - pattern: str, - knowledge_base: Optional[knowledge_base_pb2.KnowledgeBase], -) -> Iterable[str]: - """Interpolate all knowledgebase attributes in pattern. - - Args: - pattern: A string with potential interpolation markers. For example: - "/home/%%users.username%%/Downloads/" - knowledge_base: The knowledge_base to interpolate parameters from, if - knowledge_base is None, then the pattern must not have placeholders. - - Raises: - KbInterpolationMissingAttributesError: If any of the required pattern - parameters is not present in the knowledgebase. - KbInterpolationUnknownAttributesError: If any of the specified pattern - parameters is not a valid knowledgebase attribute. - KnowledgeBaseUninitializedError: If the pattern requires knowledgebase - attributes, but the knowledgebase is not initialized. - - Returns: - An iterator over all unique strings generated by expanding the pattern. - """ - - # TODO(hanuszczak): Control flow feels a bit awkward here because of error - # handling that tries not to break any functionality. With the new utilities - # it should be possible to improve the code, changing the behaviour to a more - # sane one. - interpolator = interpolation.Interpolator(pattern) - - if not knowledge_base: - if interpolator.Vars() or interpolator.Scopes(): - raise KnowledgeBaseUninitializedError( - "Knowledge base is not initialized, but the pattern requires it." 
- ) - return interpolator.Interpolate() - - missing_attr_names = set() - unknown_attr_names = set() - - for var_id in interpolator.Vars(): - var_name = interpolation.GetVarName(var_id) - - if var_name not in knowledge_base.DESCRIPTOR.fields_by_name: - unknown_attr_names.add(var_name) - continue - - value = getattr(knowledge_base, var_name) - if not value: - missing_attr_names.add(var_name) - continue - - interpolator.BindVar(var_name, value)  # pytype: disable=wrong-arg-types - - for scope_id in interpolator.Scopes(): - scope_name = interpolation.GetScopeName(scope_id) - - # We are currently only having one scope which is `users`. Restricting the - # implementation to this, to not having to differentiate between nested - # lists, protos, deeply nested scopes etc. - if scope_name != "users": - unknown_attr_names.add(scope_name) - continue - - users = knowledge_base.users - if not users: - missing_attr_names.add(scope_name) - continue - - scope_var_ids = interpolator.ScopeVars(scope_id) - scope_bound = False - scope_missing_var_names = set() - for user in users: - bindings = {} - for scope_var_id in scope_var_ids: - scope_var_name = interpolation.GetVarName(scope_var_id) - - if scope_var_name not in user.DESCRIPTOR.fields_by_name: - unknown_attr_names.add(f"{scope_name}.{scope_var_name}") - continue - - value = getattr(user, scope_var_name) - if not value: - scope_missing_var_names.add(f"{scope_name}.{scope_var_name}") - continue - - bindings[scope_var_id] = value - - if set(bindings) == set(scope_var_ids): - interpolator.BindScope(scope_id, bindings) - scope_bound = True - - if not scope_bound: - missing_attr_names.update(scope_missing_var_names) - - if unknown_attr_names: - raise KbInterpolationUnknownAttributesError(unknown_attr_names) - - if missing_attr_names: - raise KbInterpolationMissingAttributesError(missing_attr_names) - - return interpolator.Interpolate() - - def GetWindowsEnvironmentVariablesMap(knowledge_base): """Return a dictionary of environment variables and their values. @@ -223,32 +96,261 @@ def GetWindowsEnvironmentVariablesMap(knowledge_base): return environ_vars -def ExpandWindowsEnvironmentVariables(data_string, knowledge_base): - r"""Take a string and expand any windows environment variables. +def ExpandKnowledgebaseWindowsEnvVars( + unexpanded_kb: knowledge_base_pb2.KnowledgeBase, +) -> knowledge_base_pb2.KnowledgeBase: + """Expands all Windows environment variable values in the given knowledgebase. + + Unexpanded values can contain references to other environment variables, e.g. + `%SystemRoot%/System32`. Such references are expanded using knowledgebase + values recursively, e.g. the above could be expanded to `C:/Windows/System32`. + + If an environment variable value contains a reference that cannot be expanded, + this function will not raise but rather leave it in unexpanded form (similarly + to what the Windows shell does). + + If unexpanded references form a cycle, this function will raise. Args: - data_string: A string, e.g. "%SystemRoot%\\LogFiles" - knowledge_base: A knowledgebase object. + unexpanded_kb: A knowledgebase with environment variables to expand. Returns: - A string with available environment variables expanded. If we can't expand - we just return the string with the original variables. + A knowledgebase in which all environment variables are expanded.
""" - win_environ_regex = re.compile(r"%([^%]+?)%") - components = [] - offset = 0 - for match in win_environ_regex.finditer(data_string): - components.append(data_string[offset : match.start()]) - - # KB environment variables are prefixed with environ_. - kb_value = getattr( - knowledge_base, "environ_%s" % match.group(1).lower(), None - ) - if isinstance(kb_value, str) and kb_value: - components.append(kb_value) + if unexpanded_kb.os != "Windows": + raise ValueError(f"Invalid system: {unexpanded_kb.os!r}") + + kb = knowledge_base_pb2.KnowledgeBase( + environ_path="%SystemRoot%\\;%SystemRoot%\\System32\\;%SystemRoot%\\System32\\wbem\\", + environ_temp="%SystemRoot%\\TEMP", + environ_allusersappdata="%ProgramData%", + environ_allusersprofile="%ProgramData%", + environ_commonprogramfiles="%ProgramFiles%\\Common Files", + environ_commonprogramfilesx86="%ProgramFiles(x86)%\\Common Files", + environ_comspec="%SystemRoot%\\System32\\cmd.exe", + environ_driverdata="%SystemRoot%\\System32\\Drivers\\DriverData", + environ_programfiles="%SystemDrive%\\Program Files", + environ_programfilesx86="%SystemDrive%\\Program Files (x86)", + environ_programdata="%SystemDrive%\\ProgramData", + environ_systemdrive="C:", + environ_systemroot="%SystemDrive%\\Windows", + environ_windir="%SystemRoot%", + ) + kb.MergeFrom(unexpanded_kb) + + already_expanded_env_var_refs: dict[str, str] = dict() + currently_expanded_env_var_refs: set[str] = set() + + def Expand(unexpanded: str) -> str: + expanded = "" + offset = 0 + + for match in re.finditer("%[^%]+?%", unexpanded): + env_var_ref = match.group(0).upper() + + expanded += unexpanded[offset : match.start()] + offset += match.end() + + if env_var_ref in already_expanded_env_var_refs: + expanded += already_expanded_env_var_refs[env_var_ref] + continue + + if env_var_ref in currently_expanded_env_var_refs: + raise ValueError(f"Circular dependency involving {env_var_ref!r}") + + if env_var_ref == "%PATH%": + value = kb.environ_path + elif env_var_ref == "%TEMP%": + value = kb.environ_temp + elif env_var_ref == "%ALLUSERSAPPDATA%": + value = kb.environ_allusersappdata + elif env_var_ref == "%ALLUSERSPROFILE%": + value = kb.environ_allusersprofile + elif env_var_ref == "%COMMONPROGRAMFILES%": + value = kb.environ_commonprogramfiles + elif env_var_ref == "%COMMONPROGRAMFILES(X86)%": + value = kb.environ_commonprogramfilesx86 + elif env_var_ref == "%COMSPEC%": + value = kb.environ_comspec + elif env_var_ref == "%DRIVERDATA%": + value = kb.environ_driverdata + elif env_var_ref == "%PROGRAMFILES%": + value = kb.environ_programfiles + elif env_var_ref == "%PROGRAMFILES(X86)%": + value = kb.environ_programfilesx86 + elif env_var_ref == "%PROGRAMDATA%": + value = kb.environ_programdata + elif env_var_ref == "%SYSTEMDRIVE%": + value = kb.environ_systemdrive + elif env_var_ref == "%SYSTEMROOT%": + value = kb.environ_systemroot + elif env_var_ref == "%WINDIR%": + value = kb.environ_windir + else: + # We use original match instead of `env_var_ref` as the latter was case + # corrected. 
+ expanded += match.group(0) + continue + + currently_expanded_env_var_refs.add(env_var_ref) + already_expanded_env_var_refs[env_var_ref] = Expand(value) + currently_expanded_env_var_refs.remove(env_var_ref) + + expanded += already_expanded_env_var_refs[env_var_ref] + + expanded += unexpanded[offset:] + return expanded + + kb.environ_path = Expand(kb.environ_path) + kb.environ_temp = Expand(kb.environ_temp) + kb.environ_allusersappdata = Expand(kb.environ_allusersappdata) + kb.environ_allusersprofile = Expand(kb.environ_allusersprofile) + kb.environ_commonprogramfiles = Expand(kb.environ_commonprogramfiles) + kb.environ_commonprogramfilesx86 = Expand(kb.environ_commonprogramfilesx86) + kb.environ_comspec = Expand(kb.environ_comspec) + kb.environ_driverdata = Expand(kb.environ_driverdata) + kb.environ_profilesdirectory = Expand(kb.environ_profilesdirectory) + kb.environ_programfiles = Expand(kb.environ_programfiles) + kb.environ_programfilesx86 = Expand(kb.environ_programfilesx86) + kb.environ_programdata = Expand(kb.environ_programdata) + kb.environ_systemdrive = Expand(kb.environ_systemdrive) + kb.environ_systemroot = Expand(kb.environ_systemroot) + kb.environ_windir = Expand(kb.environ_windir) + return kb + + +class KnowledgeBaseInterpolation: + """Interpolation of the given pattern with knowledgebase values. + + Pattern can have placeholder variables like `%%os%%` or `%%fqdn%%` that will + be replaced by concrete values from the knowledgebase corresponding to these. + + In case of repeated knowledgebase values like `users`, every possible result + is returned. + + Because interpolation can sometimes omit certain results or use some default + values, this object exposes a `logs` property with messages when such steps + were made. These messages can then be forwarded to the user specifying the + pattern to help debug issues in case the pattern is behaving unexpectedly. + """ + + def __init__( + self, + pattern: str, + kb: knowledge_base_pb2.KnowledgeBase, + ) -> None: + self._results: list[str] = list() + self._logs: list[str] = list() + + user_attrs = [ + m["attr"] for m in re.finditer(r"%%users\.(?P<attr>\w+)%%", pattern) + ] + non_user_attrs = [ + m["attr"] for m in re.finditer(r"%%(?P<attr>\w+)%%", pattern) + ] + + if not user_attrs: + # If the pattern does not contain any user attributes, loops below won't + # yield any results. Hence, we add the pattern as-is for further expansion + # to always have at least one result to work with. + self._results.append(pattern) else: + # We start with interpolating `users` variables for each user. Because + # there can be multiple users on the system and the pattern can contain + # both user and non-user variables we have to then combine all possible + # user-based interpolations with non-user-based ones. + for user in kb.users: + # There might be cases in which the username is not strictly necessary, + # but a scenario in which we do not have a username yet have other + # values is very unlikely. Assuming that users do have usernames makes + # the logic much simpler below. + if not (username := user.username): + self._logs.append( + f"user {user!r} without username", + ) + continue + + user_result = pattern + + # `userprofile` is a base for all default values so we precompute it + # ahead and provide various heuristics in case it is not available.
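+      # For example, for a hypothetical user `alice` with no recorded profile +      # paths, the fallbacks below build the path from `environ_systemdrive` +      # when that is known and otherwise yield `C:\Users\alice`.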
+ userprofile: str + if user.userprofile: + userprofile = user.userprofile + elif user.homedir: + userprofile = user.homedir + elif kb.environ_systemdrive: + userprofile = f"{kb.environ_systemdrive}\\Users\\{username}" + else: + userprofile = f"C:\\Users\\{username}" + + for attr in user_attrs: + try: + value = getattr(user, attr) + except AttributeError as error: + raise ValueError(f"`%%users.{attr}%%` does not exist") from error + + if not value: + try: + value = { + # pylint: disable=line-too-long + # pyformat: disable + "userprofile": userprofile, + "homedir": userprofile, + "temp": f"{userprofile}\\AppData\\Local\\Temp", + "desktop": f"{userprofile}\\Desktop", + "appdata": f"{userprofile}\\AppData\\Roaming", + "localappdata": f"{userprofile}\\AppData\\Local", + "cookies": f"{userprofile}\\AppData\\Local\\Microsoft\\Windows\\INetCookies", + "recent": f"{userprofile}\\AppData\\Roaming\\Microsoft\\Windows\\Recent", + "personal": f"{userprofile}\\Documents", + "startup": f"{userprofile}\\AppData\\Roaming\\Microsoft\\Windows\\Start Menu\\Programs\\Startup", + # pylint: enable=line-too-long + # pyformat: enable + }[attr] + except KeyError: + self._logs.append( + f"user {username!r} is missing {attr!r}", + ) + break + + self._logs.append( + f"using default {value!r} for {attr!r} for user {username!r}", + ) + + user_result = user_result.replace(f"%%users.{attr}%%", value) + else: + # This will run only if we successfully filled every variable. If any + # is missing we will break the loop and this block won't be executed. + self._results.append(user_result) + + # At this point all results have no user variables, so there is only one way + # to interpolate them. We do a pass for every variable in every result to + # expand these. + for attr in non_user_attrs: + try: + value = getattr(kb, attr) + except AttributeError as error: + raise ValueError(f"`%%{attr}%%` does not exist") from error + + if not value: + self._logs.append( + f"{attr!r} is missing", + ) + # If the attribute value is missing in the knowledge base, the pattern + # cannot be interpolated and should yield no results. + self._results = [] + + # Because strings in Python are immutable, we cannot simply iterate over + # the elements of the list if we want to update them, so we use indices to + # simulate references. + for i in range(len(self._results)): + self._results[i] = self._results[i].replace(f"%%{attr}%%", value) + + @property + def results(self) -> Sequence[str]: + return self._results + + @property + def logs(self) -> Sequence[str]: + return self._logs diff --git a/grr/core/grr_response_core/lib/parser.py b/grr/core/grr_response_core/lib/parser.py deleted file mode 100644 index e8419b9684..0000000000 --- a/grr/core/grr_response_core/lib/parser.py +++ /dev/null @@ -1,123 +0,0 @@ -#!/usr/bin/env python -"""Registry for parsers and abstract classes for basic parser functionality.""" - -from typing import Any - -from grr_response_core.lib import rdfvalue -from grr_response_core.lib.parsers import abstract -from grr_response_core.lib.rdfvalues import client_action as rdf_client_action -from grr_response_core.lib.rdfvalues import client_fs as rdf_client_fs -from grr_response_core.lib.rdfvalues import file_finder as rdf_file_finder -from grr_response_core.lib.rdfvalues import protodict as rdf_protodict -from grr_response_core.lib.util import precondition - - -# TODO(hanuszczak): Type command parsers. -class CommandParser(abstract.SingleResponseParser[Any]): - """Abstract parser for processing command output. 
- - Must implement the Parse function. - """ - - # TODO(hanuszczak): This should probably be abstract or private. - def Parse(self, cmd, args, stdout, stderr, return_val, knowledge_base): - """Take the output of the command run, and yield RDFValues.""" - - def ParseResponse(self, knowledge_base, response): - precondition.AssertType(response, rdf_client_action.ExecuteResponse) - - return self.Parse( - cmd=response.request.cmd, - args=response.request.args, - stdout=response.stdout, - stderr=response.stderr, - return_val=response.exit_status, - knowledge_base=knowledge_base, - ) - - def CheckReturn(self, cmd, return_val): - """Raise if return value is bad.""" - if return_val != 0: - message = ( - "Parsing output of command '{command}' failed, as command had " - "{code} return code" - ) - raise abstract.ParseError(message.format(command=cmd, code=return_val)) - - -# TODO(hanuszczak): Type WMI query parsers. -class WMIQueryParser(abstract.MultiResponseParser[Any]): - """Abstract parser for processing WMI query output.""" - - # TODO(hanuszczak): Make this abstract. - def ParseMultiple(self, result_dicts): - """Take the output of the query, and yield RDFValues.""" - - def ParseResponses(self, knowledge_base, responses): - del knowledge_base # Unused. - precondition.AssertIterableType(responses, rdf_protodict.Dict) - - return self.ParseMultiple(responses) - - -# TODO(hanuszczak): Type registry value parsers. -class RegistryValueParser(abstract.SingleResponseParser[Any]): - """Abstract parser for processing Registry values.""" - - # TODO(hanuszczak): Make this abstract. - # TODO(hanuszczak): Make order of arguments consistent with other methods. - def Parse(self, stat, knowledge_base): - """Take the stat, and yield RDFValues.""" - - def ParseResponse(self, knowledge_base, response): - # TODO(hanuszczak): Why some of the registry value parsers anticipate string - # response? This is stupid. - precondition.AssertType( - response, (rdf_client_fs.StatEntry, rdfvalue.RDFString) - ) - - return self.Parse(response, knowledge_base) - - -# TODO(hanuszczak): Type registry parsers. -class RegistryParser(abstract.SingleResponseParser[Any]): - """Abstract parser for processing Registry values.""" - - # TODO(hanuszczak): Make this abstract. - # TODO(hanuszczak): Make order of arguments consistent with other methods. - def Parse(self, stat, knowledge_base): - """Take the stat, and yield RDFValues.""" - - def ParseResponse(self, knowledge_base, response): - precondition.AssertType(response, rdf_client_fs.StatEntry) - - return self.Parse(response, knowledge_base) - - -# TODO(hanuszczak): Type registry multi-parsers. -class RegistryMultiParser(abstract.MultiResponseParser[Any]): - """Abstract parser for processing registry values.""" - - # TODO(hanuszczak): Make this abstract. - def ParseMultiple(self, stats, knowledge_base): - raise NotImplementedError() - - def ParseResponses(self, knowledge_base, responses): - precondition.AssertIterableType(responses, rdf_client_fs.StatEntry) - - return self.ParseMultiple(responses, knowledge_base) - - -# TODO(hanuszczak): Type grep parsers. -class GrepParser(abstract.SingleResponseParser[Any]): - """Parser for the results of grep artifacts.""" - - # TODO(hanuszczak): Make this abstract. - # TODO(hanuszczak): Make order of arguments consistent with other methods. 
- def Parse(self, response, knowledge_base): - """Parse the FileFinderResult.matches.""" - - def ParseResponse(self, knowledge_base, response): - precondition.AssertType(response, rdf_file_finder.FileFinderResult) - - return self.Parse(response, knowledge_base) diff --git a/grr/core/grr_response_core/lib/parsers/__init__.py b/grr/core/grr_response_core/lib/parsers/__init__.py deleted file mode 100644 index 5223f06f84..0000000000 --- a/grr/core/grr_response_core/lib/parsers/__init__.py +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/env python -"""Generic parsers (for GRR server and client code).""" - -from typing import Iterator, Type, TypeVar - -from grr_response_core.lib import factory -from grr_response_core.lib import rdfvalue -from grr_response_core.lib.parsers import abstract -from grr_response_core.lib.util import collection -from grr_response_core.lib.util import precondition - -ParseError = abstract.ParseError - -Parser = abstract.Parser -SingleResponseParser = abstract.SingleResponseParser -SingleFileParser = abstract.SingleFileParser -MultiResponseParser = abstract.MultiResponseParser -MultiFileParser = abstract.MultiFileParser - -_Factory = factory.Factory -_RDFValue = rdfvalue.RDFValue - -SINGLE_RESPONSE_PARSER_FACTORY: _Factory[SingleResponseParser[_RDFValue]] = ( - _Factory(SingleResponseParser[_RDFValue]) -) - -MULTI_RESPONSE_PARSER_FACTORY: _Factory[MultiResponseParser[_RDFValue]] = ( - _Factory(MultiResponseParser[_RDFValue]) -) - -SINGLE_FILE_PARSER_FACTORY: _Factory[SingleFileParser[_RDFValue]] = _Factory( - SingleFileParser[_RDFValue] -) - -MULTI_FILE_PARSER_FACTORY: _Factory[MultiFileParser[_RDFValue]] = _Factory( - MultiFileParser[_RDFValue] -) - - -_P = TypeVar("_P", bound=Parser) - - -class ArtifactParserFactory: - """A factory wrapper class that yields parsers for specific artifact.""" - - def __init__(self, artifact_name: str) -> None: - """Initializes the artifact parser factory. - - Args: - artifact_name: A name of the artifact this factory is supposed to provide - parser instances for. 
- """ - precondition.AssertType(artifact_name, str) - self._artifact_name = artifact_name - - def HasParsers(self) -> bool: - return ( - self.HasSingleResponseParsers() - or self.HasMultiResponseParsers() - or self.HasSingleFileParsers() - or self.HasMultiFileParsers() - ) - - def HasSingleResponseParsers(self) -> bool: - return any(self.SingleResponseParserTypes()) - - def SingleResponseParsers(self) -> Iterator[SingleResponseParser[_RDFValue]]: - return self._CreateSupportedParsers(SINGLE_RESPONSE_PARSER_FACTORY) - - def SingleResponseParserNames(self) -> Iterator[str]: - return self._SupportedNames(SINGLE_RESPONSE_PARSER_FACTORY) - - def SingleResponseParserTypes( - self, - ) -> Iterator[Type[SingleResponseParser[_RDFValue]]]: - return self._SupportedTypes(SINGLE_RESPONSE_PARSER_FACTORY) - - def HasMultiResponseParsers(self) -> bool: - return any(self.MultiResponseParserTypes()) - - def MultiResponseParsers(self) -> Iterator[MultiResponseParser[_RDFValue]]: - return self._CreateSupportedParsers(MULTI_RESPONSE_PARSER_FACTORY) - - def MultiResponseParserNames(self) -> Iterator[str]: - return self._SupportedNames(MULTI_RESPONSE_PARSER_FACTORY) - - def MultiResponseParserTypes( - self, - ) -> Iterator[Type[MultiResponseParser[_RDFValue]]]: - return self._SupportedTypes(MULTI_RESPONSE_PARSER_FACTORY) - - def HasSingleFileParsers(self) -> bool: - return any(self.SingleFileParserTypes()) - - def SingleFileParsers(self) -> Iterator[SingleFileParser[_RDFValue]]: - return self._CreateSupportedParsers(SINGLE_FILE_PARSER_FACTORY) - - def SingleFileParserNames(self) -> Iterator[str]: - return self._SupportedNames(SINGLE_FILE_PARSER_FACTORY) - - def SingleFileParserTypes( - self, - ) -> Iterator[Type[SingleFileParser[_RDFValue]]]: - return self._SupportedTypes(SINGLE_FILE_PARSER_FACTORY) - - def HasMultiFileParsers(self) -> bool: - return any(self.MultiFileParserTypes()) - - def MultiFileParsers(self) -> Iterator[MultiFileParser[_RDFValue]]: - return self._CreateSupportedParsers(MULTI_FILE_PARSER_FACTORY) - - def MultiFileParserNames(self) -> Iterator[str]: - return self._SupportedNames(MULTI_FILE_PARSER_FACTORY) - - def MultiFileParserTypes(self) -> Iterator[Type[MultiFileParser[_RDFValue]]]: - return self._SupportedTypes(MULTI_FILE_PARSER_FACTORY) - - def AllParserTypes(self) -> Iterator[Type[Parser[_RDFValue]]]: - """Returns all known parser types applicable for the artifact.""" - return collection.Flatten([ - self.SingleResponseParserTypes(), - self.MultiResponseParserTypes(), - self.SingleFileParserTypes(), - self.MultiFileParserTypes(), - ]) - - def _CreateSupportedParsers(self, fac: _Factory[_P]) -> Iterator[_P]: - for name in self._SupportedNames(fac): - yield fac.Create(name) - - def _SupportedTypes(self, fac: _Factory[_P]) -> Iterator[Type[_P]]: - for name in self._SupportedNames(fac): - yield fac.GetType(name) - - def _SupportedNames(self, fac: _Factory[_P]) -> Iterator[str]: - for name in fac.Names(): - cls = fac.GetType(name) - if self._artifact_name in cls.supported_artifacts: - yield name diff --git a/grr/core/grr_response_core/lib/parsers/abstract.py b/grr/core/grr_response_core/lib/parsers/abstract.py deleted file mode 100644 index 09303863cf..0000000000 --- a/grr/core/grr_response_core/lib/parsers/abstract.py +++ /dev/null @@ -1,169 +0,0 @@ -#!/usr/bin/env python -"""Registry for parsers and abstract classes for basic parser functionality.""" - -import abc -from typing import Generic -from typing import IO -from typing import Iterable -from typing import Iterator -from typing import 
Optional
-from typing import TypeVar
-
-from grr_response_core.lib import rdfvalue
-from grr_response_core.lib.rdfvalues import client as rdf_client
-from grr_response_core.lib.rdfvalues import paths as rdf_paths
-
-
-class ParseError(Exception):
-  """A class for errors raised when parsers encounter problems while parsing.
-
-  Attributes:
-    cause: An optional exception that caused this exception to be raised.
-  """
-
-  def __init__(self, message: str, cause: Optional[Exception] = None) -> None:
-    """Initializes the error.
-
-    Args:
-      message: A message string explaining why the exception was raised.
-      cause: An optional exception that caused this exception to be raised.
-
-    Returns:
-      Nothing.
-    """
-    if cause is not None:
-      message = "{message}: {cause}".format(message=message, cause=cause)
-
-    super().__init__(message)
-    self.cause = cause
-
-
-_O = TypeVar("_O")  # Type variable for parser output types.
-
-
-class Parser(Generic[_O], metaclass=abc.ABCMeta):
-  """A base interface for all parser types."""
-
-  # TODO(hanuszczak): Once support for Python 2 is dropped, properties below
-  # can be defined as abstract, ensuring that all subclasses really define
-  # them.
-
-  # TODO(hanuszczak): It would be better if parsers identified types that they
-  # can parse rather than declare supported artifacts (which are defined in a
-  # completely different place, in an external repository). Then parsers can
-  # have well-defined types.
-
-  # A list of string identifiers for artifacts that this parser can process.
-  supported_artifacts = []
-
-  # Any knowledgebase dependencies required by the parser. Dependencies
-  # required by the artifact itself will be inferred from the artifact
-  # definition.
-  knowledgebase_dependencies = []
-
-  # TODO(hanuszczak): Parsers should have well-defined types, and what they
-  # can return should be defined statically. Moreover, it is not possible to
-  # enforce that a parser really yields what `output_types` specifies, so this
-  # serves no purpose other than documentation.
-  #
-  # There is only one parser that returns more than one type of value, so
-  # maybe it should be re-evaluated whether this field actually makes sense.
-
-  # The semantic types that can be produced by this parser.
-  output_types = []
-
-
-class SingleResponseParser(Parser[_O]):
-  """An abstract class for parsers that are able to parse individual replies."""
-
-  @abc.abstractmethod
-  def ParseResponse(
-      self,
-      knowledge_base: rdf_client.KnowledgeBase,
-      response: rdfvalue.RDFValue,
-  ) -> Iterator[_O]:
-    """Parses a single response from the client.
-
-    Args:
-      knowledge_base: A knowledgebase for the client that provided the
-        response.
-      response: An RDF value representing the result of artifact collection.
-
-    Raises:
-      ParseError: If the parser is not able to parse the response.
-    """
-
-
-class SingleFileParser(Parser[_O]):
-  """An interface for parsers that read file content."""
-
-  # TODO(hanuszczak): Define a clear file reader interface.
-
-  @abc.abstractmethod
-  def ParseFile(
-      self,
-      knowledge_base: rdf_client.KnowledgeBase,
-      pathspec: rdf_paths.PathSpec,
-      filedesc: IO[bytes],
-  ) -> Iterator[_O]:
-    """Parses a single file from the client.
-
-    Args:
-      knowledge_base: A knowledgebase for the client to whom the file belongs.
-      pathspec: A pathspec corresponding to the parsed file.
-      filedesc: A file-like object to parse.
-
-    Yields:
-      RDF values with parsed data.
-
-    Raises:
-      ParseError: If the parser is not able to parse the file.
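To make the `ParseFile` contract concrete: a file parser received a knowledgebase, a pathspec, and an open byte stream, and yielded parsed values. Below is a minimal runnable sketch using plain Python stand-ins for the RDF types; the class and its output format are invented for the example and are not GRR API.

```python
import io
from typing import IO, Iterator, Tuple


class PasswdFileParser:
  """Hypothetical file parser mirroring the ParseFile signature above."""

  def ParseFile(
      self,
      knowledge_base: object,  # Stand-in for rdf_client.KnowledgeBase.
      pathspec: str,           # Stand-in for rdf_paths.PathSpec.
      filedesc: IO[bytes],
  ) -> Iterator[Tuple[str, str]]:
    """Yields (user, shell) pairs from passwd-style content."""
    del knowledge_base, pathspec  # Unused in this sketch.
    for line in filedesc.read().decode("utf-8").splitlines():
      fields = line.split(":")
      if len(fields) == 7:
        yield fields[0], fields[6]


data = io.BytesIO(b"root:x:0:0:root:/root:/bin/bash\n")
print(list(PasswdFileParser().ParseFile(None, "/etc/passwd", data)))
# [('root', '/bin/bash')]
```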
-    """
-
-
-class MultiResponseParser(Parser[_O]):
-  """An interface for parsers requiring all replies in order to parse them."""
-
-  @abc.abstractmethod
-  def ParseResponses(
-      self,
-      knowledge_base: rdf_client.KnowledgeBase,
-      responses: Iterable[rdfvalue.RDFValue],
-  ) -> Iterator[_O]:
-    """Parses responses from the client.
-
-    Args:
-      knowledge_base: A knowledgebase for the client that provided responses.
-      responses: A list of RDF values with results of artifact collection.
-
-    Raises:
-      ParseError: If the parser is not able to parse the responses.
-    """
-
-
-class MultiFileParser(Parser[_O]):
-  """An interface for parsers that need to read content of multiple files."""
-
-  # TODO(hanuszczak): The file interface mentioned above should also have a
-  # `pathspec` property. With the current solution there is no way to enforce
-  # on the type level that `pathspecs` and `filedescs` have the same length
-  # and there is no clear correlation between the two. One possible solution
-  # would be to use a list of pairs, but this is ugly to document.
-
-  @abc.abstractmethod
-  def ParseFiles(
-      self,
-      knowledge_base: rdf_client.KnowledgeBase,
-      pathspecs: Iterable[rdf_paths.PathSpec],
-      filedescs: Iterable[IO[bytes]],
-  ) -> Iterator[_O]:
-    """Parses multiple files from the client.
-
-    Args:
-      knowledge_base: A knowledgebase for the client to whom the files belong.
-      pathspecs: A list of pathspecs corresponding to the parsed files.
-      filedescs: A list of file-like objects to parse.
-
-    Yields:
-      RDF values with parsed data.
-
-    Raises:
-      ParseError: If the parser is not able to parse the files.
-    """
diff --git a/grr/core/grr_response_core/lib/parsers/all.py b/grr/core/grr_response_core/lib/parsers/all.py
deleted file mode 100644
index a295513d2c..0000000000
--- a/grr/core/grr_response_core/lib/parsers/all.py
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/usr/bin/env python
-"""A module for registering all known parsers."""
-
-
-def Register():
-  """Adds all known parsers to the registry."""
-  # pyformat: disable
-  # pyformat: enable
diff --git a/grr/core/grr_response_core/lib/parsers/config_file.py b/grr/core/grr_response_core/lib/parsers/config_file.py
deleted file mode 100644
index e921dcce70..0000000000
--- a/grr/core/grr_response_core/lib/parsers/config_file.py
+++ /dev/null
@@ -1,357 +0,0 @@
-#!/usr/bin/env python
-"""Simple parsers for configuration files."""
-
-from collections import abc
-import logging
-import re
-
-from grr_response_core.lib import lexer
-from grr_response_core.lib.rdfvalues import config_file as rdf_config_file
-from grr_response_core.lib.util import precondition
-
-
-def AsIter(arg):
-  """Encapsulates an argument in a tuple, if it's not already iterable."""
-  if isinstance(arg, str):
-    rslt = [arg]
-  elif isinstance(arg, abc.Iterable):
-    rslt = arg
-  elif not arg:
-    rslt = []
-  else:
-    rslt = [arg]
-  return tuple(rslt)
-
-
-# GRR lexer implementation of an ssv parser. Considered using
-# https://github.com/Eugeny/reconfigure/blob/master/reconfigure/parsers/ssv.py
-# but it doesn't seem to actually support forward lookup.
-class FieldParser(lexer.Lexer):
-  r"""A generalized field-based parser that splits entries into fields.
-
-  Entries refer to distinct records within the text content, for example each
-  line of /etc/passwd or an ssh configuration attribute.
-  Fields are elements that make up the entry, for example the individual
-  parameters in /etc/passwd.
-
-  The parser supports:
-  - Flexible field-based separators (e.g. spaces, commas, colons).
-  - Identification and removal of line comments.
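Since every pattern argument in the field parsers below goes through `AsIter`, its exact behavior matters: strings are wrapped rather than iterated, other iterables pass through, and falsy scalars become empty. A few expected input/output pairs, assuming the `AsIter` definition above:

```python
# Behavior implied by the AsIter definition above.
assert AsIter("#") == ("#",)                # A string is wrapped, not iterated.
assert AsIter((r"\"", r"'")) == (r"\"", r"'")  # Iterables pass through as tuples.
assert AsIter(None) == ()                   # Falsy, non-iterable values vanish.
assert AsIter(0) == ()
```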
Inline comments (e.g. /*...*/) - are not supported. - - Line continuation detection. - - Multiline quotes. - - The parser uses the following attributes as defaults: - - comments: # - - cont: \ (followed by any amount of whitespace) - - ml_quote: False (by default, quotes must close before newlines). - - quot: Both " and ' characters. - - sep: Whitespace - - term: Newlines. - - To override default values, pass in appropriate keywords with a python - compatible regex string. - """ - - def __init__( - self, - comments=r"#", - cont=r"\\\s*\n", - ml_quote=False, - quot=(r"\"", r"'"), - sep=r"[ \t\f\v]+", - term=r"[\r\n]", - verbose=0, - ): - r"""A generalized field-based parser. - - Handles whitespace, csv etc. - - Args: - comments: Line comment patterns (e.g. "#"). - cont: Continuation patterns (e.g. "\\"). - ml_quote: Boolean flag to allow quoted strings to span lines. - quot: Quotation patterns (e.g. "\\"" or "'"). - sep: Field separator patterns (e.g. "[\\s,]"). - term: Entry termination patterns (e.g. "\\n"). - verbose: Enable verbose mode for the lexer. Useful for debugging. - """ - super().__init__() - self.entries = [] - self.fields = [] - self.field = "" - self.comments = AsIter(comments) - self.cont = AsIter(cont) - self.ml_quote = AsIter(ml_quote) - self.quot = AsIter(quot) - self.sep = AsIter(sep) - self.term = AsIter(term) - self.verbose = verbose - self._GenStates() - - def Reset(self): - super().Reset() - self.entries = [] - self.fields = [] - self.field = "" - - def _GenStates(self): - """Generate the lexer states.""" - self.GenCommentState() - self.GenFwdState() - self.GenQuotedState() - self.GenCatchallState() - - def _AddToken(self, state_regex, regex, actions, next_state): - self._tokens.append(lexer.Token(state_regex, regex, actions, next_state)) - - def GenCommentState(self): - if self.comments: - self._AddToken("COMMENT", r"\n", "PushBack,PopState", None) - self._AddToken("COMMENT", ".", None, None) - - def GenFwdState(self): - """Generates forwarding state rules. - - The lexer will fast forward until there is string content. The - string content will be returned to the string processor. - """ - for c in self.cont: - self._AddToken("FWD", c, None, None) - for s in self.sep: - self._AddToken("FWD", s, None, None) - self._AddToken("FWD", ".", "PushBack,PopState", None) - - def GenQuotedState(self): - """Generate string matching state rules.""" - for i, q in enumerate(self.quot): - label = "%s_STRING" % i - escaped = re.escape(q) - self._AddToken(label, escaped, "PopState", None) - self._AddToken(label, q, "PopState", None) - if self.ml_quote: - self._AddToken(label, r"\n", None, None) - else: - self._AddToken(label, r"\n", "BadLine", None) - self._AddToken(label, ".", "AddToField", None) - - def GenCatchallState(self): - """Generate string matching state rules. - - This sets up initial state handlers that cover both the 'INITIAL' state - and the intermediate content between fields. - - The lexer acts on items with precedence: - - continuation characters: use the fast forward state rules. - - field separators: finalize processing the field. - - quotation characters: use the quotation state rules. 
- """ - for c in self.comments: - self._AddToken(".", c, "PushState,EndField", "COMMENT") - for c in self.cont: - self._AddToken(".", c, "PushState", "FWD") - for t in self.term: - self._AddToken(".", t, "EndEntry", None) - for s in self.sep: - self._AddToken(".", s, "EndField", None) - for i, q in enumerate(self.quot): - self._AddToken(".", q, "PushState", "%s_STRING" % i) - self._AddToken(".", ".", "AddToField", None) - - def EndEntry(self, **_): - self.EndField() - if self.fields: - # Copy the fields into the processed entries. - self.entries.append(self.fields[:]) - self.fields = [] - - def AddToField(self, string="", **_): - if string: - self.field += string - - def EndField(self, **_): - if self.field: - self.fields.append(self.field[:]) - self.field = "" - - def BadLine(self, **_): - logging.debug("Skipped bad line in file at %s", self.processed) - self.field = "" - - def ParseEntries(self, data: str): - precondition.AssertType(data, str) - - # Flush any old results. - self.Reset() - self.Feed(data) - self.Close() - # In case there isn't a terminating field at the end of the feed, e.g. \n - self.EndEntry() - return self.entries - - -class KeyValueParser(FieldParser): - """A generalized KeyValue parser that splits entries into key/value pairs. - - Capabilities and parameters are identical to FieldParser, with one difference. - The parser also accepts the parameter "kv_sep" - Patterns specified in kv_sep are used to demarcate key/value processing. - - kv_sep defaults to "=" - """ - - def __init__( - self, - comments=r"#", - cont=r"\\\s*\n", - kv_sep="=", - ml_quote=False, - quot=(r"\"", r"'"), - sep=r"[ \t\f\v]+", - term=r"[\r\n]", - verbose=0, - ): - """A generalized key-value parser. - - Handles whitespace, csv etc. - - Args: - comments: Line comment patterns (e.g. "#"). - cont: Continuation patterns (e.g. "\\"). - kv_sep: Key/Value separators (e.g. "=" or ":"). - ml_quote: Boolean flag to allow quoted strings to span lines. - quot: Quotation patterns (e.g. "\\"" or "'"). - sep: Field separator patterns (e.g. "[\\s,]"). - term: Entry termination patterns (e.g. "\\n"). - verbose: Enable verbose mode for the lexer. Useful for debugging. 
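As a usage sketch against a checkout from before this change (the module is deleted by this very diff, so the import no longer resolves afterwards), `ParseEntries` splits text into per-entry field lists with comments stripped; the expected output is inferred from the lexer rules above rather than taken from GRR documentation.

```python
# Hypothetical usage; only valid against a pre-change checkout.
from grr_response_core.lib.parsers.config_file import FieldParser

parser = FieldParser()
entries = parser.ParseEntries("alpha beta  # trailing comment\ngamma delta\n")
print(entries)  # [['alpha', 'beta'], ['gamma', 'delta']]
```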
-    """
-    self.kv_sep = AsIter(kv_sep)
-    super().__init__(
-        comments=comments,
-        cont=cont,
-        ml_quote=ml_quote,
-        quot=quot,
-        sep=sep,
-        term=term,
-        verbose=verbose,
-    )
-    self.key_field = ""
-
-  def _GenStates(self):
-    self.GenCommentState()
-    self.GenFwdState()
-    self.GenQuotedState()
-    self.GenMatchFirstState()
-    self.GenInitialState()
-    self.GenKeyState()
-    self.GenValueState()
-    self.GenCatchallState()
-
-  def GenMatchFirstState(self):
-    for i, q in enumerate(self.quot):
-      self._AddToken(".", q, "PushState", "%s_STRING" % i)
-    for c in self.cont:
-      self._AddToken(".", c, "PushState", "FWD")
-
-  def GenInitialState(self):
-    for c in self.comments:
-      self._AddToken("INITIAL", c, "PushState,EndField", "COMMENT")
-    for t in self.term:
-      self._AddToken("INITIAL", t, "EndField,EndEntry", None)
-    for c in self.sep:
-      self._AddToken("INITIAL", c, "PushState", "FWD")
-    for k in self.kv_sep:
-      self._AddToken("INITIAL", k, "BadLine", None)
-    self._AddToken("INITIAL", ".", "PushState,PushBack", "KEY")
-
-  def GenKeyState(self):
-    for c in self.comments:
-      self._AddToken(
-          "KEY", c, "EndKeyField,EndEntry,PopState,PushBack", "COMMENT"
-      )
-    for t in self.term:
-      self._AddToken("KEY", t, "EndKeyField,EndEntry,PopState", None)
-    for k in self.kv_sep:
-      self._AddToken("KEY", k, "EndKeyField", "VALUE")
-
-  def GenValueState(self):
-    for c in self.comments:
-      self._AddToken(
-          "VALUE", c, "EndField,EndEntry,PopState,PushBack", "COMMENT"
-      )
-    for t in self.term:
-      self._AddToken("VALUE", t, "EndField,EndEntry,PopState", None)
-    for s in self.sep:
-      self._AddToken("VALUE", s, "EndField", None)
-
-  def GenCatchallState(self):
-    self._AddToken(".", ".", "AddToField", None)
-
-  def EndKeyField(self, **_):
-    self.key_field = self.field
-    self.field = ""
-
-  def EndEntry(self, **_):
-    # Finalize processing for non-terminated entries. Key first, then fields.
-    if self.field and not self.key_field:
-      self.EndKeyField()
-    else:
-      self.EndField()
-    # Set up the entry.
-    key_field = self.key_field.strip()
-    if key_field:
-      self.entries.append({key_field: self.fields})
-    self.key_field = ""
-    self.fields = []
-
-  def ParseToOrderedDict(self, data):
-    result = dict()
-    for field in self.ParseEntries(data):
-      result.update(field)
-    return result
-
-
-class RsyslogFieldParser(FieldParser):
-  """Field parser for syslog configurations."""
-
-  log_rule_re = re.compile(r"([\w,\*]+)\.([\w,!=\*]+)")
-  destinations = dict([
-      ("TCP", re.compile(r"(?:@@)([^;]*)")),
-      ("UDP", re.compile(r"(?:@)([^;]*)")),
-      ("PIPE", re.compile(r"(?:\|)([^;]*)")),
-      ("NONE", re.compile(r"(?:~)([^;]*)")),
-      ("SCRIPT", re.compile(r"(?:\^)([^;]*)")),
-      ("MODULE", re.compile(r"(?::om\w:)([^;]*)")),
-      ("FILE", re.compile(r"-?(/[^;]*)")),
-      ("WALL", re.compile(r"(\*)")),
-  ])  # pyformat: disable
-
-  def ParseAction(self, action):
-    """Extract log configuration data from rsyslog actions.
-
-    Actions have the format:
-      <facility>/<severity> <destination>;<template>
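The `destinations` table above drives classification of rsyslog actions; note that insertion order matters, since the `@@` (TCP) pattern must be tried before the single-`@` (UDP) pattern. A self-contained sketch of that matching logic, using a subset of the same regexes (the action string is a made-up example):

```python
import re

# Subset of the RsyslogFieldParser.destinations patterns shown above.
destinations = {
    "TCP": re.compile(r"(?:@@)([^;]*)"),
    "UDP": re.compile(r"(?:@)([^;]*)"),
    "FILE": re.compile(r"-?(/[^;]*)"),
}

action = "@@loghost.example.com:514;RSYSLOG_ForwardFormat"
for dst_type, pattern in destinations.items():
  match = pattern.match(action)
  if match:
    print(dst_type, match.group(1))  # TCP loghost.example.com:514
    break
```

If TCP were dropped from the table, the same action would match UDP with a stray leading `@` left in the captured group, which is why an ordered mapping is the right structure here.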