From fcb019db14e7a2f314102529e0f6c68a36b66bca Mon Sep 17 00:00:00 2001 From: zhujiem Date: Tue, 5 Sep 2023 21:49:42 +0800 Subject: [PATCH] Add Logram from TSE'20 --- README.md | 7 +- THIRD_PARTIES.md | 1 + logparser/Logram/README.md | 60 ++++++++ logparser/Logram/__init__.py | 1 + logparser/Logram/benchmark.py | 183 ++++++++++++++++++++++++ logparser/Logram/demo.py | 22 +++ logparser/Logram/requirements.txt | 4 + logparser/Logram/src/Common.py | 50 +++++++ logparser/Logram/src/DictionarySetUp.py | 53 +++++++ logparser/Logram/src/Logram.py | 59 ++++++++ logparser/Logram/src/MatchToken.py | 120 ++++++++++++++++ logparser/Logram/src/__init__.py | 0 logparser/version.py | 2 +- setup.py | 2 +- 14 files changed, 559 insertions(+), 5 deletions(-) create mode 100644 logparser/Logram/README.md create mode 100644 logparser/Logram/__init__.py create mode 100644 logparser/Logram/benchmark.py create mode 100644 logparser/Logram/demo.py create mode 100644 logparser/Logram/requirements.txt create mode 100644 logparser/Logram/src/Common.py create mode 100644 logparser/Logram/src/DictionarySetUp.py create mode 100644 logparser/Logram/src/Logram.py create mode 100644 logparser/Logram/src/MatchToken.py create mode 100644 logparser/Logram/src/__init__.py diff --git a/README.md b/README.md index 77b06654..e59b4acb 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@
Python version Pypi version -Pypi version +Pypi version Downloads License License @@ -22,7 +22,7 @@ Logparser provides a machine learning toolkit and benchmarks for automated log p ### 🌈 New updates -+ Since the first release of logparser, many PRs and issues have been submitted due to incompatibility with Python 3. Finally, we update logparser v1.0.0 with support for Python 3. Thanks for all the contributions! ([#PR86](https://github.com/logpai/logparser/pull/86), [#PR85](https://github.com/logpai/logparser/pull/85), [#PR83](https://github.com/logpai/logparser/pull/83), [#PR80](https://github.com/logpai/logparser/pull/80), [#PR65](https://github.com/logpai/logparser/pull/65), [#PR57](https://github.com/logpai/logparser/pull/57), [#PR53](https://github.com/logpai/logparser/pull/53), [#PR52](https://github.com/logpai/logparser/pull/52), [#PR51](https://github.com/logpai/logparser/pull/51), [#PR49](https://github.com/logpai/logparser/pull/49), [#PR18](https://github.com/logpai/logparser/pull/18), [#PR22](https://github.com/logpai/logparser/pull/22)) ++ Since the first release of logparser, many PRs and issues have been submitted due to incompatibility with Python 3. Finally, we update logparser v1.0.0 with support for Python 3. Thanks for all the contributions ([#PR86](https://github.com/logpai/logparser/pull/86), [#PR85](https://github.com/logpai/logparser/pull/85), [#PR83](https://github.com/logpai/logparser/pull/83), [#PR80](https://github.com/logpai/logparser/pull/80), [#PR65](https://github.com/logpai/logparser/pull/65), [#PR57](https://github.com/logpai/logparser/pull/57), [#PR53](https://github.com/logpai/logparser/pull/53), [#PR52](https://github.com/logpai/logparser/pull/52), [#PR51](https://github.com/logpai/logparser/pull/51), [#PR49](https://github.com/logpai/logparser/pull/49), [#PR18](https://github.com/logpai/logparser/pull/18), [#PR22](https://github.com/logpai/logparser/pull/22))! + We build the package wheel logparser3 and release it on pypi. 
Please install via `pip install logparser3`. + We refactor the code structure and beautify the code via the Python code formatter black. @@ -43,6 +43,7 @@ Logparser provides a machine learning toolkit and benchmarks for automated log p | ICDM'16 | [Spell](https://github.com/logpai/logparser/tree/main/logparser/Spell#spell) | [Spell: Streaming Parsing of System Event Logs](https://www.cs.utah.edu/~lifeifei/papers/spell.pdf), by Min Du, Feifei Li. | | ICWS'17 | [Drain](https://github.com/logpai/logparser/tree/main/logparser/Drain#drain) | [Drain: An Online Log Parsing Approach with Fixed Depth Tree](https://jiemingzhu.github.io/pub/pjhe_icws2017.pdf), by Pinjia He, Jieming Zhu, Zibin Zheng, and Michael R. Lyu.| | ICPC'18 | [MoLFI](https://github.com/logpai/logparser/tree/main/logparser/MoLFI#molfi) | [A Search-based Approach for Accurate Identification of Log Message Formats](http://publications.uni.lu/bitstream/10993/35286/1/ICPC-2018.pdf), by Salma Messaoudi, Annibale Panichella, Domenico Bianculli, Lionel Briand, Raimondas Sasnauskas. | +| TSE'20 | [Logram](https://github.com/logpai/logparser/tree/main/logparser/Logram#logram) | [Logram: Efficient Log Parsing Using n-Gram Dictionaries](https://arxiv.org/pdf/2001.03038.pdf), by Hetong Dai, Heng Li, Che-Shao Chen, Weiyi Shang, and Tse-Hsun (Peter) Chen. | :bulb: Welcome to submit a PR to push your parser code to logparser and add your paper to the table. @@ -121,7 +122,7 @@ The main goal of logparser is used for research and benchmark purpose. Researche + Please be aware of the licenses of [third-party libraries](https://github.com/logpai/logparser/blob/main/THIRD_PARTIES.md) used in logparser. We suggest to keep one parser and delete the others and then re-build the package wheel. This would not break the use of logparser. + Please enhance logparser with efficiency and scalability with multi-processing, add failure recovery, add persistence to disk or message queue Kafka. 
-+ [Drain3](https://github.com/logpai/Drain3) provides a good example for your reference that is built with [practical enhancements] for production scenarios. ++ [Drain3](https://github.com/logpai/Drain3) provides a good example for your reference that is built with [practical enhancements](https://github.com/logpai/Drain3#new-features) for production scenarios. ### Citation 👋 If you use our logparser tools or benchmarking results in your publication, please cite the following papers. diff --git a/THIRD_PARTIES.md b/THIRD_PARTIES.md index f83f7c36..2971c139 100644 --- a/THIRD_PARTIES.md +++ b/THIRD_PARTIES.md @@ -7,3 +7,4 @@ The logparser package is built on top of the following third-party libraries: | LenMa | https://github.com/keiichishima/templateminer | BSD | | MoLFI | https://github.com/SalmaMessaoudi/MoLFI | Apache-2.0 | | alignment (LogMine) | https://gist.github.com/aziele/6192a38862ce569fe1b9cbe377339fbe | GPL | +| Logram | https://github.com/BlueLionLogram/Logram | NA | diff --git a/logparser/Logram/README.md b/logparser/Logram/README.md new file mode 100644 index 00000000..b0fae071 --- /dev/null +++ b/logparser/Logram/README.md @@ -0,0 +1,60 @@ +# Logram + +Logram is an automated log parsing technique, which leverages n-gram dictionaries to achieve efficient log parsing. + +Read more information about Logram from the following paper: + ++ Hetong Dai, Heng Li, Che-Shao Chen, Weiyi Shang, and Tse-Hsun (Peter) Chen. [Logram: Efficient Log Parsing Using n-Gram +Dictionaries](https://arxiv.org/pdf/2001.03038.pdf), *IEEE Transactions on Software Engineering (TSE)*, 2020. 
+ +### Running + +The code has been tested in the following environment: ++ python 3.7.6 ++ regex 2022.3.2 ++ pandas 1.0.1 ++ numpy 1.18.1 ++ scipy 1.4.1 + +Run the following scripts to start the demo: + +``` +python demo.py +``` + +Run the following scripts to execute the benchmark: + +``` +python benchmark.py +``` + +### Benchmark + +Running the benchmark script on Loghub_2k datasets, you could obtain the following results. + +| Dataset | F1_measure | Accuracy | +|:-----------:|:----------|:--------| +| HDFS | 0.990518 | 0.93 | +| Hadoop | 0.78249 | 0.451 | +| Spark | 0.479691 | 0.282 | +| Zookeeper | 0.923936 | 0.7235 | +| BGL | 0.956032 | 0.587 | +| HPC | 0.993748 | 0.9105 | +| Thunderbird | 0.993876 | 0.554 | +| Windows | 0.913735 | 0.694 | +| Linux | 0.541378 | 0.361 | +| Android | 0.975017 | 0.7945 | +| HealthApp | 0.587935 | 0.2665 | +| Apache | 0.637665 | 0.3125 | +| Proxifier | 0.750476 | 0.5035 | +| OpenSSH | 0.979348 | 0.6115 | +| OpenStack | 0.742866 | 0.3255 | +| Mac | 0.892896 | 0.568 | + + +### Citation + +:telescope: If you use our logparser tools or benchmarking results in your publication, please kindly cite the following papers. + ++ [**ICSE'19**] Jieming Zhu, Shilin He, Jinyang Liu, Pinjia He, Qi Xie, Zibin Zheng, Michael R. Lyu. [Tools and Benchmarks for Automated Log Parsing](https://arxiv.org/pdf/1811.03509.pdf). *International Conference on Software Engineering (ICSE)*, 2019. ++ [**DSN'16**] Pinjia He, Jieming Zhu, Shilin He, Jian Li, Michael R. Lyu. [An Evaluation Study on Log Parsing and Its Use in Log Mining](https://jiemingzhu.github.io/pub/pjhe_dsn2016.pdf). *IEEE/IFIP International Conference on Dependable Systems and Networks (DSN)*, 2016. 
diff --git a/logparser/Logram/__init__.py b/logparser/Logram/__init__.py new file mode 100644 index 00000000..df603f0c --- /dev/null +++ b/logparser/Logram/__init__.py @@ -0,0 +1 @@ +from .src.Logram import * diff --git a/logparser/Logram/benchmark.py b/logparser/Logram/benchmark.py new file mode 100644 index 00000000..c60e24ff --- /dev/null +++ b/logparser/Logram/benchmark.py @@ -0,0 +1,183 @@ +# ========================================================================= +# Copyright (C) 2016-2023 LOGPAI (https://github.com/logpai). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========================================================================= + + +import sys +sys.path.append("../../") +from logparser.Logram import LogParser +from logparser.utils import evaluator +import os +import pandas as pd + + +input_dir = "../../data/loghub_2k/" # The input directory of log file +output_dir = "Logram_result/" # The output directory of parsing results + +benchmark_settings = { + "HDFS": { + "log_file": "HDFS/HDFS_2k.log", + "log_format": "