Skip to content

Commit

Permalink
Added Defects4j and BugsInPy Programs (#7)
Browse files Browse the repository at this point in the history
* Streamlined project
* Added latest versions of programs included in Defects4j and BugsInPy
  • Loading branch information
DavidBakerEffendi authored Oct 27, 2024
1 parent 8bb3d91 commit 90840b7
Show file tree
Hide file tree
Showing 23 changed files with 335 additions and 278 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: pr
on: pull_request
jobs:
test:
compile:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
Expand All @@ -18,8 +18,8 @@ jobs:
~/.sbt
~/.coursier
key: ${{ runner.os }}-sbt-${{ hashfiles('**/build.sbt') }}
- name: Compile and run tests
run: sbt clean test
- name: Ensure successful compilation
run: sbt clean compile
formatting:
runs-on: ubuntu-latest
steps:
Expand All @@ -38,6 +38,6 @@ jobs:
~/.coursier
key: ${{ runner.os }}-sbt-${{ hashfiles('**/build.sbt') }}
- name: Check formatting
run: sbt scalafmtCheck Test/scalafmtCheck
- run: echo "Previous step failed because code is not formatted. Run 'sbt scalafmt Test/scalafmt'"
run: sbt scalafmtCheck
- run: echo "Previous step failed because code is not formatted. Run 'sbt scalafmt'"
if: ${{ failure() }}
8 changes: 5 additions & 3 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: '0'
fetch-depth: '1'
- name: Set up JDK
uses: actions/setup-java@v4
with:
Expand Down Expand Up @@ -44,7 +44,9 @@ jobs:
mv workspace/ichnaea.zip ichnaea.zip
mv workspace/securibench-micro-JAVA.zip securibench-micro-JAVA.zip
mv workspace/securibench-micro-JAVASRC.zip securibench-micro-JAVASRC.zip
mv workspace/THORAT.zip THORAT.zip
mv workspace/thorat.zip thorat.zip
mv workspace/defects4j.zip defects4j.zip
mv workspace/bugs_in_py.zip bugs_in_py.zip
- name: Set next release version
id: taggerFinal
uses: anothrNick/[email protected]
Expand All @@ -59,4 +61,4 @@ jobs:
ichnaea.zip
securibench-micro-JAVA.zip
securibench-micro-JAVASRC.zip
THORAT.zip
thorat.zip
19 changes: 4 additions & 15 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Joern Benchmarks
================

A repository for running Joern against known benchmarks.
A repository for building snapshots of datasets used by `joern-benchmarks`.

## Usage

Expand All @@ -11,21 +11,10 @@ sbt stage
joern-benchmark v0.0.1
Usage: joern-benchmark [options] benchmark

A benchmarking suite for Joern
A benchmark downloader tool for Joern benchmarks
-h, --help
--version Prints the version
benchmark The benchmark to run. Available [ALL,OWASP_JAVASRC,OWASP_JAVA,SECURIBENCH_MICRO_JAVASRC,SECURIBENCH_MICRO_JAVA]
benchmark The benchmark to download. Available [ALL,SECURIBENCH_MICRO_SRC,SECURIBENCH_MICRO_JAVA,ICHNAEA,THORAT,BUGS_IN_PY,DEFECTS4J]
-d, --dataset-dir <value>
The dataset directory where benchmarks will be downloaded to. Default is `./workspace`.
The dataset directory where benchmarks will be downloaded to. Default is `./workspace`
```

## Benchmarks

Benchmarks follow the naming convention `<BENCHMARK>_<FRONTEND>`; e.g. `OWASP_JAVA` runs `OWASP` using the `jimple2cpg`
frontend (JVM bytecode).

| Benchmark | Status | Enabled Frontends |
|-----------------------------------------------------------------------|--------|-------------------|
| [`OWASP`](https://owasp.org/www-project-benchmark/) | WIP | `JAVASRC` |
| [`SECURIBENCH_MICRO`](https://github.com/too4words/securibench-micro) | WIP | `JAVASRC` `JAVA` |
| [`ICHNAEA`](https://www.franktip.org/pubs/tse2020.pdf) | WIP | `JSSRC` |
33 changes: 11 additions & 22 deletions build.sbt
Original file line number Diff line number Diff line change
@@ -1,32 +1,21 @@
name := "joern-benchmarks-datasets-datasets"
name := "joern-benchmarks-datasets"
ThisBuild / organization := "io.joern"
ThisBuild / scalaVersion := "3.4.1"

// parsed by project/Versions.scala, updated by updateDependencies.sh
val cpgVersion = "1.6.11"
val joernVersion = "2.0.348"
val overflowdbVersion = "1.192"


libraryDependencies ++= Seq(
"com.github.pathikrit" %% "better-files" % Versions.betterFiles,
"com.github.scopt" %% "scopt" % Versions.scopt,
"com.github.pathikrit" %% "better-files" % Versions.betterFiles,
"com.github.scopt" %% "scopt" % Versions.scopt,
"org.apache.logging.log4j" % "log4j-slf4j2-impl" % Versions.log4j % Optional,
"com.lihaoyi" %% "requests" % Versions.requests,
"com.lihaoyi" %% "upickle" % Versions.upickle,
"io.joern" %% "joern-cli" % Versions.joern,
"io.joern" %% "x2cpg" % Versions.joern
"com.lihaoyi" %% "requests" % Versions.requests,
"com.lihaoyi" %% "upickle" % Versions.upickle,
"com.github.sh4869" %% "semver-parser-scala" % Versions.semver,
"org.apache.commons" % "commons-compress" % Versions.commonsCompress
)

// mostly so that `sbt assembly` works, but also to ensure that we don't end up
// with unexpected shadowing in jar hell
excludeDependencies ++= Seq(ExclusionRule("io.shiftleft", "codepropertygraph-domain-classes_3"))

assembly / assemblyMergeStrategy := {
case "log4j2.xml" => MergeStrategy.first
case "module-info.class" => MergeStrategy.first
case "META-INF/versions/9/module-info.class" => MergeStrategy.first
case "io/github/retronym/java9rtexport/Export.class" => MergeStrategy.first
case "log4j2.xml" => MergeStrategy.first
case "module-info.class" => MergeStrategy.first
case "META-INF/versions/9/module-info.class" => MergeStrategy.first
case PathList("scala", "collection", "internal", "pprint", _) => MergeStrategy.first
case x =>
val oldStrategy = (ThisBuild / assemblyMergeStrategy).value
Expand All @@ -48,5 +37,5 @@ ThisBuild / resolvers ++= Seq(
"Gradle Releases" at "https://repo.gradle.org/gradle/libs-releases/"
)

Compile / doc / sources := Seq.empty
Compile / doc / sources := Seq.empty
Compile / packageDoc / publishArtifact := false
76 changes: 0 additions & 76 deletions install-local-joern.sh

This file was deleted.

1 change: 0 additions & 1 deletion joern

This file was deleted.

2 changes: 1 addition & 1 deletion joern-benchmarks-datasets
48 changes: 0 additions & 48 deletions project/DownloadHelper.scala

This file was deleted.

7 changes: 0 additions & 7 deletions project/Projects.scala

This file was deleted.

31 changes: 6 additions & 25 deletions project/Versions.scala
Original file line number Diff line number Diff line change
@@ -1,28 +1,9 @@
object Versions {
val cpg = parseVersion("cpgVersion")
val joern = parseVersion("joernVersion")
val overflowdb = parseVersion("overflowdbVersion")

val betterFiles = "3.9.2"
val log4j = "2.20.0"
val requests = "0.8.0"
val scopt = "4.1.0"
val upickle = "3.3.0"

val jsAstGen = "3.14.0"

private def parseVersion(key: String): String = {
val versionRegexp = s""".*val $key[ ]+=[ ]?"(.*?)"""".r
val versions: List[String] = scala.io.Source
.fromFile("build.sbt")
.getLines
.filter(_.contains(s"val $key"))
.collect { case versionRegexp(version) => version }
.toList
assert(
versions.size == 1,
s"""unable to extract $key from build.sbt, expected exactly one line like `val $key= "0.0.0-SNAPSHOT"`."""
)
versions.head
}
val commonsCompress = "1.27.1"
val log4j = "2.20.0"
val requests = "0.8.0"
val scopt = "4.1.0"
val upickle = "3.3.0"
val semver ="0.0.6"
}
1 change: 0 additions & 1 deletion project/plugins.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,4 @@ addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.3")
addSbtPlugin("com.github.sbt" % "sbt-findbugs" % "2.0.0")
addSbtPlugin("com.dwijnand" % "sbt-dynver" % "4.1.1")
addSbtPlugin("com.github.sbt" % "sbt-native-packager" % "1.9.7")
addSbtPlugin("io.shiftleft" % "sbt-overflowdb" % "2.104")
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.1.1")
1 change: 0 additions & 1 deletion repl

This file was deleted.

25 changes: 9 additions & 16 deletions src/main/scala/io/joern/benchmarks/datasets/BenchmarkDataset.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,24 @@ package io.joern.benchmarks.datasets
import io.joern.benchmarks.datasets.BenchmarkDataset.benchmarkConstructors
import io.joern.benchmarks.datasets.AvailableBenchmarks
import io.joern.benchmarks.datasets.runner.{
BugsInPyDownloader,
DatasetDownloader,
Defects4jDownloader,
IchnaeaDownloader,
SecuribenchMicroDownloader,
ThoratDownloader
}
import org.slf4j.LoggerFactory
import upickle.default.*

/** The main benchmarking process.
*/
class BenchmarkDataset(config: BenchmarkDatasetConfig) {
private val logger = LoggerFactory.getLogger(getClass)

def evaluate(): Unit = {
logger.info("Beginning evaluation")
logger.info("Beginning downloads")

def runBenchmark(benchmarkRunnerCreator: BenchmarkDatasetConfig => DatasetDownloader): Unit = {
val benchmarkRunner = benchmarkRunnerCreator(config)
val benchmarkName = benchmarkRunner.benchmarkName
logger.info(s"Running $benchmarkName")
benchmarkRunner.run()
benchmarkRunnerCreator(config).run()
}

if (config.benchmark == AvailableBenchmarks.ALL) {
Expand All @@ -37,18 +34,14 @@ class BenchmarkDataset(config: BenchmarkDatasetConfig) {
object BenchmarkDataset {
val benchmarkConstructors: Map[AvailableBenchmarks.Value, BenchmarkDatasetConfig => DatasetDownloader] = Map(
(
AvailableBenchmarks.SECURIBENCH_MICRO_JAVASRC,
AvailableBenchmarks.SECURIBENCH_MICRO_SRC,
x => new SecuribenchMicroDownloader(x.datasetDir, JavaCpgTypes.JAVASRC)
),
(AvailableBenchmarks.SECURIBENCH_MICRO_JAVA, x => new SecuribenchMicroDownloader(x.datasetDir, JavaCpgTypes.JAVA)),
(AvailableBenchmarks.ICHNAEA_JSSRC, x => new IchnaeaDownloader(x.datasetDir)),
(
AvailableBenchmarks.SECURIBENCH_MICRO_SEMGREP,
x => new SecuribenchMicroDownloader(x.datasetDir, JavaCpgTypes.SEMGREP)
),
(AvailableBenchmarks.ICHNAEA_SEMGREP, x => new IchnaeaDownloader(x.datasetDir)),
(AvailableBenchmarks.THORAT_PYSRC, x => new ThoratDownloader(x.datasetDir)),
(AvailableBenchmarks.THORAT_SEMGREP, x => new ThoratDownloader(x.datasetDir))
(AvailableBenchmarks.ICHNAEA, x => new IchnaeaDownloader(x.datasetDir)),
(AvailableBenchmarks.THORAT, x => new ThoratDownloader(x.datasetDir)),
(AvailableBenchmarks.BUGS_IN_PY, x => new BugsInPyDownloader(x.datasetDir)),
(AvailableBenchmarks.DEFECTS4J, x => new Defects4jDownloader(x.datasetDir))
)

}
Loading

0 comments on commit 90840b7

Please sign in to comment.