From 986e233599b7310b0a429f91a40fb5dea6b6789c Mon Sep 17 00:00:00 2001 From: Ethan Rose <33912936+errose28@users.noreply.github.com> Date: Wed, 20 Nov 2024 13:44:20 -0500 Subject: [PATCH] HDDS-11471. Add new tests for container scanner detecting multiple errors in one container (#7396) --- .../ozoneimpl/MetadataScanResult.java | 10 +- .../container/common/ContainerTestUtils.java | 24 +- .../keyvalue/TestContainerCorruptions.java | 232 ++++++++++++++++++ .../keyvalue/TestKeyValueContainerCheck.java | 142 ++++++++++- .../ozoneimpl/TestDataScanResult.java | 84 +++++++ .../ozoneimpl/TestMetadataScanResult.java | 60 +++++ ...groundContainerDataScannerIntegration.java | 10 +- ...ndContainerMetadataScannerIntegration.java | 13 +- ...stContainerScannerIntegrationAbstract.java | 188 -------------- ...DemandContainerDataScannerIntegration.java | 19 +- 10 files changed, 563 insertions(+), 219 deletions(-) create mode 100644 hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerCorruptions.java create mode 100644 hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestDataScanResult.java create mode 100644 hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestMetadataScanResult.java diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/MetadataScanResult.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/MetadataScanResult.java index e394ba54fe1..af47a2ece79 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/MetadataScanResult.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/MetadataScanResult.java @@ -78,12 +78,14 @@ public List getErrors() { */ @Override public String toString() { - if (errors.isEmpty()) { - return "Scan result has 0 errors"; + if (deleted) { + return 
"Container was deleted"; + } else if (errors.isEmpty()) { + return "Container has 0 errors"; } else if (errors.size() == 1) { - return "Scan result has 1 error: " + errors.get(0); + return "Container has 1 error: " + errors.get(0); } else { - return "Scan result has " + errors.size() + " errors. The first error is: " + errors.get(0); + return "Container has " + errors.size() + " errors. The first error is: " + errors.get(0); } } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ContainerTestUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ContainerTestUtils.java index d24f4425483..3b9c4a93ec5 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ContainerTestUtils.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ContainerTestUtils.java @@ -284,22 +284,34 @@ public static DataScanResult getHealthyDataScanResult() { * Construct an unhealthy scan result to use for testing purposes. */ public static DataScanResult getUnhealthyDataScanResult() { - ContainerScanError error = new ContainerScanError(ContainerScanError.FailureType.CORRUPT_CHUNK, - new File(""), new IOException("Fake data corruption failure for testing")); - return DataScanResult.fromErrors(Collections.singletonList(error), new ContainerMerkleTree()); + return DataScanResult.fromErrors(Collections.singletonList(getDataScanError()), new ContainerMerkleTree()); } public static MetadataScanResult getHealthyMetadataScanResult() { return MetadataScanResult.fromErrors(Collections.emptyList()); } + /** + * Construct a generic data scan error that can be used for testing. 
+ */ + public static ContainerScanError getDataScanError() { + return new ContainerScanError(ContainerScanError.FailureType.CORRUPT_CHUNK, new File(""), + new IOException("Fake data corruption failure for testing")); + } + + /** + * Construct a generic metadata scan error that can be used for testing. + */ + public static ContainerScanError getMetadataScanError() { + return new ContainerScanError(ContainerScanError.FailureType.CORRUPT_CONTAINER_FILE, new File(""), + new IOException("Fake metadata corruption failure for testing")); + } + /** * Construct an unhealthy scan result to use for testing purposes. */ public static MetadataScanResult getUnhealthyMetadataScanResult() { - ContainerScanError error = new ContainerScanError(ContainerScanError.FailureType.CORRUPT_CONTAINER_FILE, - new File(""), new IOException("Fake metadata corruption failure for testing")); - return DataScanResult.fromErrors(Collections.singletonList(error)); + return DataScanResult.fromErrors(Collections.singletonList(getMetadataScanError())); } public static KeyValueContainer addContainerToDeletedDir( diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerCorruptions.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerCorruptions.java new file mode 100644 index 00000000000..470197e1f82 --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerCorruptions.java @@ -0,0 +1,232 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.keyvalue; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.hadoop.ozone.container.ozoneimpl.ContainerScanError; +import org.apache.ozone.test.GenericTestUtils; + +import java.io.File; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.Arrays; +import java.util.EnumSet; +import java.util.Set; +import java.util.function.BiConsumer; +import java.util.regex.Pattern; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Represents a type of container corruption that can be injected into a container for testing. + * Currently this class only supports file per block layout. + */ +public enum TestContainerCorruptions { + MISSING_CHUNKS_DIR((container, blockID) -> { + File chunksDir = new File(container.getContainerData().getContainerPath(), + "chunks"); + try { + FileUtils.deleteDirectory(chunksDir); + } catch (IOException ex) { + // Fail the test. 
+ throw new UncheckedIOException(ex); + } + assertFalse(chunksDir.exists()); + }, ContainerScanError.FailureType.MISSING_CHUNKS_DIR), + + MISSING_METADATA_DIR((container, blockID) -> { + File metadataDir = + new File(container.getContainerData().getContainerPath(), + "metadata"); + try { + FileUtils.deleteDirectory(metadataDir); + } catch (IOException ex) { + // Fail the test. + throw new UncheckedIOException(ex); + } + assertFalse(metadataDir.exists()); + }, ContainerScanError.FailureType.MISSING_METADATA_DIR), + + MISSING_CONTAINER_FILE((container, blockID) -> { + File containerFile = container.getContainerFile(); + assertTrue(containerFile.delete()); + assertFalse(containerFile.exists()); + }, ContainerScanError.FailureType.MISSING_CONTAINER_FILE), + + MISSING_CONTAINER_DIR((container, blockID) -> { + File containerDir = + new File(container.getContainerData().getContainerPath()); + try { + FileUtils.deleteDirectory(containerDir); + } catch (IOException ex) { + // Fail the test. + throw new UncheckedIOException(ex); + } + assertFalse(containerDir.exists()); + }, ContainerScanError.FailureType.MISSING_CONTAINER_DIR), + + MISSING_BLOCK((container, blockID) -> { + File blockFile = getBlock(container, blockID); + assertTrue(blockFile.delete()); + }, ContainerScanError.FailureType.MISSING_CHUNK_FILE), + + CORRUPT_CONTAINER_FILE((container, blockID) -> { + File containerFile = container.getContainerFile(); + corruptFile(containerFile); + }, ContainerScanError.FailureType.CORRUPT_CONTAINER_FILE), + + TRUNCATED_CONTAINER_FILE((container, blockID) -> { + File containerFile = container.getContainerFile(); + truncateFile(containerFile); + }, ContainerScanError.FailureType.CORRUPT_CONTAINER_FILE), + + CORRUPT_BLOCK((container, blockID) -> { + File blockFile = getBlock(container, blockID); + corruptFile(blockFile); + }, ContainerScanError.FailureType.CORRUPT_CHUNK), + + TRUNCATED_BLOCK((container, blockID) -> { + File blockFile = getBlock(container, blockID); + 
truncateFile(blockFile); + }, ContainerScanError.FailureType.INCONSISTENT_CHUNK_LENGTH); + + private final BiConsumer, Long> corruption; + private final ContainerScanError.FailureType expectedResult; + + TestContainerCorruptions(BiConsumer, Long> corruption, ContainerScanError.FailureType expectedResult) { + this.corruption = corruption; + this.expectedResult = expectedResult; + + } + + public void applyTo(Container container) { + corruption.accept(container, -1L); + } + + public void applyTo(Container container, long blockID) { + corruption.accept(container, blockID); + } + + /** + * Check that the correct corruption type was written to the container log for the provided container. + */ + public void assertLogged(long containerID, int numErrors, GenericTestUtils.LogCapturer logCapturer) { + // Enable multiline regex mode with "(?m)". This allows ^ to check for the start of a line in a multiline string. + // The log will have captured lines from all previous tests as well since we re-use the same cluster. + Pattern logLine = Pattern.compile("(?m)^ID=" + containerID + ".*" + " Container has " + numErrors + + " error.*" + expectedResult.toString()); + assertThat(logCapturer.getOutput()).containsPattern(logLine); + } + + /** + * Check that the correct corruption type was written to the container log for the provided container. + */ + public void assertLogged(long containerID, GenericTestUtils.LogCapturer logCapturer) { + // Enable multiline regex mode with "(?m)". This allows ^ to check for the start of a line in a multiline string. + // The log will have captured lines from all previous tests as well since we re-use the same cluster. 
+ Pattern logLine = Pattern.compile("(?m)^ID=" + containerID + ".*" + " Container has .*error.*" + + expectedResult.toString()); + assertThat(logCapturer.getOutput()).containsPattern(logLine); + } + + public ContainerScanError.FailureType getExpectedResult() { + return expectedResult; + } + + /** + * Get all container corruption types as parameters for JUnit 5 + * parameterized tests, except the ones specified. + */ + public static Set getAllParamsExcept( + TestContainerCorruptions... exclude) { + Set includeSet = + EnumSet.allOf(TestContainerCorruptions.class); + Arrays.asList(exclude).forEach(includeSet::remove); + return includeSet; + } + + /** + * Corrupt the file by left-shifting its middle and last bytes. + */ + private static void corruptFile(File file) { + try { + final int length = (int) file.length(); + + Path path = file.toPath(); + final byte[] original = IOUtils.readFully(Files.newInputStream(path), length); + + // Corrupt the last byte and middle bytes of the block. The scanner should log this as two errors. + final byte[] corruptedBytes = Arrays.copyOf(original, length); + corruptedBytes[length - 1] = (byte) (original[length - 1] << 1); + corruptedBytes[length / 2] = (byte) (original[length / 2] << 1); + + Files.write(path, corruptedBytes, + StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.SYNC); + + assertThat(IOUtils.readFully(Files.newInputStream(path), length)) + .isEqualTo(corruptedBytes) + .isNotEqualTo(original); + } catch (IOException ex) { + // Fail the test. + throw new UncheckedIOException(ex); + } + } + + private static File getBlock(Container container, long blockID) { + File blockFile; + File chunksDir = new File(container.getContainerData().getContainerPath(), + "chunks"); + // Negative values are an internal placeholder to get the first block in a container. 
+ if (blockID < 0) { + File[] blockFiles = chunksDir.listFiles((dir, name) -> name.endsWith(".block")); + assertNotNull(blockFiles); + assertTrue(blockFiles.length > 0); + blockFile = blockFiles[0]; + } else { + // Get the block by ID. + blockFile = new File(chunksDir, blockID + ".block"); + } + assertTrue(blockFile.exists()); + return blockFile; + } + + /** + * Truncate the file to 0 bytes in length. + */ + private static void truncateFile(File file) { + try { + Files.write(file.toPath(), new byte[0], + StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.SYNC); + + assertEquals(0, file.length()); + } catch (IOException ex) { + // Fail the test. + throw new UncheckedIOException(ex); + } + } +} diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java index a470cce402f..b6da73b7ea6 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java @@ -25,16 +25,39 @@ import org.apache.hadoop.hdfs.util.Canceler; import org.apache.hadoop.hdfs.util.DataTransferThrottler; import org.apache.hadoop.ozone.container.common.helpers.BlockData; +import org.apache.hadoop.ozone.container.common.impl.ContainerLayoutVersion; import org.apache.hadoop.ozone.container.common.interfaces.BlockIterator; import org.apache.hadoop.ozone.container.common.interfaces.DBHandle; import org.apache.hadoop.ozone.container.common.interfaces.ScanResult; import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils; import org.apache.hadoop.ozone.container.keyvalue.helpers.KeyValueContainerLocationUtil; +import org.apache.hadoop.ozone.container.ozoneimpl.ContainerScanError.FailureType; +import 
org.apache.hadoop.ozone.container.ozoneimpl.ContainerScanError; import org.apache.hadoop.ozone.container.ozoneimpl.ContainerScannerConfiguration; +import org.apache.hadoop.ozone.container.ozoneimpl.DataScanResult; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.File; import java.io.RandomAccessFile; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.CORRUPT_BLOCK; +import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.CORRUPT_CONTAINER_FILE; +import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.MISSING_BLOCK; +import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.MISSING_CHUNKS_DIR; +import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.MISSING_CONTAINER_DIR; +import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.MISSING_CONTAINER_FILE; +import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.MISSING_METADATA_DIR; +import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.TRUNCATED_BLOCK; +import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.TRUNCATED_CONTAINER_FILE; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -42,11 +65,126 @@ import static org.mockito.Mockito.mock; /** - * Basic sanity test for the KeyValueContainerCheck class. + * Test the KeyValueContainerCheck class's ability to detect container errors. 
*/ public class TestKeyValueContainerCheck extends TestKeyValueContainerIntegrityChecks { + private static final Logger LOG = LoggerFactory.getLogger(TestKeyValueContainerCheck.class); + + /** + * Container fault injection is not supported with the old file per chunk layout. + * @return The container versions that should be tested with fault injection. + */ + private static Stream provideContainerVersions() { + return ContainerTestVersionInfo.getLayoutList().stream() + .filter(c -> c.getLayout() != ContainerLayoutVersion.FILE_PER_CHUNK); + } + + /** + * @return A matrix of the container versions that should be tested with fault injection paired with each type of + * metadata fault. + */ + private static Stream provideMetadataCorruptions() { + List metadataCorruptions = Arrays.asList( + MISSING_CHUNKS_DIR, + MISSING_METADATA_DIR, + MISSING_CONTAINER_DIR, + MISSING_CONTAINER_FILE, + CORRUPT_CONTAINER_FILE, + TRUNCATED_CONTAINER_FILE + ); + return provideContainerVersions() + .flatMap(version -> metadataCorruptions.stream().map(corruption -> Arguments.of(version, corruption))); + } + + /** + * When the scanner encounters an issue with container metadata, it should fail the scan immediately. + * Metadata is required before reading the data. 
+ */ + @ParameterizedTest + @MethodSource("provideMetadataCorruptions") + public void testExitEarlyOnMetadataError(ContainerTestVersionInfo versionInfo, + TestContainerCorruptions metadataCorruption) throws Exception { + initTestData(versionInfo); + long containerID = 101; + int deletedBlocks = 0; + int normalBlocks = 3; + OzoneConfiguration conf = getConf(); + ContainerScannerConfiguration c = conf.getObject(ContainerScannerConfiguration.class); + DataTransferThrottler throttler = new DataTransferThrottler(c.getBandwidthPerVolume()); + + KeyValueContainer container = createContainerWithBlocks(containerID, + normalBlocks, deletedBlocks, true); + KeyValueContainerCheck kvCheck = new KeyValueContainerCheck(conf, container); + + DataScanResult result = kvCheck.fullCheck(throttler, null); + assertTrue(result.isHealthy()); + + // Inject a metadata and a data error. + metadataCorruption.applyTo(container); + // All other metadata failures are independent of the block files, so we can add a data failure later in the scan. + if (metadataCorruption != MISSING_CHUNKS_DIR && metadataCorruption != MISSING_CONTAINER_DIR) { + CORRUPT_BLOCK.applyTo(container); + } + + result = kvCheck.fullCheck(throttler, null); + assertFalse(result.isHealthy()); + // Scan should have failed after the first metadata error and not made it to the data error. + assertEquals(1, result.getErrors().size()); + assertEquals(metadataCorruption.getExpectedResult(), result.getErrors().get(0).getFailureType()); + } + + /** + * When the scanner encounters an issue with container data, it should continue scanning to collect all issues + * among all blocks. 
+ */ + @ParameterizedTest + @MethodSource("provideContainerVersions") + public void testAllDataErrorsCollected(ContainerTestVersionInfo versionInfo) throws Exception { + initTestData(versionInfo); + + long containerID = 101; + int deletedBlocks = 0; + int normalBlocks = 6; + OzoneConfiguration conf = getConf(); + ContainerScannerConfiguration c = conf.getObject(ContainerScannerConfiguration.class); + DataTransferThrottler throttler = new DataTransferThrottler(c.getBandwidthPerVolume()); + KeyValueContainer container = createContainerWithBlocks(containerID, + normalBlocks, deletedBlocks, true); + KeyValueContainerCheck kvCheck = new KeyValueContainerCheck(conf, container); + + DataScanResult result = kvCheck.fullCheck(throttler, null); + assertTrue(result.isHealthy()); + + // Put different types of block failures in the middle of the container. + CORRUPT_BLOCK.applyTo(container, 1); + MISSING_BLOCK.applyTo(container, 2); + TRUNCATED_BLOCK.applyTo(container, 4); + List expectedErrors = new ArrayList<>(); + // Corruption is applied to two different chunks within the block. + expectedErrors.add(CORRUPT_BLOCK.getExpectedResult()); + expectedErrors.add(CORRUPT_BLOCK.getExpectedResult()); + expectedErrors.add(MISSING_BLOCK.getExpectedResult()); + // When a block file is truncated, all chunks in the block will be reported as missing. + // This is expected since reconciliation will do the repair at the chunk level. + for (int i = 0; i < CHUNKS_PER_BLOCK; i++) { + expectedErrors.add(TRUNCATED_BLOCK.getExpectedResult()); + } + + result = kvCheck.fullCheck(throttler, null); + result.getErrors().forEach(e -> LOG.info("Error detected: {}", e)); + + assertFalse(result.isHealthy()); + // Check that all data errors were detected in order. + // TODO HDDS-10374 Use merkle tree to check the actual content affected by the errors. 
+ assertEquals(expectedErrors.size(), result.getErrors().size()); + List actualErrors = result.getErrors().stream() + .map(ContainerScanError::getFailureType) + .collect(Collectors.toList()); + assertEquals(expectedErrors, actualErrors); + } + /** * Sanity test, when there are no corruptions induced. */ @@ -144,7 +282,7 @@ void testKeyValueContainerCheckDeletedContainer(ContainerTestVersionInfo version assertTrue(result.isHealthy()); assertFalse(result.isDeleted()); - // When a container is not marked for deletion and it has peices missing, the scan should fail. + // When a container is not marked for deletion and it has pieces missing, the scan should fail. File metadataDir = new File(container.getContainerData().getChunksPath()); FileUtils.deleteDirectory(metadataDir); assertFalse(metadataDir.exists()); diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestDataScanResult.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestDataScanResult.java new file mode 100644 index 00000000000..cfdea76ba53 --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestDataScanResult.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.ozoneimpl; + +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.ozone.container.checksum.ContainerMerkleTree; +import org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils; +import org.junit.jupiter.api.Test; + +import java.util.Arrays; +import java.util.Collections; + +import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.getDataScanError; +import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.getMetadataScanError; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.assertSame; + +class TestDataScanResult { + private static final ContainerMerkleTree TREE = ContainerMerkleTreeTestUtils.buildTestTree(new OzoneConfiguration()); + + @Test + void testFromEmptyErrors() { + // No errors means the scan result is healthy. 
+ DataScanResult result = DataScanResult.fromErrors(Collections.emptyList(), TREE); + assertTrue(result.isHealthy()); + assertFalse(result.isDeleted()); + assertTrue(result.getErrors().isEmpty()); + assertTrue(result.toString().contains("0 errors")); + assertSame(TREE, result.getDataTree()); + } + + @Test + void testUnhealthyMetadata() { + MetadataScanResult metadataResult = + MetadataScanResult.fromErrors(Collections.singletonList(getMetadataScanError())); + DataScanResult result = DataScanResult.unhealthyMetadata(metadataResult); + assertFalse(result.isHealthy()); + assertFalse(result.isDeleted()); + assertEquals(1, result.getErrors().size()); + assertTrue(result.toString().contains("1 error")); + // Tree should be empty if the metadata scan failed, since the data scan could not proceed. + assertEquals(0, result.getDataTree().toProto().getBlockMerkleTreeCount()); + } + + @Test + void testFromErrors() { + DataScanResult result = DataScanResult.fromErrors(Arrays.asList(getDataScanError(), getDataScanError()), TREE); + assertFalse(result.isHealthy()); + assertFalse(result.isDeleted()); + assertEquals(2, result.getErrors().size()); + assertTrue(result.toString().contains("2 errors")); + // Tree should just be passed through from the result. It will not have the errors we passed in. + assertSame(TREE, result.getDataTree()); + } + + @Test + void testDeleted() { + DataScanResult result = DataScanResult.deleted(); + assertTrue(result.isHealthy()); + assertTrue(result.isDeleted()); + assertTrue(result.getErrors().isEmpty()); + assertTrue(result.toString().contains("deleted")); + // Tree should be empty if the container was deleted. 
+ assertEquals(0, result.getDataTree().toProto().getBlockMerkleTreeCount()); + } +} diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestMetadataScanResult.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestMetadataScanResult.java new file mode 100644 index 00000000000..b7534c0bd23 --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestMetadataScanResult.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.ozoneimpl; + +import org.junit.jupiter.api.Test; + +import java.util.Arrays; +import java.util.Collections; + +import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.getMetadataScanError; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class TestMetadataScanResult { + @Test + void testFromEmptyErrors() { + // No errors means the scan result is healthy. + MetadataScanResult result = MetadataScanResult.fromErrors(Collections.emptyList()); + assertTrue(result.isHealthy()); + assertFalse(result.isDeleted()); + assertTrue(result.getErrors().isEmpty()); + assertTrue(result.toString().contains("0 errors")); + } + + @Test + void testFromErrors() { + MetadataScanResult result = + MetadataScanResult.fromErrors(Arrays.asList(getMetadataScanError(), getMetadataScanError())); + assertFalse(result.isHealthy()); + assertFalse(result.isDeleted()); + assertEquals(2, result.getErrors().size()); + assertTrue(result.toString().contains("2 errors")); + } + + @Test + void testDeleted() { + MetadataScanResult result = MetadataScanResult.deleted(); + assertTrue(result.isHealthy()); + assertTrue(result.isDeleted()); + assertTrue(result.getErrors().isEmpty()); + assertTrue(result.toString().contains("deleted")); + } +} diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java index b372c434a07..80dbda64bfc 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State; import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.utils.ContainerLogger; +import org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions; import org.apache.hadoop.ozone.container.ozoneimpl.BackgroundContainerDataScanner; import org.apache.hadoop.ozone.container.ozoneimpl.ContainerScannerConfiguration; import org.apache.ozone.test.GenericTestUtils; @@ -69,7 +70,7 @@ static void init() throws Exception { @ParameterizedTest // Background container data scanner should be able to detect all errors. @EnumSource - void testCorruptionDetected(ContainerCorruptions corruption) + void testCorruptionDetected(TestContainerCorruptions corruption) throws Exception { pauseScanner(); @@ -90,9 +91,10 @@ void testCorruptionDetected(ContainerCorruptions corruption) // Wait for SCM to get a report of the unhealthy replica. waitForScmToSeeUnhealthyReplica(containerID); - // If the block is truncated, every chunk in the block will register an error. - if (corruption == ContainerCorruptions.TRUNCATED_BLOCK) { - corruption.assertLogged(containerID, 2, logCapturer); + if (corruption == TestContainerCorruptions.TRUNCATED_BLOCK || + corruption == TestContainerCorruptions.CORRUPT_BLOCK) { + // These errors will affect multiple chunks and result in multiple log messages. 
+ corruption.assertLogged(containerID, logCapturer); } else { // Other corruption types will only lead to a single error. corruption.assertLogged(containerID, 1, logCapturer); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerMetadataScannerIntegration.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerMetadataScannerIntegration.java index b6bbc602415..0678190d47c 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerMetadataScannerIntegration.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerMetadataScannerIntegration.java @@ -24,6 +24,7 @@ import org.apache.hadoop.hdds.scm.container.replication.ReplicationManager; import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.utils.ContainerLogger; +import org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions; import org.apache.hadoop.ozone.container.ozoneimpl.BackgroundContainerMetadataScanner; import org.apache.hadoop.ozone.container.ozoneimpl.ContainerScannerConfiguration; import org.apache.ozone.test.GenericTestUtils; @@ -48,11 +49,11 @@ class TestBackgroundContainerMetadataScannerIntegration private final GenericTestUtils.LogCapturer logCapturer = GenericTestUtils.LogCapturer.log4j2(ContainerLogger.LOG_NAME); - static Collection supportedCorruptionTypes() { - return ContainerCorruptions.getAllParamsExcept( - ContainerCorruptions.MISSING_BLOCK, - ContainerCorruptions.CORRUPT_BLOCK, - ContainerCorruptions.TRUNCATED_BLOCK); + static Collection supportedCorruptionTypes() { + return TestContainerCorruptions.getAllParamsExcept( + TestContainerCorruptions.MISSING_BLOCK, + TestContainerCorruptions.CORRUPT_BLOCK, + TestContainerCorruptions.TRUNCATED_BLOCK); } @BeforeAll @@ -85,7 +86,7 @@ static void init() 
throws Exception { */ @ParameterizedTest @MethodSource("supportedCorruptionTypes") - void testCorruptionDetected(ContainerCorruptions corruption) + void testCorruptionDetected(TestContainerCorruptions corruption) throws Exception { // Write data to an open and closed container. long closedContainerID = writeDataThenCloseContainer(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestContainerScannerIntegrationAbstract.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestContainerScannerIntegrationAbstract.java index 36e83f4f0af..5f6c14bcde7 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestContainerScannerIntegrationAbstract.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestContainerScannerIntegrationAbstract.java @@ -19,8 +19,6 @@ */ package org.apache.hadoop.ozone.dn.scanner; -import org.apache.commons.io.FileUtils; -import org.apache.commons.io.IOUtils; import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; @@ -41,42 +39,26 @@ import org.apache.hadoop.ozone.container.ContainerTestHelper; import org.apache.hadoop.ozone.container.TestHelper; import org.apache.hadoop.ozone.container.common.interfaces.Container; -import org.apache.hadoop.ozone.container.ozoneimpl.ContainerScanError.FailureType; import org.apache.hadoop.ozone.container.ozoneimpl.ContainerScannerConfiguration; import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer; import org.apache.ozone.test.GenericTestUtils; -import org.apache.ozone.test.GenericTestUtils.LogCapturer; import org.apache.ozone.test.LambdaTestUtils; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Timeout; -import java.io.File; import java.io.IOException; -import java.io.UncheckedIOException; -import java.nio.file.Files; -import 
java.nio.file.Path; -import java.nio.file.StandardOpenOption; import java.time.Duration; -import java.util.Arrays; -import java.util.EnumSet; -import java.util.Objects; -import java.util.Optional; import java.util.Set; import java.util.UUID; import java.util.concurrent.TimeUnit; -import java.util.function.Consumer; -import java.util.regex.Pattern; import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.hadoop.hdds.client.ReplicationFactor.ONE; import static org.apache.hadoop.hdds.client.ReplicationType.RATIS; import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State; -import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; /** * This class tests the data scanner functionality. @@ -228,174 +210,4 @@ private OzoneOutputStream createKey(String keyName) throws Exception { return TestHelper.createKey( keyName, RATIS, ONE, 0, store, volumeName, bucketName); } - - /** - * Represents a type of container corruption that can be injected into the - * test. - */ - protected enum ContainerCorruptions { - MISSING_CHUNKS_DIR(container -> { - File chunksDir = new File(container.getContainerData().getContainerPath(), - "chunks"); - try { - FileUtils.deleteDirectory(chunksDir); - } catch (IOException ex) { - // Fail the test. - throw new UncheckedIOException(ex); - } - assertFalse(chunksDir.exists()); - }, FailureType.MISSING_CHUNKS_DIR), - - MISSING_METADATA_DIR(container -> { - File metadataDir = - new File(container.getContainerData().getContainerPath(), - "metadata"); - try { - FileUtils.deleteDirectory(metadataDir); - } catch (IOException ex) { - // Fail the test. 
- throw new UncheckedIOException(ex); - } - assertFalse(metadataDir.exists()); - }, FailureType.MISSING_METADATA_DIR), - - MISSING_CONTAINER_FILE(container -> { - File containerFile = container.getContainerFile(); - assertTrue(containerFile.delete()); - assertFalse(containerFile.exists()); - }, FailureType.MISSING_CONTAINER_FILE), - - MISSING_CONTAINER_DIR(container -> { - File containerDir = - new File(container.getContainerData().getContainerPath()); - try { - FileUtils.deleteDirectory(containerDir); - } catch (IOException ex) { - // Fail the test. - throw new UncheckedIOException(ex); - } - assertFalse(containerDir.exists()); - }, FailureType.MISSING_CONTAINER_DIR), - - MISSING_BLOCK(container -> { - File chunksDir = new File( - container.getContainerData().getContainerPath(), "chunks"); - for (File blockFile: - chunksDir.listFiles((dir, name) -> name.endsWith(".block"))) { - try { - Files.delete(blockFile.toPath()); - } catch (IOException ex) { - // Fail the test. - throw new UncheckedIOException(ex); - } - } - }, FailureType.MISSING_CHUNK_FILE), - - CORRUPT_CONTAINER_FILE(container -> { - File containerFile = container.getContainerFile(); - corruptFile(containerFile); - }, FailureType.CORRUPT_CONTAINER_FILE), - - TRUNCATED_CONTAINER_FILE(container -> { - File containerFile = container.getContainerFile(); - truncateFile(containerFile); - }, FailureType.CORRUPT_CONTAINER_FILE), - - CORRUPT_BLOCK(container -> { - File chunksDir = new File(container.getContainerData().getContainerPath(), - "chunks"); - Optional blockFile = Arrays.stream(Objects.requireNonNull( - chunksDir.listFiles((dir, name) -> name.endsWith(".block")))) - .findFirst(); - assertTrue(blockFile.isPresent()); - corruptFile(blockFile.get()); - }, FailureType.CORRUPT_CHUNK), - - TRUNCATED_BLOCK(container -> { - File chunksDir = new File(container.getContainerData().getContainerPath(), - "chunks"); - Optional blockFile = Arrays.stream(Objects.requireNonNull( - chunksDir.listFiles((dir, name) -> 
name.endsWith(".block")))) - .findFirst(); - assertTrue(blockFile.isPresent()); - truncateFile(blockFile.get()); - }, FailureType.INCONSISTENT_CHUNK_LENGTH); - - private final Consumer> corruption; - private final FailureType expectedResult; - - ContainerCorruptions(Consumer> corruption, FailureType expectedResult) { - this.corruption = corruption; - this.expectedResult = expectedResult; - - } - - public void applyTo(Container container) { - corruption.accept(container); - } - - /** - * Check that the correct corruption type was written to the container log for the provided container. - */ - public void assertLogged(long containerID, int numErrors, LogCapturer logCapturer) { - // Enable multiline regex mode with "(?m)". This allows ^ to check for the start of a line in a multiline string. - // The log will have captured lines from all previous tests as well since we re-use the same cluster. - Pattern logLine = Pattern.compile("(?m)^ID=" + containerID + ".*" + " Scan result has " + numErrors + - " error.*" + expectedResult.toString()); - assertThat(logCapturer.getOutput()).containsPattern(logLine); - } - - /** - * Get all container corruption types as parameters for junit 4 - * parameterized tests, except the ones specified. - */ - public static Set getAllParamsExcept( - ContainerCorruptions... exclude) { - Set includeSet = - EnumSet.allOf(ContainerCorruptions.class); - Arrays.asList(exclude).forEach(includeSet::remove); - return includeSet; - } - - /** - * Overwrite the file with random bytes. - */ - private static void corruptFile(File file) { - try { - final int length = (int) file.length(); - - Path path = file.toPath(); - final byte[] original = IOUtils.readFully(Files.newInputStream(path), length); - - // Corrupt the last byte of the last chunk. This should map to a single error from the scanner. 
- final byte[] corruptedBytes = Arrays.copyOf(original, length); - corruptedBytes[length - 1] = (byte) (original[length - 1] << 1); - - Files.write(path, corruptedBytes, - StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.SYNC); - - assertThat(IOUtils.readFully(Files.newInputStream(path), length)) - .isEqualTo(corruptedBytes) - .isNotEqualTo(original); - } catch (IOException ex) { - // Fail the test. - throw new UncheckedIOException(ex); - } - } - - /** - * Truncate the file to 0 bytes in length. - */ - private static void truncateFile(File file) { - try { - Files.write(file.toPath(), new byte[0], - StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.SYNC); - - assertEquals(0, file.length()); - } catch (IOException ex) { - // Fail the test. - throw new UncheckedIOException(ex); - } - } - } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestOnDemandContainerDataScannerIntegration.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestOnDemandContainerDataScannerIntegration.java index 37194812c90..af94506c827 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestOnDemandContainerDataScannerIntegration.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestOnDemandContainerDataScannerIntegration.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State; import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.utils.ContainerLogger; +import org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions; import org.apache.hadoop.ozone.container.ozoneimpl.OnDemandContainerDataScanner; import org.apache.hadoop.ozone.container.ozoneimpl.ContainerScannerConfiguration; import org.apache.ozone.test.GenericTestUtils; @@ -57,14 +58,14 @@ class 
TestOnDemandContainerDataScannerIntegration - Block checksums are verified on the client side. If there is a checksum error during read, the datanode will not learn about it. */ - static Collection supportedCorruptionTypes() { - return ContainerCorruptions.getAllParamsExcept( - ContainerCorruptions.MISSING_METADATA_DIR, - ContainerCorruptions.MISSING_CONTAINER_FILE, - ContainerCorruptions.CORRUPT_CONTAINER_FILE, - ContainerCorruptions.TRUNCATED_CONTAINER_FILE, - ContainerCorruptions.CORRUPT_BLOCK, - ContainerCorruptions.TRUNCATED_BLOCK); + static Collection supportedCorruptionTypes() { + return TestContainerCorruptions.getAllParamsExcept( + TestContainerCorruptions.MISSING_METADATA_DIR, + TestContainerCorruptions.MISSING_CONTAINER_FILE, + TestContainerCorruptions.CORRUPT_CONTAINER_FILE, + TestContainerCorruptions.TRUNCATED_CONTAINER_FILE, + TestContainerCorruptions.CORRUPT_BLOCK, + TestContainerCorruptions.TRUNCATED_BLOCK); } @BeforeAll @@ -90,7 +91,7 @@ static void init() throws Exception { */ @ParameterizedTest @MethodSource("supportedCorruptionTypes") - void testCorruptionDetected(ContainerCorruptions corruption) + void testCorruptionDetected(TestContainerCorruptions corruption) throws Exception { String keyName = "testKey"; long containerID = writeDataThenCloseContainer(keyName);