Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add h2 logging #2198

Open
wants to merge 6 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -237,12 +237,14 @@ if (LBANN_WITH_DISTCONV)
find_package(DiHydrogen 0.3.0 CONFIG REQUIRED COMPONENTS Meta Patterns DistConv)
set(LBANN_HAS_DISTCONV TRUE)
set(LBANN_H2_LIBS
H2::H2Core
H2::H2Meta
H2::H2Patterns
H2::H2DistConv)
else ()
find_package(DiHydrogen CONFIG REQUIRED COMPONENTS Meta Patterns)
set(LBANN_H2_LIBS
H2::H2Core
H2::H2Meta
H2::H2Patterns)
endif ()
Expand Down Expand Up @@ -660,6 +662,7 @@ target_link_libraries(lbann PUBLIC
${CLARA_LIBRARIES}
${LBANN_PYTHON_LIBS}
protobuf::libprotobuf
spdlog::spdlog
${CEREAL_LIBRARIES}
ZSTR::ZSTR)

Expand Down
1 change: 1 addition & 0 deletions include/lbann/lbann.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,5 +231,6 @@
#include "lbann/utils/stack_profiler.hpp"
#include "lbann/utils/stack_trace.hpp"
#include "lbann/utils/summary.hpp"
#include "lbann/utils/logging.hpp"

#endif // LBANN_LBANN_HPP_INCLUDED
1 change: 1 addition & 0 deletions include/lbann/utils/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ set_full_path(THIS_DIR_HEADERS
im2col.hpp
jag_utils.hpp
lbann_library.hpp
logging.hpp
make_abstract.hpp
memory.hpp
mild_exception.hpp
Expand Down
18 changes: 2 additions & 16 deletions include/lbann/utils/exception.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#define LBANN_UTILS_EXCEPTION_HPP_INCLUDED

#include "lbann/comm.hpp"
#include "lbann/utils/logging.hpp"

#include <exception>
#include <iostream>
Expand All @@ -50,22 +51,7 @@
} while (0)

// Macro to print a warning to standard error stream.
//
// The emitted line is built with lbann::build_string and has the form
//   LBANN warning[ on rank R] (<file>:<line>): <args...>
// where the " on rank R" part is included only when
// lbann::get_rank_in_world() returns a non-negative value. All macro
// arguments are concatenated onto the end of the message, and the line is
// terminated with std::endl. The body is wrapped in do { } while (0) so the
// macro expands to a single statement.
#define LBANN_WARNING(...) \
do { \
const int rank_LBANN_WARNING = lbann::get_rank_in_world(); \
std::cerr << lbann::build_string( \
"LBANN warning", \
(rank_LBANN_WARNING >= 0 \
? " on rank " + std::to_string(rank_LBANN_WARNING) \
: std::string()), \
" (", \
__FILE__, \
":", \
__LINE__, \
"): ", \
__VA_ARGS__) \
<< std::endl; \
} while (0)
// Macro to log a warning on the run-time (LOG_RT) logger.
//
// Callers pass a list of values to be concatenated, e.g.
//   LBANN_WARNING("value = ", x, " is out of range");
// so the arguments are first joined with lbann::build_string and the result
// is logged through a literal "{}" format. Forwarding __VA_ARGS__ directly
// would make the first argument the format string: the remaining pieces
// would be dropped (or fail to compile when the first argument is not a
// string), and any braces in the message text would be misinterpreted.
#define LBANN_WARNING(...)                                                     \
  LBANN_WARN(::lbann::logging::LBANN_Logger_ID::LOG_RT,                        \
             "{}",                                                             \
             ::lbann::build_string(__VA_ARGS__))

// Macro to print a message to standard cout stream.
#define LBANN_MSG(...) \
Expand Down
115 changes: 115 additions & 0 deletions include/lbann/utils/logging.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory.
// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
// the CONTRIBUTORS file. <[email protected]>
//
// LLNL-CODE-697807.
// All rights reserved.
//
// This file is part of LBANN: Livermore Big Artificial Neural Network
// Toolkit. For details, see http://software.llnl.gov/LBANN or
// https://github.com/LLNL/LBANN.
//
// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
// may not use this file except in compliance with the License. You may
// obtain a copy of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the license.
////////////////////////////////////////////////////////////////////////////////

#ifndef LBANN_LOGGING_HPP_INCLUDED
#define LBANN_LOGGING_HPP_INCLUDED

#include <h2/utils/Logger.hpp>

#include <iostream>
#include <cstdlib>
#include <vector>

namespace lbann {
namespace logging {

// Identifiers for the loggers LBANN provides. Used with
// lbann::logging::get() and the LBANN_*_{TRACE,DEBUG,INFO,WARN,ERR,CRIT}
// macros instead of referring to loggers by raw string names.
enum LBANN_Logger_ID
{
LOG_RT, // run-time logger
LOG_IO, // I/O logger
LOG_TRAIN, // training logger
};

// Register LBANN's loggers and configure their levels. The implementation
// hands the loggers to h2::setup_levels together with the key
// "LBANN_LOG_LEVEL" (presumably the name of an environment variable --
// confirm against the h2 documentation).
void setup_loggers();

// Return the enumerator's name as a C string (e.g. "LOG_RT").
// Raw string may be useful for debugging.
// Throws lbann_exception if `id` is not a known LBANN_Logger_ID.
char const* logger_id_str(LBANN_Logger_ID id);

// Access the actual logger object associated with `id`.
// Throws lbann_exception if `id` is not a known LBANN_Logger_ID.
h2::Logger& get(LBANN_Logger_ID id);

}// namespace logging
}// namespace lbann

// Core logging macro; every other LBANN_* logging macro expands to this one.
//
//   logger_id  an lbann::logging::LBANN_Logger_ID selecting the logger
//   level      an h2::Logger::LogLevelType
//   ...        the message, forwarded unchanged to the logger's log() call
//              (the first argument acts as the format string)
//
// The message is only forwarded when the selected logger reports that it
// should log at `level`; the call site's file, line and function are
// attached to the record. `logger_id` and `level` are each evaluated exactly
// once, and all names are fully qualified, so the macro can be used from any
// namespace. The do { } while (0) wrapper makes it a single statement.
#define LBANN_LOG(logger_id, level, ...)                                       \
  do {                                                                         \
    auto& lbann_log_logger = ::lbann::logging::get((logger_id));               \
    auto const lbann_log_level = (level);                                      \
    if (lbann_log_logger.should_log(lbann_log_level)) {                        \
      lbann_log_logger.get().log(                                              \
        ::spdlog::source_loc{__FILE__, __LINE__, SPDLOG_FUNCTION},             \
        ::h2::to_spdlog_level(lbann_log_level),                                \
        __VA_ARGS__);                                                          \
    }                                                                          \
  } while (0)

// Level-specific convenience wrappers around LBANN_LOG. Each takes the
// logger id followed by the message arguments and logs at the fixed
// h2::Logger::LogLevelType named in its expansion.
#define LBANN_TRACE(logger_id, ...) LBANN_LOG(logger_id, ::h2::Logger::LogLevelType::TRACE, __VA_ARGS__)
#define LBANN_DEBUG(logger_id, ...) LBANN_LOG(logger_id, ::h2::Logger::LogLevelType::DEBUG, __VA_ARGS__)
#define LBANN_INFO(logger_id, ...) LBANN_LOG(logger_id, ::h2::Logger::LogLevelType::INFO, __VA_ARGS__)
#define LBANN_WARN(logger_id, ...) LBANN_LOG(logger_id, ::h2::Logger::LogLevelType::WARN, __VA_ARGS__)
#define LBANN_ERR(logger_id, ...) LBANN_LOG(logger_id, ::h2::Logger::LogLevelType::ERROR, __VA_ARGS__)
#define LBANN_CRIT(logger_id, ...) LBANN_LOG(logger_id, ::h2::Logger::LogLevelType::CRITICAL, __VA_ARGS__)

// Run time: shorthands that log to the LOG_RT logger at each level.
// The arguments are the message only; the logger id is supplied here.
#define LBANN_RT_TRACE(...) LBANN_TRACE(::lbann::logging::LBANN_Logger_ID::LOG_RT, __VA_ARGS__)

#define LBANN_RT_DEBUG(...) LBANN_DEBUG(::lbann::logging::LBANN_Logger_ID::LOG_RT, __VA_ARGS__)

#define LBANN_RT_INFO(...) LBANN_INFO(::lbann::logging::LBANN_Logger_ID::LOG_RT, __VA_ARGS__)

#define LBANN_RT_WARN(...) LBANN_WARN(::lbann::logging::LBANN_Logger_ID::LOG_RT, __VA_ARGS__)

#define LBANN_RT_ERR(...) LBANN_ERR(::lbann::logging::LBANN_Logger_ID::LOG_RT, __VA_ARGS__)

#define LBANN_RT_CRIT(...) LBANN_CRIT(::lbann::logging::LBANN_Logger_ID::LOG_RT, __VA_ARGS__)

// IO: shorthands that log to the LOG_IO logger at each level.
// The arguments are the message only; the logger id is supplied here.
#define LBANN_IO_TRACE(...) LBANN_TRACE(::lbann::logging::LBANN_Logger_ID::LOG_IO, __VA_ARGS__)

#define LBANN_IO_DEBUG(...) LBANN_DEBUG(::lbann::logging::LBANN_Logger_ID::LOG_IO, __VA_ARGS__)

#define LBANN_IO_INFO(...) LBANN_INFO(::lbann::logging::LBANN_Logger_ID::LOG_IO, __VA_ARGS__)

#define LBANN_IO_WARN(...) LBANN_WARN(::lbann::logging::LBANN_Logger_ID::LOG_IO, __VA_ARGS__)

#define LBANN_IO_ERR(...) LBANN_ERR(::lbann::logging::LBANN_Logger_ID::LOG_IO, __VA_ARGS__)

#define LBANN_IO_CRIT(...) LBANN_CRIT(::lbann::logging::LBANN_Logger_ID::LOG_IO, __VA_ARGS__)

// Training: shorthands that log to the LOG_TRAIN logger at each level.
// The arguments are the message only; the logger id is supplied here.
#define LBANN_TRAIN_TRACE(...) LBANN_TRACE(::lbann::logging::LBANN_Logger_ID::LOG_TRAIN, __VA_ARGS__)

#define LBANN_TRAIN_DEBUG(...) LBANN_DEBUG(::lbann::logging::LBANN_Logger_ID::LOG_TRAIN, __VA_ARGS__)

#define LBANN_TRAIN_INFO(...) LBANN_INFO(::lbann::logging::LBANN_Logger_ID::LOG_TRAIN, __VA_ARGS__)

#define LBANN_TRAIN_WARN(...) LBANN_WARN(::lbann::logging::LBANN_Logger_ID::LOG_TRAIN, __VA_ARGS__)

#define LBANN_TRAIN_ERR(...) LBANN_ERR(::lbann::logging::LBANN_Logger_ID::LOG_TRAIN, __VA_ARGS__)

#define LBANN_TRAIN_CRIT(...) LBANN_CRIT(::lbann::logging::LBANN_Logger_ID::LOG_TRAIN, __VA_ARGS__)


#endif // LBANN_LOGGING_HPP_INCLUDED
5 changes: 4 additions & 1 deletion src/base.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
#include "lbann/utils/omp_diagnostics.hpp"
#include "lbann/utils/options.hpp"
#include "lbann/utils/stack_trace.hpp"
#include "lbann/utils/logging.hpp"

#ifdef LBANN_HAS_DNN_LIB
#include "lbann/utils/dnn_lib/helpers.hpp"
Expand Down Expand Up @@ -85,7 +86,6 @@ lbann_comm& get_current_comm() noexcept { return *world_comm_; }

auto lbann::initialize_lbann(El::mpi::Comm&& c) -> std::unique_ptr<lbann_comm>
{

// Parse command-line arguments and environment variables
auto& arg_parser = global_argument_parser();
(void)arg_parser;
Expand Down Expand Up @@ -191,6 +191,9 @@ void lbann::finalize_lbann(lbann_comm* comm)

auto lbann::initialize(int& argc, char**& argv) -> world_comm_ptr
{
//FIXME(KLG): Can this go here?
logging::setup_loggers();

// Parse command-line arguments and environment variables
auto& arg_parser = global_argument_parser();
(void)arg_parser;
Expand Down
1 change: 1 addition & 0 deletions src/utils/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ set_full_path(THIS_DIR_SOURCES
im2col.cpp
jag_common.cpp
lbann_library.cpp
logging.cpp
miopen.cpp
number_theory.cpp
omp_diagnostics.cpp
Expand Down
79 changes: 79 additions & 0 deletions src/utils/logging.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory.
// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
// the CONTRIBUTORS file. <[email protected]>
//
// LLNL-CODE-697807.
// All rights reserved.
//
// This file is part of LBANN: Livermore Big Artificial Neural Network
// Toolkit. For details, see http://software.llnl.gov/LBANN or
// https://github.com/LLNL/LBANN.
//
// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
// may not use this file except in compliance with the License. You may
// obtain a copy of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the license.
////////////////////////////////////////////////////////////////////////////////

#include <lbann/utils/logging.hpp>
#include "lbann/utils/exception.hpp"
#include <h2/utils/Logger.hpp>

#include <iostream>
#include <cstdlib>
#include <vector>

namespace lbann {
namespace logging {

// Translation-unit-local state: one h2::Logger per LBANN_Logger_ID, plus the
// list of logger pointers that setup_loggers() passes to h2::setup_levels.
namespace {
h2::Logger io_logger("IO");
h2::Logger rt_logger("RT");
h2::Logger train_logger("TRAIN");
std::vector<h2::Logger*> logger_vec;
} // namespace

// Register LBANN's loggers and configure their levels.
//
// The registry is rebuilt from scratch on every call rather than appended
// to, so calling this more than once (e.g. repeated initialization) does not
// accumulate duplicate entries in logger_vec. The levels are configured by
// h2::setup_levels using the key "LBANN_LOG_LEVEL" (presumably the name of
// an environment variable -- confirm against the h2 documentation).
void setup_loggers()
{
  logger_vec = {&io_logger, &rt_logger, &train_logger};
  h2::setup_levels(logger_vec, "LBANN_LOG_LEVEL");
}

// Map a logger id to the spelling of its enumerator.
// Throws lbann_exception when `id` is not one of the known ids.
char const* logger_id_str(LBANN_Logger_ID id)
{
  if (id == LBANN_Logger_ID::LOG_RT) {
    return "LOG_RT";
  }
  if (id == LBANN_Logger_ID::LOG_IO) {
    return "LOG_IO";
  }
  if (id == LBANN_Logger_ID::LOG_TRAIN) {
    return "LOG_TRAIN";
  }
  throw lbann_exception("Unknown LBANN_Logger_ID");
}

// Look up the logger object that corresponds to `id`.
// Throws lbann_exception when `id` is not one of the known ids.
h2::Logger& get(LBANN_Logger_ID id)
{
  h2::Logger* selected = nullptr;
  switch (id) {
  case LBANN_Logger_ID::LOG_RT:
    selected = &rt_logger;
    break;
  case LBANN_Logger_ID::LOG_IO:
    selected = &io_logger;
    break;
  case LBANN_Logger_ID::LOG_TRAIN:
    selected = &train_logger;
    break;
  default:
    break;
  }
  // No case matched: the id is outside the known set.
  if (selected == nullptr) {
    throw lbann_exception("Unknown LBANN_Logger_ID");
  }
  return *selected;
}

}// namespace logging
}// namespace lbann