From ef8e1ebaabd12ba43a59484e319bc0ea3270040d Mon Sep 17 00:00:00 2001 From: thucpham Date: Thu, 26 Oct 2023 10:55:13 +0200 Subject: [PATCH 1/8] st-2361 [*] connan2 migration --- CMakeLists.txt | 2 +- cli/src/CMakeLists.txt | 2 +- src/CMakeLists.txt | 4 +++- test/CMakeLists.txt | 7 ++++--- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0e1601f..9e689fb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ project(FuzzyMatch) cmake_minimum_required(VERSION 3.5.1) -set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD 17) set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") if(TARGET OpenNMTTokenizer) diff --git a/cli/src/CMakeLists.txt b/cli/src/CMakeLists.txt index 5050fee..38923ee 100644 --- a/cli/src/CMakeLists.txt +++ b/cli/src/CMakeLists.txt @@ -10,5 +10,5 @@ target_include_directories(${PROJECT_NAME} PUBLIC target_link_libraries(${PROJECT_NAME}-cli ${PROJECT_NAME} - ${Boost_LIBRARIES} + Boost::program_options ) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d818197..746d385 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -38,6 +38,8 @@ target_include_directories(${PROJECT_NAME} PUBLIC target_link_libraries(${PROJECT_NAME} ${OPENNMT_TOKENIZER_LIB} ${ICU_LIBRARIES} - ${Boost_LIBRARIES} + Boost::serialization + Boost::iostreams + Boost::system Threads::Threads ) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 27026d3..f0a9997 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -2,7 +2,7 @@ add_executable(${PROJECT_NAME}-test test.cc) find_package(GTest REQUIRED) -find_package(Boost COMPONENTS filesystem REQUIRED) +find_package(Boost COMPONENTS filesystem system serialization REQUIRED) include_directories( ${source_dir}/include @@ -17,8 +17,9 @@ target_include_directories(${PROJECT_NAME}-test PUBLIC target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME} GTest::GTest GTest::Main - ${Boost_FILESYSTEM_LIBRARY} - ${GTEST_LIBRARIES} + Boost::filesystem + Boost::serialization + Boost::system ) set(${PROJECT_NAME}-test-args "${CMAKE_CURRENT_LIST_DIR}/data/") From 42e95bdf2b72194de88372da5f82dcbab5dcb5ee Mon Sep 17 00:00:00 2001 From: thucpham Date: Mon, 6 Nov 2023 17:00:52 +0100 Subject: [PATCH 2/8] refs ST-2362 [*] update gtest in cmake --- test/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f0a9997..ca6675d 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -6,7 +6,7 @@ find_package(Boost COMPONENTS filesystem system serialization REQUIRED) include_directories( ${source_dir}/include - ${GTEST_INCLUDE_DIRS} + ${GTest_INCLUDE_DIRS} ) target_include_directories(${PROJECT_NAME}-test PUBLIC From 12a930ee0424eedfa25f2d1492cb32302ca90462 Mon Sep 17 00:00:00 2001 From: thucpham Date: Fri, 10 Nov 2023 14:26:20 +0100 Subject: [PATCH 3/8] refs ST-2362 [*] fix tcmalloc --- cli/src/CMakeLists.txt | 1 + test/CMakeLists.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/cli/src/CMakeLists.txt b/cli/src/CMakeLists.txt index 38923ee..091b231 100644 --- a/cli/src/CMakeLists.txt +++ b/cli/src/CMakeLists.txt @@ -11,4 +11,5 @@ target_include_directories(${PROJECT_NAME} PUBLIC target_link_libraries(${PROJECT_NAME}-cli ${PROJECT_NAME} Boost::program_options + gperftools::tcmalloc_minimal ) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ca6675d..15e9c3b 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -20,6 +20,7 @@ target_link_libraries(${PROJECT_NAME}-test Boost::filesystem Boost::serialization Boost::system + gperftools::tcmalloc_minimal ) set(${PROJECT_NAME}-test-args "${CMAKE_CURRENT_LIST_DIR}/data/") From 25f7273f80b887c79744bdc58827fc8aa345ee05 Mon Sep 17 00:00:00 2001 From: thucpham Date: Fri, 10 Nov 2023 16:25:55 +0100 Subject: [PATCH 4/8] refs ST-2362 [*] fix icu --- src/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 746d385..df01ef3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -37,7 +37,7 @@ target_include_directories(${PROJECT_NAME} PUBLIC target_link_libraries(${PROJECT_NAME} ${OPENNMT_TOKENIZER_LIB} - ${ICU_LIBRARIES} + icu::icu Boost::serialization Boost::iostreams Boost::system From 1fa8f1bd8f2de22985fbbfaa69c3c8ddd146e2e2 Mon Sep 17 00:00:00 2001 From: thucpham Date: Wed, 15 Nov 2023 14:02:47 +0100 Subject: [PATCH 5/8] refs ST-2362 [*] update tcmalloc --- cli/src/CMakeLists.txt | 1 - test/CMakeLists.txt | 1 - 2 files changed, 2 deletions(-) diff --git a/cli/src/CMakeLists.txt b/cli/src/CMakeLists.txt index 091b231..38923ee 100644 --- a/cli/src/CMakeLists.txt +++ b/cli/src/CMakeLists.txt @@ -11,5 +11,4 @@ target_include_directories(${PROJECT_NAME} PUBLIC target_link_libraries(${PROJECT_NAME}-cli ${PROJECT_NAME} Boost::program_options - gperftools::tcmalloc_minimal ) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 15e9c3b..ca6675d 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -20,7 +20,6 @@ target_link_libraries(${PROJECT_NAME}-test Boost::filesystem Boost::serialization Boost::system - gperftools::tcmalloc_minimal ) set(${PROJECT_NAME}-test-args "${CMAKE_CURRENT_LIST_DIR}/data/") From 225434583e6ff713ba497e6d0159eec0d0a09717 Mon Sep 17 00:00:00 2001 From: thucpham Date: Wed, 22 Nov 2023 17:00:14 +0100 Subject: [PATCH 6/8] refs ST-2362 [*] fix fuzzy --- src/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index df01ef3..746d385 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -37,7 +37,7 @@ target_include_directories(${PROJECT_NAME} PUBLIC target_link_libraries(${PROJECT_NAME} ${OPENNMT_TOKENIZER_LIB} - icu::icu + ${ICU_LIBRARIES} Boost::serialization Boost::iostreams Boost::system From 337ae33afe6dc7cbc9647c6d030739792a9e6292 Mon Sep 17 00:00:00 2001 From: "dakun.zhang" Date: Fri, 3 May 2024 17:41:53 +0200 Subject: [PATCH 7/8] add segment alphabet for Thai and Burmese --- src/fuzzy_match.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/fuzzy_match.cc b/src/fuzzy_match.cc index 84dfa83..2dcc7c5 100644 --- a/src/fuzzy_match.cc +++ b/src/fuzzy_match.cc @@ -86,6 +86,8 @@ namespace fuzzy _ptokenizer->add_alphabet_to_segment("Kanbun"); _ptokenizer->add_alphabet_to_segment("Katakana"); _ptokenizer->add_alphabet_to_segment("Hiragana"); + _ptokenizer->add_alphabet_to_segment("Thai"); + _ptokenizer->add_alphabet_to_segment("Myanmar"); // Burmese } void From 1a92274ebc93328a6ee6a78b300b9ec9693fafe4 Mon Sep 17 00:00:00 2001 From: "dakun.zhang" Date: Mon, 13 May 2024 16:09:33 +0200 Subject: [PATCH 8/8] fix systran/fuzzymatch docker image: Boost 1.74 --- include/fuzzy/sentence.hh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/fuzzy/sentence.hh b/include/fuzzy/sentence.hh index 5b77e40..d8fbc8c 100644 --- a/include/fuzzy/sentence.hh +++ b/include/fuzzy/sentence.hh @@ -4,6 +4,17 @@ #include #include +// +// Even latest Debian Sid (March 2023) uses Boost 1.74 which does not behave well with very fresh compilers and triggers this error: +// https://github.com/pavel-odintsov/fastnetmon/issues/970 +// This bug was fixed in fresh Boost versions: https://github.com/boostorg/serialization/issues/219 and we apply workaround only for 1.74 +// + +#include +#if BOOST_VERSION / 100000 == 1 && BOOST_VERSION / 100 % 1000 == 74 +#include +#endif + #include #include