From 5727743f9a3d3f01b275eb77f39e7a24b9671c4d Mon Sep 17 00:00:00 2001
From: 23rd <23rd@vivaldi.net>
Date: Thu, 17 Nov 2022 20:18:43 +0300
Subject: [PATCH] Added external cld3 module.

---
 external/CMakeLists.txt               |   3 +
 external/cld3/CMakeLists.txt          | 105 ++++++++++++++++++++++++++
 external/cld3/generate_protobuf.cmake |  65 ++++++++++++++++
 variables.cmake                       |   6 ++
 4 files changed, 179 insertions(+)
 create mode 100644 external/cld3/CMakeLists.txt
 create mode 100644 external/cld3/generate_protobuf.cmake

diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt
index e769380..30282f0 100644
--- a/external/CMakeLists.txt
+++ b/external/CMakeLists.txt
@@ -36,6 +36,9 @@ endif()
 if (add_hunspell_library)
     add_checked_subdirectory(hunspell)
 endif()
+if (add_cld3_library)
+    add_checked_subdirectory(cld3)
+endif()
 add_checked_subdirectory(iconv)
 if (LINUX AND NOT DESKTOP_APP_DISABLE_JEMALLOC)
     add_checked_subdirectory(jemalloc)
diff --git a/external/cld3/CMakeLists.txt b/external/cld3/CMakeLists.txt
new file mode 100644
index 0000000..91d50ab
--- /dev/null
+++ b/external/cld3/CMakeLists.txt
@@ -0,0 +1,105 @@
+# This file is part of Desktop App Toolkit,
+# a set of libraries for developing nice desktop applications.
+#
+# For license and copyright information please follow this link:
+# https://github.com/desktop-app/legal/blob/master/LEGAL
+
+add_library(external_cld3 INTERFACE IMPORTED GLOBAL)
+add_library(desktop-app::external_cld3 ALIAS external_cld3)
+
+if (DESKTOP_APP_USE_PACKAGED)
+    # find_package(Protobuf REQUIRED)
+endif()
+
+add_library(external_cld3_bundled STATIC)
+init_target(external_cld3_bundled "(external)")
+
+set(cld3_loc ${third_party_loc}/cld3)
+set(cld3_src ${cld3_loc}/src)
+
+set(gen_loc ${cld3_loc}/build/gen)
+set(gen_dst ${gen_loc}/cld_3/protos)
+
+# Locate protoc and the protobuf-lite library under ${libs_loc}/protobuf.
+if (WIN32)
+    # Pick the Debug or Release protobuf build directory per configuration.
+    set(build_loc ${libs_loc}/protobuf/build/$<IF:$<CONFIG:Debug>,Debug,Release>)
+    set(protoc_executable ${build_loc}/protoc.exe)
+    set(protobuf_lib ${build_loc}/libprotobuf-lite.lib)
+else()
+    set(protoc_executable ${libs_loc}/protobuf/build/protoc)
+    set(protobuf_lib ${libs_loc}/protobuf/build/libprotobuf-lite.a)
+    if (LINUX)
+        target_compile_options(external_cld3_bundled PRIVATE -Wno-implicit-fallthrough)
+    endif()
+endif()
+# An explicitly provided PROTOBUF_PROTOC_EXECUTABLE overrides the default protoc path.
+if (PROTOBUF_PROTOC_EXECUTABLE)
+    set(protoc_executable ${PROTOBUF_PROTOC_EXECUTABLE})
+endif()
+
+include(generate_protobuf.cmake)
+
+# Sets up code generation for one cld3 .proto file.
+# protobuf_name - name of the .proto file inside ${cld3_src}.
+function(generate protobuf_name)
+    generate_single_protobuf(
+        external_cld3_bundled
+        ${gen_dst}
+        ${protobuf_name}
+        ${protoc_executable})
+endfunction()
+
+generate(feature_extractor.proto)
+generate(sentence.proto)
+generate(task_spec.proto)
+
+nice_target_sources(external_cld3_bundled ${cld3_src}
+PRIVATE
+    feature_extractor.proto
+    sentence.proto
+    task_spec.proto
+
+    base.cc
+    embedding_feature_extractor.cc
+    embedding_network.cc
+    feature_extractor.cc
+    feature_extractor.h
+    feature_types.cc
+    fml_parser.cc
+    language_identifier_features.cc
+    lang_id_nn_params.cc
+    nnet_language_identifier.cc
+    registry.cc
+    relevant_script_feature.cc
+    sentence_features.cc
+    task_context.cc
+    task_context_params.cc
+    unicodetext.cc
+    utils.cc
+    workspace.cc
+
+    script_span/generated_entities.cc
+    script_span/getonescriptspan.cc
+    script_span/getonescriptspan.h
+    script_span/getonescriptspan_test.cc
+    script_span/utf8statetable.cc
+    script_span/offsetmap.cc
+    script_span/text_processing.cc
+    script_span/text_processing.h
+    script_span/fixunicodevalue.cc
+)
+
+target_include_directories(external_cld3_bundled
+PUBLIC
+    ${cld3_src}
+    ${gen_loc}
+    ${libs_loc}/protobuf/src
+    ${libs_loc}/protobuf/third_party/abseil-cpp
+)
+
+target_link_libraries(external_cld3
+INTERFACE
+    external_cld3_bundled
+    ${protobuf_lib}
+)
diff --git a/external/cld3/generate_protobuf.cmake b/external/cld3/generate_protobuf.cmake
new file mode 100644
index 0000000..4a8ae1e
--- /dev/null
+++ b/external/cld3/generate_protobuf.cmake
@@ -0,0 +1,65 @@
+# Adds a protoc code generation step for a single .proto file.
+#
+# target_name   - target forwarded to generate_target().
+# gen_dst       - output directory for the generated .pb.cc / .pb.h files.
+# protobuf_name - .proto file name, looked up in ${cld3_src} of the calling scope.
+# executable    - protoc binary used to run the generation.
+function(generate_single_protobuf target_name gen_dst protobuf_name executable)
+    file(MAKE_DIRECTORY ${gen_dst})
+
+    # Copied from myprotobuf.cmake.
+    if (PROTOBUF_GENERATE_CPP_APPEND_PATH)
+        # Create an include path for each file specified
+        set(FIL ${cld3_src}/${protobuf_name})
+        get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
+        get_filename_component(ABS_PATH ${ABS_FIL} PATH)
+        list(FIND _protobuf_include_path ${ABS_PATH} _contains_already)
+        if (${_contains_already} EQUAL -1)
+            list(APPEND _protobuf_include_path -I ${ABS_PATH})
+        endif()
+    else()
+        set(_protobuf_include_path -I ${cld3_src})
+    endif()
+
+    if (DEFINED PROTOBUF_IMPORT_DIRS)
+        foreach (DIR ${PROTOBUF_IMPORT_DIRS})
+            get_filename_component(ABS_PATH ${DIR} ABSOLUTE)
+            list(FIND _protobuf_include_path ${ABS_PATH} _contains_already)
+            if (${_contains_already} EQUAL -1)
+                list(APPEND _protobuf_include_path -I ${ABS_PATH})
+            endif()
+        endforeach()
+    endif()
+    #
+
+    get_filename_component(protobuf_name_we ${protobuf_name} NAME_WE)
+
+    set(gen_timestamp ${gen_dst}/${protobuf_name}.timestamp)
+    set(gen_files
+        ${gen_dst}/${protobuf_name_we}.pb.cc
+        ${gen_dst}/${protobuf_name_we}.pb.h
+    )
+
+    # Fix warning MSB8065.
+    set_source_files_properties(${gen_timestamp} PROPERTIES SYMBOLIC 1)
+
+    set(gen_src ${cld3_src}/${protobuf_name})
+    add_custom_command(
+    OUTPUT
+        ${gen_timestamp}
+    BYPRODUCTS
+        ${gen_files}
+    COMMAND
+        ${executable}
+        --cpp_out
+        ${gen_dst}
+        ${_protobuf_include_path}
+        ${gen_src}
+    COMMENT "Generating protobuf ${protobuf_name} (${target_name})"
+    DEPENDS
+        ${executable}
+        ${gen_src}
+    VERBATIM
+    )
+    generate_target(${target_name} ${protobuf_name} ${gen_timestamp} "${gen_files}" ${gen_dst})
+endfunction()
diff --git a/variables.cmake b/variables.cmake
index dfd652f..91b7027 100644
--- a/variables.cmake
+++ b/variables.cmake
@@ -31,6 +31,7 @@ option(DESKTOP_APP_DISABLE_CRASH_REPORTS "Disable crash report generation." ${no
 option(DESKTOP_APP_DISABLE_AUTOUPDATE "Disable autoupdate." ${disable_autoupdate})
 option(DESKTOP_APP_USE_HUNSPELL_ONLY "Disable system spellchecker and use bundled Hunspell only. (For debugging purposes)" OFF)
 cmake_dependent_option(DESKTOP_APP_USE_ENCHANT "Use Enchant instead of bundled Hunspell." OFF LINUX OFF)
+cmake_dependent_option(DESKTOP_APP_USE_CLD3 "Disable system text language recognition and use bundled cld3 only." OFF APPLE ON)
 cmake_dependent_option(DESKTOP_APP_NO_PDB "Disable PDB file generation." OFF WIN32 OFF)
 cmake_dependent_option(DESKTOP_APP_DISABLE_JEMALLOC "Disable jemalloc, use system malloc." OFF LINUX OFF)
 
@@ -50,6 +51,11 @@ if ((WIN32
     set(add_hunspell_library 1)
 endif()
 
+set(add_cld3_library 0)
+if (LINUX OR DESKTOP_APP_USE_CLD3)
+    set(add_cld3_library 1)
+endif()
+
 set(build_macstore 0)
 set(build_winstore 0) # 32 or 64 bit
 set(build_win64 0) # normal or uwp