Limit characters that allow diacritics after them.

This commit is contained in:
John Preston 2024-05-28 11:44:53 +04:00
parent a48b3d3750
commit e7c598affe
2 changed files with 14 additions and 1 deletions

View file

@ -1822,6 +1822,9 @@ bool IsDiacritic(QChar ch) { // diacritic and variation selectors
} }
bool IsReplacedBySpace(QChar ch) { bool IsReplacedBySpace(QChar ch) {
// These symbols are replaced by a space on the Telegram server,
// so we replace them as well, to keep sent and received text consistent.
//
// \xe2\x80[\xa8 - \xac\xad] // 8232 - 8237 // \xe2\x80[\xa8 - \xac\xad] // 8232 - 8237
// QString from1 = QString::fromUtf8("\xe2\x80\xa8"), to1 = QString::fromUtf8("\xe2\x80\xad"); // QString from1 = QString::fromUtf8("\xe2\x80\xa8"), to1 = QString::fromUtf8("\xe2\x80\xad");
// \xcc[\xb3\xbf\x8a] // 819, 831, 778 // \xcc[\xb3\xbf\x8a] // 819, 831, 778

View file

@ -81,6 +81,16 @@ constexpr auto kMaxDiacAfterSymbol = 2;
&& (font->f.family() == qstr("Open Sans")); && (font->f.family() == qstr("Open Sans"));
} }
// Decides whether a diacritic may be attached after the character `ch`.
//
// Returns false when the UTF-16 code unit is U+0020 (space) or lower, and
// also when the character's Unicode category is control, format,
// private-use or unassigned. Returns true for every other character.
[[nodiscard]] bool IsDiacriticAllowedAfter(QChar ch) {
	// Space and everything below it can never carry a diacritic.
	if (ch.unicode() <= 32) {
		return false;
	}
	// Reject the "Other" categories that have no visible base glyph.
	switch (ch.category()) {
	case QChar::Other_Control:
	case QChar::Other_Format:
	case QChar::Other_PrivateUse:
	case QChar::Other_NotAssigned:
		return false;
	default:
		return true;
	}
}
} // namespace } // namespace
Parser::StartedEntity::StartedEntity(TextBlockFlags flags) Parser::StartedEntity::StartedEntity(TextBlockFlags flags)
@ -564,7 +574,7 @@ void Parser::parseCurrentChar() {
createBlock(-_emojiLookback); createBlock(-_emojiLookback);
} }
_t->_text.push_back(_ch); _t->_text.push_back(_ch);
_allowDiacritic = true; _allowDiacritic = IsDiacriticAllowedAfter(_ch);
} }
if (!isDiacritic) { if (!isDiacritic) {
_diacritics = 0; _diacritics = 0;