// This file is part of Desktop App Toolkit, // a set of libraries for developing nice desktop applications. // // For license and copyright information please follow this link: // https://github.com/desktop-app/legal/blob/master/LEGAL // #include "ui/text/text_parser.h" #include "base/platform/base_platform_info.h" #include "ui/integration.h" #include "ui/text/text_extended_data.h" #include "ui/text/text_isolated_emoji.h" #include "styles/style_basic.h" #include #include namespace Ui::Text { namespace { constexpr auto kStringLinkIndexShift = uint16(0x8000); constexpr auto kMaxDiacAfterSymbol = 2; [[nodiscard]] TextWithEntities PrepareRichFromRich( const TextWithEntities &text, const TextParseOptions &options) { auto result = text; const auto &preparsed = text.entities; const bool parseLinks = (options.flags & TextParseLinks); const bool parseColorized = (options.flags & TextParseColorized); if (!preparsed.isEmpty() && (parseLinks || parseColorized)) { bool parseMentions = (options.flags & TextParseMentions); bool parseHashtags = (options.flags & TextParseHashtags); bool parseBotCommands = (options.flags & TextParseBotCommands); bool parseMarkdown = (options.flags & TextParseMarkdown); if (!parseMentions || !parseHashtags || !parseBotCommands || !parseMarkdown) { int32 i = 0, l = preparsed.size(); result.entities.clear(); result.entities.reserve(l); for (; i < l; ++i) { auto type = preparsed.at(i).type(); if (((type == EntityType::Mention || type == EntityType::MentionName) && !parseMentions) || (type == EntityType::Hashtag && !parseHashtags) || (type == EntityType::Cashtag && !parseHashtags) || (!parseLinks && (type == EntityType::Url || type == EntityType::CustomUrl)) || (type == EntityType::BotCommand && !parseBotCommands) || // #TODO entities (!parseMarkdown && (type == EntityType::Bold || type == EntityType::Semibold || type == EntityType::Italic || type == EntityType::Underline || type == EntityType::StrikeOut || type == EntityType::Colorized || type == EntityType::Spoiler || type == EntityType::Code || type == EntityType::Pre || type == EntityType::Blockquote))) { continue; } result.entities.push_back(preparsed.at(i)); } } } return result; } [[nodiscard]] QFixed ComputeStopAfter( const TextParseOptions &options, const style::TextStyle &st) { return (options.maxw > 0 && options.maxh > 0) ? ((options.maxh / st.font->height) + 1) * options.maxw : QFIXED_MAX; } // Tilde fix in OpenSans. [[nodiscard]] bool ComputeCheckTilde(const style::TextStyle &st) { const auto &font = st.font; return (font->size() * style::DevicePixelRatio() == 13) && (font->flags() == 0) && (font->f.family() == qstr("Open Sans")); } [[nodiscard]] bool IsDiacriticAllowedAfter(QChar ch) { const auto code = ch.unicode(); const auto category = ch.category(); return (code > 32) && (category != QChar::Other_Control) && (category != QChar::Other_Format) && (category != QChar::Other_PrivateUse) && (category != QChar::Other_NotAssigned); } } // namespace Parser::StartedEntity::StartedEntity(TextBlockFlags flags) : _value(flags.value()) , _type(Type::Flags) { Expects(_value >= 0 && _value < int(kStringLinkIndexShift)); } Parser::StartedEntity::StartedEntity(uint16 index, Type type) : _value(index) , _type(type) { Expects((_type == Type::Link) ? (_value >= kStringLinkIndexShift) : (_value < kStringLinkIndexShift)); } Parser::StartedEntity::Type Parser::StartedEntity::type() const { return _type; } std::optional Parser::StartedEntity::flags() const { if (_value < int(kStringLinkIndexShift) && (_type == Type::Flags)) { return TextBlockFlags::from_raw(uint16(_value)); } return std::nullopt; } std::optional Parser::StartedEntity::linkIndex() const { if ((_value < int(kStringLinkIndexShift) && (_type == Type::IndexedLink)) || (_value >= int(kStringLinkIndexShift) && (_type == Type::Link))) { return uint16(_value); } return std::nullopt; } std::optional Parser::StartedEntity::colorIndex() const { if (_type == Type::Colorized) { return uint16(_value); } return std::nullopt; } Parser::Parser( not_null string, const TextWithEntities &textWithEntities, const TextParseOptions &options, const std::any &context) : Parser( string, PrepareRichFromRich(textWithEntities, options), options, context, ReadyToken()) { } Parser::Parser( not_null string, TextWithEntities &&source, const TextParseOptions &options, const std::any &context, ReadyToken) : _t(string) , _source(std::move(source)) , _context(context) , _start(_source.text.constData()) , _end(_start + _source.text.size()) , _ptr(_start) , _entitiesEnd(_source.entities.end()) , _waitingEntity(_source.entities.begin()) , _multiline(options.flags & TextParseMultiline) , _stopAfterWidth(ComputeStopAfter(options, *_t->_st)) , _checkTilde(ComputeCheckTilde(*_t->_st)) { parse(options); } void Parser::blockCreated() { _sumWidth += _t->_blocks.back()->f_width(); if (_sumWidth.floor().toInt() > _stopAfterWidth) { _sumFinished = true; } } void Parser::createBlock(int32 skipBack) { if (_linkIndex < kStringLinkIndexShift && _linkIndex > _maxLinkIndex) { _maxLinkIndex = _linkIndex; } if (_linkIndex > kStringLinkIndexShift) { _maxShiftedLinkIndex = std::max( uint16(_linkIndex - kStringLinkIndexShift), _maxShiftedLinkIndex); } const auto length = int32(_t->_text.size()) + skipBack - _blockStart; if (length <= 0) { return; } const auto newline = !_emoji && (length == 1) && (_t->_text.at(_blockStart) == QChar::LineFeed); if (_newlineAwaited) { _newlineAwaited = false; if (!newline) { _t->insertModifications(_blockStart, 1); _t->_text.insert(_blockStart, QChar::LineFeed); createBlock(skipBack - length); } } const auto linkIndex = _monoIndex ? _monoIndex : _linkIndex; auto custom = _customEmojiData.isEmpty() ? nullptr : Integration::Instance().createCustomEmoji( _customEmojiData, _context); const auto push = [&](auto &&factory, auto &&...args) { _t->_blocks.push_back(factory( _t->_st->font, _t->_text, _blockStart, length, _flags, linkIndex, _colorIndex, std::forward(args)...)); }; if (custom) { push(&Block::CustomEmoji, std::move(custom)); } else if (_emoji) { push(&Block::Emoji, _emoji); } else if (newline) { push(&Block::Newline); auto &newline = _t->_blocks.back().unsafe(); newline._quoteIndex = _quoteIndex; } else { push(&Block::Text, _t->_minResizeWidth); } // Diacritic can't attach from the next block to this one. _allowDiacritic = false; _blockStart += length; _customEmojiData = QByteArray(); _emoji = nullptr; blockCreated(); } void Parser::createNewlineBlock(bool fromOriginalText) { if (!fromOriginalText) { _t->insertModifications(_t->_text.size(), 1); } _t->_text.push_back(QChar::LineFeed); _allowDiacritic = false; createBlock(); } void Parser::ensureAtNewline(QuoteDetails quote) { createBlock(); const auto lastType = _t->_blocks.empty() ? TextBlockType::Newline : _t->_blocks.back()->type(); if (lastType != TextBlockType::Newline) { auto saved = base::take(_customEmojiData); createNewlineBlock(false); _customEmojiData = base::take(saved); } _quoteStartPosition = _t->_text.size(); auto "es = _t->ensureExtended()->quotes; quotes.push_back(std::move(quote)); const auto index = _quoteIndex = int(quotes.size()); if (_t->_blocks.empty()) { _t->_startQuoteIndex = index; } else { auto &last = _t->_blocks.back(); Assert(last->type() == TextBlockType::Newline); last.unsafe()._quoteIndex = index; } } void Parser::finishEntities() { while (!_startedEntities.empty() && (_ptr >= _startedEntities.begin()->first || _ptr >= _end)) { auto list = std::move(_startedEntities.begin()->second); _startedEntities.erase(_startedEntities.begin()); while (!list.empty()) { if (list.back().type() == StartedEntity::Type::CustomEmoji) { createBlock(); } else if (const auto flags = list.back().flags()) { if (_flags & (*flags)) { createBlock(); _flags &= ~(*flags); const auto lastType = _t->_blocks.empty() ? TextBlockType::Newline : _t->_blocks.back()->type(); if ((*flags) & (TextBlockFlag::Pre | TextBlockFlag::Blockquote)) { if (_quoteIndex) { auto "es = _t->ensureExtended()->quotes; auto "e = quotes[_quoteIndex - 1]; const auto from = _quoteStartPosition; const auto till = _t->_text.size(); if (quote.pre && till > from) { quote.copy = std::make_shared( _t, from, till - from); } } _quoteIndex = 0; if (lastType != TextBlockType::Newline) { _newlineAwaited = true; } else if (_t->_blocks.empty()) { _t->_startQuoteIndex = 0; } else { auto &last = _t->_blocks.back(); last.unsafe()._quoteIndex = 0; } } if (IsMono(*flags)) { _monoIndex = 0; } } } else if (const auto linkIndex = list.back().linkIndex()) { if (_linkIndex == *linkIndex) { createBlock(); _linkIndex = 0; } } else if (const auto colorIndex = list.back().colorIndex()) { if (_colorIndex == *colorIndex) { createBlock(); _colorIndex = 0; } } list.pop_back(); } } } // Returns true if at least one entity was parsed in the current position. bool Parser::checkEntities() { finishEntities(); skipPassedEntities(); if (_waitingEntity == _entitiesEnd || _ptr < _start + _waitingEntity->offset()) { return false; } auto flags = TextBlockFlags(); auto link = EntityLinkData(); auto monoIndex = 0; const auto entityType = _waitingEntity->type(); const auto entityLength = _waitingEntity->length(); const auto entityBegin = _start + _waitingEntity->offset(); const auto entityEnd = entityBegin + entityLength; const auto pushSimpleUrl = [&](EntityType type) { link.type = type; link.data = QString(entityBegin, entityLength); if (type == EntityType::Url) { computeLinkText(link.data, &link.text, &link.shown); } else { link.text = link.data; } }; const auto pushComplexUrl = [&] { link.type = entityType; link.data = _waitingEntity->data(); link.text = QString(entityBegin, entityLength); }; using Type = StartedEntity::Type; if (entityType == EntityType::CustomEmoji) { createBlock(); _customEmojiData = _waitingEntity->data(); _startedEntities[entityEnd].emplace_back(0, Type::CustomEmoji); } else if (entityType == EntityType::Bold) { flags = TextBlockFlag::Bold; } else if (entityType == EntityType::Semibold) { flags = TextBlockFlag::Semibold; } else if (entityType == EntityType::Italic) { flags = TextBlockFlag::Italic; } else if (entityType == EntityType::Underline) { flags = TextBlockFlag::Underline; } else if (entityType == EntityType::Spoiler) { flags = TextBlockFlag::Spoiler; } else if (entityType == EntityType::StrikeOut) { flags = TextBlockFlag::StrikeOut; } else if ((entityType == EntityType::Code) // #TODO entities || (entityType == EntityType::Pre)) { if (entityType == EntityType::Code) { flags = TextBlockFlag::Code; } else { flags = TextBlockFlag::Pre; ensureAtNewline({ .language = _waitingEntity->data(), .pre = true, }); } const auto text = QString(entityBegin, entityLength); // It is better to trim the text to identify "Sample\n" as inline. const auto trimmed = text.trimmed(); const auto isSingleLine = !trimmed.isEmpty() && ranges::none_of(trimmed, IsNewline); // TODO: remove trimming. if (isSingleLine && (entityType == EntityType::Code)) { _monos.push_back({ .text = text, .type = entityType }); monoIndex = _monos.size(); } } else if (entityType == EntityType::Blockquote) { flags = TextBlockFlag::Blockquote; ensureAtNewline({ .blockquote = true }); } else if (entityType == EntityType::Url || entityType == EntityType::Email || entityType == EntityType::Mention || entityType == EntityType::Hashtag || entityType == EntityType::Cashtag || entityType == EntityType::BotCommand) { pushSimpleUrl(entityType); } else if (entityType == EntityType::CustomUrl) { const auto url = _waitingEntity->data(); const auto text = QString(entityBegin, entityLength); if (url == text) { pushSimpleUrl(EntityType::Url); } else { pushComplexUrl(); } } else if (entityType == EntityType::MentionName) { pushComplexUrl(); } else if (entityType == EntityType::Colorized) { createBlock(); const auto data = _waitingEntity->data(); _colorIndex = data.isEmpty() ? 1 : (data.front().unicode() + 1); _startedEntities[entityEnd].emplace_back( _colorIndex, Type::Colorized); } if (link.type != EntityType::Invalid) { createBlock(); _links.push_back(link); const auto tempIndex = _links.size(); const auto useCustom = processCustomIndex(tempIndex); _linkIndex = tempIndex + (useCustom ? 0 : kStringLinkIndexShift); _startedEntities[entityEnd].emplace_back( _linkIndex, useCustom ? Type::IndexedLink : Type::Link); } else if (flags) { if (!(_flags & flags)) { createBlock(); _flags |= flags; _startedEntities[entityEnd].emplace_back(flags); _monoIndex = monoIndex; } } ++_waitingEntity; skipBadEntities(); return true; } bool Parser::processCustomIndex(uint16 index) { auto &url = _links[index - 1].data; if (url.isEmpty()) { return false; } if (url.startsWith("internal:index")) { const auto customIndex = uint16(url.back().unicode()); // if (customIndex != index) { url = QString(); _linksIndexes.push_back(customIndex); return true; // } } return false; } void Parser::skipPassedEntities() { while (_waitingEntity != _entitiesEnd && _start + _waitingEntity->offset() + _waitingEntity->length() <= _ptr) { ++_waitingEntity; } } void Parser::skipBadEntities() { if (_links.size() >= 0x7FFF) { while (_waitingEntity != _entitiesEnd && (isLinkEntity(*_waitingEntity) || isInvalidEntity(*_waitingEntity))) { ++_waitingEntity; } } else { while (_waitingEntity != _entitiesEnd && isInvalidEntity(*_waitingEntity)) { ++_waitingEntity; } } } void Parser::parseCurrentChar() { _ch = ((_ptr < _end) ? *_ptr : 0); _emojiLookback = 0; const auto inCustomEmoji = !_customEmojiData.isEmpty(); const auto isNewLine = !inCustomEmoji && _multiline && IsNewline(_ch); const auto replaceWithSpace = IsSpace(_ch) && (_ch != QChar::Nbsp); const auto isDiacritic = IsDiacritic(_ch); const auto isTilde = !inCustomEmoji && _checkTilde && (_ch == '~'); const auto skip = [&] { if (IsBad(_ch) || _ch.isLowSurrogate()) { return true; } else if (_ch == 0xFE0F && Platform::IsMac()) { // Some sequences like 0x0E53 0xFE0F crash OS X harfbuzz text processing :( return true; } else if (isDiacritic) { if (!_allowDiacritic || _emoji || ++_diacritics > kMaxDiacAfterSymbol) { return true; } } else if (_ch.isHighSurrogate()) { if (_ptr + 1 >= _end || !(_ptr + 1)->isLowSurrogate()) { return true; } const auto ucs4 = QChar::surrogateToUcs4(_ch, *(_ptr + 1)); if (ucs4 >= 0xE0000) { // Unicode tags are skipped. // Only place they work is in some flag emoji, // but in that case they were already parsed as emoji before. // // For unknown reason in some unknown cases strings with such // symbols lead to crashes on some Linux distributions, see // https://github.com/telegramdesktop/tdesktop/issues/7005 // // At least one crashing text was starting that way: // // 0xd83d 0xdcda 0xdb40 0xdc69 0xdb40 0xdc64 0xdb40 0xdc6a // 0xdb40 0xdc77 0xdb40 0xdc7f 0x32 ... simple text here ... // // or in codepoints: // // 0x1f4da 0xe0069 0xe0064 0xe006a 0xe0077 0xe007f 0x32 ... return true; } } return false; }(); if (_ch.isHighSurrogate() && !skip) { _t->_text.push_back(_ch); ++_ptr; _ch = *_ptr; _emojiLookback = 1; } if (skip) { if (_ptr < _end) { _t->insertModifications(_t->_text.size(), -1); } _ch = 0; _allowDiacritic = false; } else { if (isTilde) { // Tilde fix in OpenSans. if (!(_flags & TextBlockFlag::Tilde)) { createBlock(-_emojiLookback); _flags |= TextBlockFlag::Tilde; } } else { if (_flags & TextBlockFlag::Tilde) { createBlock(-_emojiLookback); _flags &= ~TextBlockFlag::Tilde; } } if (isNewLine) { createBlock(); createNewlineBlock(true); } else if (replaceWithSpace) { _t->_text.push_back(QChar::Space); _allowDiacritic = false; } else { if (_emoji) { createBlock(-_emojiLookback); } _t->_text.push_back(_ch); _allowDiacritic = IsDiacriticAllowedAfter(_ch); } if (!isDiacritic) { _diacritics = 0; } } } void Parser::parseEmojiFromCurrent() { if (!_customEmojiData.isEmpty()) { return; } int len = 0; auto e = Emoji::Find(_ptr - _emojiLookback, _end, &len); if (!e) return; for (int l = len - _emojiLookback - 1; l > 0; --l) { _t->_text.push_back(*++_ptr); } if (e->hasPostfix()) { Assert(!_t->_text.isEmpty()); const auto last = _t->_text[_t->_text.size() - 1]; if (last.unicode() != Emoji::kPostfix) { _t->insertModifications(_t->_text.size(), 1); _t->_text.push_back(QChar(Emoji::kPostfix)); ++len; } } createBlock(-len); _emoji = e; } bool Parser::isInvalidEntity(const EntityInText &entity) const { const auto length = entity.length(); return (_start + entity.offset() + length > _end) || (length <= 0); } bool Parser::isLinkEntity(const EntityInText &entity) const { const auto type = entity.type(); const auto urls = { EntityType::Url, EntityType::CustomUrl, EntityType::Email, EntityType::Hashtag, EntityType::Cashtag, EntityType::Mention, EntityType::MentionName, EntityType::BotCommand }; return ranges::find(urls, type) != std::end(urls); } void Parser::parse(const TextParseOptions &options) { skipBadEntities(); trimSourceRange(); _t->_text.resize(0); if (_t->_extended) { base::take(_t->_extended->modifications); } _t->_text.reserve(_end - _ptr); if (_ptr > _start) { _t->insertModifications(0, -(_ptr - _start)); } for (; _ptr <= _end; ++_ptr) { while (checkEntities()) { } parseCurrentChar(); parseEmojiFromCurrent(); if (_sumFinished || _t->_text.size() >= 0x8000) { break; // 32k max } } createBlock(); finalize(options); } void Parser::trimSourceRange() { const auto firstMonospaceOffset = EntityInText::FirstMonospaceOffset( _source.entities, _end - _start); while (_ptr != _end && IsTrimmed(*_ptr) && _ptr != _start + firstMonospaceOffset) { ++_ptr; } while (_ptr != _end && IsTrimmed(*(_end - 1))) { --_end; } } // void Parser::checkForElidedSkipBlock() { // if (!_sumFinished || !_rich) { // return; // } // // We could've skipped the final skip block command. // for (; _ptr < _end; ++_ptr) { // if (*_ptr == TextCommand && readSkipBlockCommand()) { // break; // } // } // } void Parser::finalize(const TextParseOptions &options) { auto links = (_maxLinkIndex || _maxShiftedLinkIndex) ? &_t->ensureExtended()->links : nullptr; if (links) { links->resize(_maxLinkIndex + _maxShiftedLinkIndex); } auto counterCustomIndex = uint16(0); auto currentIndex = uint16(0); // Current the latest index of _t->_links. struct { uint16 mono = 0; uint16 lnk = 0; } lastHandlerIndex; const auto avoidIntersectionsWithCustom = [&] { while (ranges::contains(_linksIndexes, currentIndex)) { currentIndex++; } }; auto isolatedEmojiCount = 0; _t->_hasCustomEmoji = false; _t->_isIsolatedEmoji = true; _t->_isOnlyCustomEmoji = true; _t->_hasNotEmojiAndSpaces = false; auto spacesCheckFrom = uint16(-1); const auto length = int(_t->_text.size()); for (auto &block : _t->_blocks) { if (block->type() == TextBlockType::CustomEmoji) { _t->_hasCustomEmoji = true; } else if (block->type() != TextBlockType::Newline && block->type() != TextBlockType::Skip) { _t->_isOnlyCustomEmoji = false; } else if (block->linkIndex()) { _t->_isOnlyCustomEmoji = _t->_isIsolatedEmoji = false; } if (!_t->_hasNotEmojiAndSpaces) { if (block->type() == TextBlockType::Text) { if (spacesCheckFrom == uint16(-1)) { spacesCheckFrom = block->position(); } } else if (spacesCheckFrom != uint16(-1)) { const auto checkTill = block->position(); for (auto i = spacesCheckFrom; i != checkTill; ++i) { Assert(i < length); if (!_t->_text[i].isSpace()) { _t->_hasNotEmojiAndSpaces = true; break; } } spacesCheckFrom = uint16(-1); } } if (_t->_isIsolatedEmoji) { if (block->type() == TextBlockType::CustomEmoji || block->type() == TextBlockType::Emoji) { if (++isolatedEmojiCount > kIsolatedEmojiLimit) { _t->_isIsolatedEmoji = false; } } else if (block->type() != TextBlockType::Skip) { _t->_isIsolatedEmoji = false; } } if (block->flags() & TextBlockFlag::Spoiler) { auto &spoiler = _t->ensureExtended()->spoiler; if (!spoiler) { spoiler = std::make_unique( Integration::Instance().createSpoilerRepaint(_context)); } } const auto shiftedIndex = block->linkIndex(); auto useCustomIndex = false; if (shiftedIndex <= kStringLinkIndexShift) { if (IsMono(block->flags()) && shiftedIndex) { const auto monoIndex = shiftedIndex; if (lastHandlerIndex.mono == monoIndex) { block->setLinkIndex(currentIndex); continue; // Optimization. } else { currentIndex++; } avoidIntersectionsWithCustom(); block->setLinkIndex(currentIndex); const auto handler = Integration::Instance().createLinkHandler( _monos[monoIndex - 1], _context); if (!links) { links = &_t->ensureExtended()->links; } links->resize(currentIndex); if (handler) { _t->setLink(currentIndex, handler); } lastHandlerIndex.mono = monoIndex; continue; } else if (shiftedIndex) { useCustomIndex = true; } else { continue; } } const auto usedIndex = [&] { return useCustomIndex ? _linksIndexes[counterCustomIndex - 1] : currentIndex; }; const auto realIndex = useCustomIndex ? shiftedIndex : (shiftedIndex - kStringLinkIndexShift); if (lastHandlerIndex.lnk == realIndex) { block->setLinkIndex(usedIndex()); continue; // Optimization. } else { (useCustomIndex ? counterCustomIndex : currentIndex)++; } if (!useCustomIndex) { avoidIntersectionsWithCustom(); } block->setLinkIndex(usedIndex()); if (links) { links->resize(std::max(usedIndex(), uint16(links->size()))); } const auto handler = Integration::Instance().createLinkHandler( _links[realIndex - 1], _context); if (handler) { _t->setLink(usedIndex(), handler); } lastHandlerIndex.lnk = realIndex; } const auto hasSpoiler = (_t->_extended && _t->_extended->spoiler); if (!_t->_hasCustomEmoji || hasSpoiler) { _t->_isOnlyCustomEmoji = false; } if (_t->_blocks.empty() || hasSpoiler) { _t->_isIsolatedEmoji = false; } if (!_t->_hasNotEmojiAndSpaces && spacesCheckFrom != uint16(-1)) { Assert(spacesCheckFrom < length); for (auto i = spacesCheckFrom; i != length; ++i) { Assert(i < length); if (!_t->_text[i].isSpace()) { _t->_hasNotEmojiAndSpaces = true; break; } } } _t->_text.squeeze(); _t->_blocks.shrink_to_fit(); if (const auto extended = _t->_extended.get()) { extended->links.shrink_to_fit(); extended->modifications.shrink_to_fit(); } } void Parser::computeLinkText( const QString &linkData, QString *outLinkText, EntityLinkShown *outShown) { auto url = QUrl(linkData); auto good = QUrl(url.isValid() ? url.toEncoded() : QByteArray()); auto readable = good.isValid() ? good.toDisplayString() : linkData; *outLinkText = _t->_st->font->elided(readable, st::linkCropLimit); *outShown = (*outLinkText == readable) ? EntityLinkShown::Full : EntityLinkShown::Partial; } } // namespace Ui::Text