312 lines
		
	
	
	
		
			8.8 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			312 lines
		
	
	
	
		
			8.8 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /*
 | |
| This file is part of Telegram Desktop,
 | |
| the official desktop version of Telegram messaging app, see https://telegram.org
 | |
| 
 | |
| Telegram Desktop is free software: you can redistribute it and/or modify
 | |
| it under the terms of the GNU General Public License as published by
 | |
| the Free Software Foundation, either version 3 of the License, or
 | |
| (at your option) any later version.
 | |
| 
 | |
| It is distributed in the hope that it will be useful,
 | |
| but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 | |
| GNU General Public License for more details.
 | |
| 
 | |
| In addition, as a special exception, the copyright holders give permission
 | |
| to link the code of portions of this program with the OpenSSL library.
 | |
| 
 | |
| Full license: https://github.com/telegramdesktop/tdesktop/blob/master/LICENSE
 | |
| Copyright (c) 2014-2017 John Preston, https://desktop.telegram.org
 | |
| */
 | |
| #include "codegen/common/basic_tokenized_file.h"
 | |
| 
 | |
| #include "codegen/common/logging.h"
 | |
| #include "codegen/common/clean_file_reader.h"
 | |
| #include "codegen/common/checked_utf8_string.h"
 | |
| 
 | |
| using Token = codegen::common::BasicTokenizedFile::Token;
 | |
| using Type = Token::Type;
 | |
| 
 | |
| namespace codegen {
 | |
| namespace common {
 | |
| namespace {
 | |
| 
 | |
// Error codes reported through the log stream by the tokenizer.

// A string literal hit a newline or the end of input before its closing quote.
constexpr int kErrorUnterminatedStringLiteral = 201;

// A string literal or a line comment is not valid UTF-8.
constexpr int kErrorIncorrectUtf8String = 202;

// A character does not start any known token.
constexpr int kErrorIncorrectToken = 203;

// A token of another kind was expected at the current position.
constexpr int kErrorUnexpectedToken = 204;
 | |
| 
 | |
// Returns true when ch is one of the characters '0' through '9'.
bool isDigitChar(char ch) {
	if (ch < '0') {
		return false;
	}
	return !(ch > '9');
}
 | |
| 
 | |
| bool isNameChar(char ch) {
 | |
| 	return isDigitChar(ch) || ((ch >= 'a') && (ch <= 'z')) || ((ch >= 'A') && (ch <= 'Z')) || (ch == '_');
 | |
| }
 | |
| 
 | |
// Returns true for the four characters treated as whitespace between
// tokens: line feed, carriage return, space and tab.
bool isWhitespaceChar(char ch) {
	switch (ch) {
	case '\n':
	case '\r':
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
 | |
| 
 | |
| Token invalidToken() {
 | |
| 	return { Type::Invalid, QString(), ConstUtf8String(nullptr, 0), false };
 | |
| }
 | |
| 
 | |
| } // namespace
 | |
| 
 | |
| BasicTokenizedFile::BasicTokenizedFile(const QString &filepath) : reader_(filepath) {
 | |
| }
 | |
| 
 | |
| BasicTokenizedFile::BasicTokenizedFile(const QByteArray &content, const QString &filepath) : reader_(content, filepath) {
 | |
| }
 | |
| 
 | |
| bool BasicTokenizedFile::putBack() {
 | |
| 	if (currentToken_ > 0) {
 | |
| 		--currentToken_;
 | |
| 		return true;
 | |
| 	}
 | |
| 	return false;
 | |
| }
 | |
| 
 | |
| Token BasicTokenizedFile::getAnyToken() {
 | |
| 	if (currentToken_ >= tokens_.size()) {
 | |
| 		if (readToken() == Type::Invalid) {
 | |
| 			return invalidToken();
 | |
| 		}
 | |
| 	}
 | |
| 	return tokens_.at(currentToken_++);
 | |
| }
 | |
| 
 | |
| Token BasicTokenizedFile::getToken(Type typeCondition) {
 | |
| 	if (auto token = getAnyToken()) {
 | |
| 		if (token.type == typeCondition) {
 | |
| 			return token;
 | |
| 		}
 | |
| 		putBack();
 | |
| 	}
 | |
| 	return invalidToken();
 | |
| }
 | |
| 
 | |
| Type BasicTokenizedFile::readToken() {
 | |
| 	auto result = readOneToken(StartWithWhitespace::Allow);
 | |
| 
 | |
| 	// Try to read double token.
 | |
| 	if (result == Type::Int) {
 | |
| 		if (readOneToken(StartWithWhitespace::Deny) == Type::Dot) {
 | |
| 			// We got int and dot, so it is double already.
 | |
| 			result = uniteLastTokens(Type::Double);
 | |
| 
 | |
| 			// Try to read one more int (after dot).
 | |
| 			if (readOneToken(StartWithWhitespace::Deny) == Type::Int) {
 | |
| 				result = uniteLastTokens(Type::Double);
 | |
| 			}
 | |
| 		}
 | |
| 	} else if (result == Type::Dot) {
 | |
| 		if (readOneToken(StartWithWhitespace::Deny) == Type::Int) {
 | |
| 			//We got dot and int, so it is double.
 | |
| 			result = uniteLastTokens(Type::Double);
 | |
| 		}
 | |
| 	}
 | |
| 	return result;
 | |
| }
 | |
| 
 | |
| Type BasicTokenizedFile::readOneToken(StartWithWhitespace condition) {
 | |
| 	skipWhitespaces();
 | |
| 	if (tokenStartWhitespace_ && condition == StartWithWhitespace::Deny) {
 | |
| 		return Type::Invalid;
 | |
| 	}
 | |
| 	if (reader_.atEnd()) {
 | |
| 		return Type::Invalid;
 | |
| 	}
 | |
| 
 | |
| 	auto ch = reader_.currentChar();
 | |
| 	if (ch == '"') {
 | |
| 		return readString();
 | |
| 	} else if (isNameChar(ch)) {
 | |
| 		return readNameOrNumber();
 | |
| 	}
 | |
| 	return readSingleLetter();
 | |
| }
 | |
| 
 | |
| Type BasicTokenizedFile::saveToken(Type type, const QString &value) {
 | |
| 	ConstUtf8String original = { tokenStart_, reader_.currentPtr() };
 | |
| 	tokens_.push_back({ type, value, original, tokenStartWhitespace_ });
 | |
| 	return type;
 | |
| }
 | |
| 
 | |
| Type BasicTokenizedFile::uniteLastTokens(Type type) {
 | |
| 	auto size = tokens_.size();
 | |
| 	if (size < 2) {
 | |
| 		return Type::Invalid;
 | |
| 	}
 | |
| 
 | |
| 	auto &token(tokens_[size - 2]);
 | |
| 	auto originalFrom = token.original.data();
 | |
| 	auto originalTill = tokens_.back().original.end();
 | |
| 	token.type = type;
 | |
| 	token.original = { originalFrom, originalTill };
 | |
| 	token.value += tokens_.back().value;
 | |
| 	tokens_.pop_back();
 | |
| 	return type;
 | |
| }
 | |
| 
 | |
| QString BasicTokenizedFile::getCurrentLineComment() {
 | |
| 	if (lineNumber_ > singleLineComments_.size()) {
 | |
| 		reader_.logError(kErrorInternal, lineNumber_) << "internal tokenizer error (line number larger than comments list size).";
 | |
| 		failed_ = true;
 | |
| 		return QString();
 | |
| 	}
 | |
| 	auto commentBytes = singleLineComments_[lineNumber_ - 1].mid(2); // Skip "//"
 | |
| 	CheckedUtf8String comment(commentBytes);
 | |
| 	if (!comment.isValid()) {
 | |
| 		reader_.logError(kErrorIncorrectUtf8String, lineNumber_) << "incorrect UTF-8 string in the comment.";
 | |
| 		failed_ = true;
 | |
| 		return QString();
 | |
| 	}
 | |
| 	return comment.toString().trimmed();
 | |
| }
 | |
| 
 | |
| Type BasicTokenizedFile::readNameOrNumber() {
 | |
| 	while (!reader_.atEnd()) {
 | |
| 		if (!isDigitChar(reader_.currentChar())) {
 | |
| 			break;
 | |
| 		}
 | |
| 		reader_.skipChar();
 | |
| 	}
 | |
| 	bool onlyDigits = true;
 | |
| 	while (!reader_.atEnd()) {
 | |
| 		if (!isNameChar(reader_.currentChar())) {
 | |
| 			break;
 | |
| 		}
 | |
| 		onlyDigits = false;
 | |
| 		reader_.skipChar();
 | |
| 	}
 | |
| 	return saveToken(onlyDigits ? Type::Int : Type::Name);
 | |
| }
 | |
| 
 | |
| Type BasicTokenizedFile::readString() {
 | |
| 	reader_.skipChar();
 | |
| 	auto offset = reader_.currentPtr();
 | |
| 
 | |
| 	QByteArray value;
 | |
| 	while (!reader_.atEnd()) {
 | |
| 		auto ch = reader_.currentChar();
 | |
| 		if (ch == '"') {
 | |
| 			break;
 | |
| 		}
 | |
| 		if (ch == '\n') {
 | |
| 			reader_.logError(kErrorUnterminatedStringLiteral, lineNumber_) << "unterminated string literal.";
 | |
| 			failed_ = true;
 | |
| 			return Type::Invalid;
 | |
| 		}
 | |
| 		if (ch == '\\') {
 | |
| 			reader_.skipChar();
 | |
| 			ch = reader_.currentChar();
 | |
| 			if (reader_.atEnd() || ch == '\n') {
 | |
| 				reader_.logError(kErrorUnterminatedStringLiteral, lineNumber_) << "unterminated string literal.";
 | |
| 				failed_ = true;
 | |
| 				return Type::Invalid;
 | |
| 			}
 | |
| 			if (reader_.currentPtr() > offset + 1) {
 | |
| 				value.append(offset, reader_.currentPtr() - offset - 1);
 | |
| 			}
 | |
| 			offset = reader_.currentPtr() + 1;
 | |
| 			if (ch == 'n') {
 | |
| 				value.append('\n');
 | |
| 			} else if (ch == 't') {
 | |
| 				value.append('\t');
 | |
| 			} else if (ch == '"') {
 | |
| 				value.append('"');
 | |
| 			} else if (ch == '\\') {
 | |
| 				value.append('\\');
 | |
| 			}
 | |
| 		} else {
 | |
| 			value.append(ch);
 | |
| 		}
 | |
| 		reader_.skipChar();
 | |
| 	}
 | |
| 	if (reader_.atEnd()) {
 | |
| 		reader_.logError(kErrorUnterminatedStringLiteral, lineNumber_) << "unterminated string literal.";
 | |
| 		failed_ = true;
 | |
| 		return Type::Invalid;
 | |
| 	}
 | |
| 	CheckedUtf8String checked(value);
 | |
| 	if (!checked.isValid()) {
 | |
| 		reader_.logError(kErrorIncorrectUtf8String, lineNumber_) << "incorrect UTF-8 string literal.";
 | |
| 		failed_ = true;
 | |
| 		return Type::Invalid;
 | |
| 	}
 | |
| 	reader_.skipChar();
 | |
| 	return saveToken(Type::String, checked.toString());
 | |
| }
 | |
| 
 | |
| Type BasicTokenizedFile::readSingleLetter() {
 | |
| 	auto type = singleLetterTokens_.value(reader_.currentChar(), Type::Invalid);
 | |
| 	if (type == Type::Invalid) {
 | |
| 		reader_.logError(kErrorIncorrectToken, lineNumber_) << "incorrect token '" << reader_.currentChar() << "'";
 | |
| 		return Type::Invalid;
 | |
| 	}
 | |
| 
 | |
| 	reader_.skipChar();
 | |
| 	return saveToken(type);
 | |
| }
 | |
| 
 | |
| void BasicTokenizedFile::skipWhitespaces() {
 | |
| 	if (reader_.atEnd()) return;
 | |
| 
 | |
| 	auto ch = reader_.currentChar();
 | |
| 	tokenStartWhitespace_ = isWhitespaceChar(ch);
 | |
| 	if (tokenStartWhitespace_) {
 | |
| 		do {
 | |
| 			if (ch == '\n') {
 | |
| 				++lineNumber_;
 | |
| 			}
 | |
| 			reader_.skipChar();
 | |
| 			ch = reader_.currentChar();
 | |
| 		} while (!reader_.atEnd() && isWhitespaceChar(ch));
 | |
| 	}
 | |
| 	tokenStart_ = reader_.currentPtr();
 | |
| }
 | |
| 
 | |
| LogStream operator<<(LogStream &&stream, BasicTokenizedFile::Token::Type type) {
 | |
| 	const char *value = "'invalid'";
 | |
| 	switch (type) {
 | |
| 	case Type::Invalid: break;
 | |
| 	case Type::Int: value = "'int'"; break;
 | |
| 	case Type::Double: value = "'double'"; break;
 | |
| 	case Type::String: value = "'string'"; break;
 | |
| 	case Type::LeftParenthesis: value = "'('"; break;
 | |
| 	case Type::RightParenthesis: value = "')'"; break;
 | |
| 	case Type::LeftBrace: value = "'{'"; break;
 | |
| 	case Type::RightBrace: value = "'}'"; break;
 | |
| 	case Type::LeftBracket: value = "'['"; break;
 | |
| 	case Type::RightBracket: value = "']'"; break;
 | |
| 	case Type::Colon: value = "':'"; break;
 | |
| 	case Type::Semicolon: value = "';'"; break;
 | |
| 	case Type::Comma: value = "','"; break;
 | |
| 	case Type::Dot: value = "'.'"; break;
 | |
| 	case Type::Number: value = "'#'"; break;
 | |
| 	case Type::Plus: value = "'+'"; break;
 | |
| 	case Type::Minus: value = "'-'"; break;
 | |
| 	case Type::Equals: value = "'='"; break;
 | |
| 	case Type::Name: value = "'identifier'"; break;
 | |
| 	}
 | |
| 	return std::forward<LogStream>(stream) << value;
 | |
| }
 | |
| 
 | |
| LogStream BasicTokenizedFile::logError(int code) const {
 | |
| 	return reader_.logError(code, lineNumber_);
 | |
| }
 | |
| 
 | |
| LogStream BasicTokenizedFile::logErrorUnexpectedToken() const {
 | |
| 	if (currentToken_ < tokens_.size()) {
 | |
| 		auto token = tokens_.at(currentToken_).original.toStdString();
 | |
| 		return logError(kErrorUnexpectedToken) << "unexpected token '" << token << "', expected ";
 | |
| 	}
 | |
| 	return logError(kErrorUnexpectedToken) << "unexpected token, expected ";
 | |
| }
 | |
| 
 | |
| BasicTokenizedFile::~BasicTokenizedFile() = default;
 | |
| 
 | |
| } // namespace common
 | |
| } // namespace codegen
 | 
