add finegrained whitespace control

This commit is contained in:
pantor
2020-06-29 23:12:12 +02:00
parent d43d497f88
commit c4d99c9455
7 changed files with 172 additions and 21 deletions

View File

@@ -260,7 +260,13 @@ env.set_trim_blocks(true);
env.set_lstrip_blocks(true);
```
With both `trim_blocks` and `lstrip_blocks` enabled, you can put statements on their own lines.
With both `trim_blocks` and `lstrip_blocks` enabled, you can put statements on their own lines. You can also strip whitespace manually: if you add a minus sign (`-`) to the start or end of a statement, the whitespace before or after that statement is removed:
```.cpp
render("""{% if neighbour in guests -%} I was there{% endif -%} !""", data); // Renders without any whitespaces
```
Stripping after a statement also removes any newlines.
### Callbacks

View File

@@ -17,7 +17,10 @@ enum class ElementNotation { Dot, Pointer };
*/
struct LexerConfig {
std::string statement_open {"{%"};
std::string statement_open_no_lstrip {"{%+"};
std::string statement_open_force_lstrip {"{%-"};
std::string statement_close {"%}"};
std::string statement_close_force_rstrip {"-%}"};
std::string line_statement {"##"};
std::string expression_open {"{{"};
std::string expression_close {"}}"};
@@ -36,6 +39,12 @@ struct LexerConfig {
if (open_chars.find(statement_open[0]) == std::string::npos) {
open_chars += statement_open[0];
}
if (open_chars.find(statement_open_no_lstrip[0]) == std::string::npos) {
open_chars += statement_open_no_lstrip[0];
}
if (open_chars.find(statement_open_force_lstrip[0]) == std::string::npos) {
open_chars += statement_open_force_lstrip[0];
}
if (open_chars.find(expression_open[0]) == std::string::npos) {
open_chars += expression_open[0];
}

View File

@@ -47,7 +47,10 @@ public:
/// Configures the delimiters that open and close template statements.
/// The whitespace-control variants are derived from them: `open` + "+"
/// (no lstrip), `open` + "-" (force lstrip) and "-" + `close` (force rstrip).
/// The lexer's set of opening characters is refreshed afterwards.
void set_statement(const std::string &open, const std::string &close) {
  auto &cfg = lexer_config;

  // Opening delimiter and its two suffixed variants.
  cfg.statement_open = open;
  cfg.statement_open_no_lstrip = std::string(open).append("+");
  cfg.statement_open_force_lstrip = std::string(open).append("-");

  // Closing delimiter and its prefixed variant.
  cfg.statement_close = close;
  cfg.statement_close_force_rstrip = std::string("-").append(close);

  cfg.update_open_chars();
}

View File

@@ -23,6 +23,8 @@ class Lexer {
LineStart,
LineBody,
StatementStart,
StatementStartNoLstrip,
StatementStartForceLstrip,
StatementBody,
CommentStart,
CommentBody
@@ -36,7 +38,7 @@ class Lexer {
size_t pos;
Token scan_body(nonstd::string_view close, Token::Kind closeKind, bool trim = false) {
Token scan_body(nonstd::string_view close, Token::Kind closeKind, nonstd::string_view close_trim = nonstd::string_view(), bool trim = false) {
again:
// skip whitespace (except for \n as it might be a close)
if (tok_start >= m_in.size()) {
@@ -49,12 +51,20 @@ class Lexer {
}
// check for close
if (!close_trim.empty() && inja::string_view::starts_with(m_in.substr(tok_start), close_trim)) {
state = State::Text;
pos = tok_start + close_trim.size();
Token tok = make_token(closeKind);
skip_whitespaces_and_newlines();
return tok;
}
if (inja::string_view::starts_with(m_in.substr(tok_start), close)) {
state = State::Text;
pos = tok_start + close.size();
Token tok = make_token(closeKind);
if (trim) {
skip_newline();
skip_whitespaces_and_first_newline();
}
return tok;
}
@@ -164,8 +174,9 @@ class Lexer {
Token scan_string() {
bool escape {false};
for (;;) {
if (pos >= m_in.size())
if (pos >= m_in.size()) {
break;
}
char ch = m_in[pos++];
if (ch == '\\') {
escape = true;
@@ -180,7 +191,21 @@ class Lexer {
Token make_token(Token::Kind kind) const { return Token(kind, string_view::slice(m_in, tok_start, pos)); }
void skip_newline() {
/// Advances `pos` past any run of spaces, tabs, line feeds and carriage
/// returns, stopping at the first other character or at the end of the input.
void skip_whitespaces_and_newlines() {
  const auto is_skippable = [](char c) { return c == ' ' || c == '\t' || c == '\n' || c == '\r'; };
  while (pos < m_in.size() && is_skippable(m_in[pos])) {
    pos += 1;
  }
}
void skip_whitespaces_and_first_newline() {
if (pos < m_in.size()) {
while (pos < m_in.size() && (m_in[pos] == ' ' || m_in[pos] == '\t')) {
pos += 1;
}
}
if (pos < m_in.size()) {
char ch = m_in[pos];
if (ch == '\n') {
@@ -249,8 +274,15 @@ public:
if (inja::string_view::starts_with(open_str, config.expression_open)) {
state = State::ExpressionStart;
} else if (inja::string_view::starts_with(open_str, config.statement_open)) {
state = State::StatementStart;
must_lstrip = config.lstrip_blocks;
if (inja::string_view::starts_with(open_str, config.statement_open_no_lstrip)) {
state = State::StatementStartNoLstrip;
} else if (inja::string_view::starts_with(open_str, config.statement_open_force_lstrip )) {
state = State::StatementStartForceLstrip;
must_lstrip = true;
} else {
state = State::StatementStart;
must_lstrip = config.lstrip_blocks;
}
} else if (inja::string_view::starts_with(open_str, config.comment_open)) {
state = State::CommentStart;
must_lstrip = config.lstrip_blocks;
@@ -263,11 +295,13 @@ public:
}
nonstd::string_view text = string_view::slice(m_in, tok_start, pos);
if (must_lstrip)
if (must_lstrip) {
text = clear_final_line_if_whitespace(text);
}
if (text.empty())
if (text.empty()) {
goto again; // don't generate empty token
}
return Token(Token::Kind::Text, text);
}
case State::ExpressionStart: {
@@ -285,6 +319,16 @@ public:
pos += config.statement_open.size();
return make_token(Token::Kind::StatementOpen);
}
// Statement opened with the no-lstrip delimiter (`{%+` by default): advance
// past that longer delimiter and emit an ordinary StatementOpen token.
case State::StatementStartNoLstrip: {
state = State::StatementBody;
pos += config.statement_open_no_lstrip.size();
return make_token(Token::Kind::StatementOpen);
}
// Statement opened with the force-lstrip delimiter (`{%-` by default): the
// token kind is the same StatementOpen; only the delimiter length differs.
case State::StatementStartForceLstrip: {
state = State::StatementBody;
pos += config.statement_open_force_lstrip.size();
return make_token(Token::Kind::StatementOpen);
}
case State::CommentStart: {
state = State::CommentBody;
pos += config.comment_open.size();
@@ -295,7 +339,7 @@ public:
case State::LineBody:
return scan_body("\n", Token::Kind::LineStatementClose);
case State::StatementBody:
return scan_body(config.statement_close, Token::Kind::StatementClose, config.trim_blocks);
return scan_body(config.statement_close, Token::Kind::StatementClose, config.statement_close_force_rstrip, config.trim_blocks);
case State::CommentBody: {
// fast-scan to comment close
size_t end = m_in.substr(pos).find(config.comment_close);
@@ -308,7 +352,7 @@ public:
pos += end + config.comment_close.size();
Token tok = make_token(Token::Kind::CommentClose);
if (config.trim_blocks) {
skip_newline();
skip_whitespaces_and_first_newline();
}
return tok;
}

View File

@@ -447,6 +447,7 @@ public:
tmpl.nodes.back().value = key_token.text;
}
tmpl.nodes.back().str = static_cast<std::string>(value_token.text);
tmpl.nodes.back().view = value_token.text;
} else if (tok.text == static_cast<decltype(tok.text)>("endfor")) {
get_next_token();
if (loop_stack.empty()) {
@@ -514,6 +515,7 @@ public:
last.op = Node::Op::Callback;
last.args = num_args;
last.str = static_cast<std::string>(name);
last.view = name;
return;
}
}
@@ -521,6 +523,7 @@ public:
// otherwise just add it to the end
tmpl.nodes.emplace_back(Node::Op::Callback, num_args);
tmpl.nodes.back().str = static_cast<std::string>(name);
tmpl.nodes.back().view = name;
}
void parse_into(Template &tmpl, nonstd::string_view path) {

View File

@@ -1455,7 +1455,10 @@ enum class ElementNotation { Dot, Pointer };
*/
struct LexerConfig {
std::string statement_open {"{%"};
std::string statement_open_no_lstrip {"{%+"};
std::string statement_open_force_lstrip {"{%-"};
std::string statement_close {"%}"};
std::string statement_close_force_rstrip {"-%}"};
std::string line_statement {"##"};
std::string expression_open {"{{"};
std::string expression_close {"}}"};
@@ -1474,6 +1477,12 @@ struct LexerConfig {
if (open_chars.find(statement_open[0]) == std::string::npos) {
open_chars += statement_open[0];
}
if (open_chars.find(statement_open_no_lstrip[0]) == std::string::npos) {
open_chars += statement_open_no_lstrip[0];
}
if (open_chars.find(statement_open_force_lstrip[0]) == std::string::npos) {
open_chars += statement_open_force_lstrip[0];
}
if (open_chars.find(expression_open[0]) == std::string::npos) {
open_chars += expression_open[0];
}
@@ -1966,6 +1975,8 @@ class Lexer {
LineStart,
LineBody,
StatementStart,
StatementStartNoLstrip,
StatementStartForceLstrip,
StatementBody,
CommentStart,
CommentBody
@@ -1979,7 +1990,7 @@ class Lexer {
size_t pos;
Token scan_body(nonstd::string_view close, Token::Kind closeKind, bool trim = false) {
Token scan_body(nonstd::string_view close, Token::Kind closeKind, nonstd::string_view close_trim = nonstd::string_view(), bool trim = false) {
again:
// skip whitespace (except for \n as it might be a close)
if (tok_start >= m_in.size()) {
@@ -1992,12 +2003,20 @@ class Lexer {
}
// check for close
if (!close_trim.empty() && inja::string_view::starts_with(m_in.substr(tok_start), close_trim)) {
state = State::Text;
pos = tok_start + close_trim.size();
Token tok = make_token(closeKind);
skip_whitespaces_and_newlines();
return tok;
}
if (inja::string_view::starts_with(m_in.substr(tok_start), close)) {
state = State::Text;
pos = tok_start + close.size();
Token tok = make_token(closeKind);
if (trim) {
skip_newline();
skip_whitespaces_and_first_newline();
}
return tok;
}
@@ -2107,8 +2126,9 @@ class Lexer {
Token scan_string() {
bool escape {false};
for (;;) {
if (pos >= m_in.size())
if (pos >= m_in.size()) {
break;
}
char ch = m_in[pos++];
if (ch == '\\') {
escape = true;
@@ -2123,7 +2143,21 @@ class Lexer {
Token make_token(Token::Kind kind) const { return Token(kind, string_view::slice(m_in, tok_start, pos)); }
void skip_newline() {
/// Advances `pos` past any run of spaces, tabs, line feeds and carriage
/// returns, stopping at the first other character or at the end of the input.
void skip_whitespaces_and_newlines() {
  const auto is_skippable = [](char c) { return c == ' ' || c == '\t' || c == '\n' || c == '\r'; };
  while (pos < m_in.size() && is_skippable(m_in[pos])) {
    pos += 1;
  }
}
void skip_whitespaces_and_first_newline() {
if (pos < m_in.size()) {
while (pos < m_in.size() && (m_in[pos] == ' ' || m_in[pos] == '\t')) {
pos += 1;
}
}
if (pos < m_in.size()) {
char ch = m_in[pos];
if (ch == '\n') {
@@ -2192,8 +2226,15 @@ public:
if (inja::string_view::starts_with(open_str, config.expression_open)) {
state = State::ExpressionStart;
} else if (inja::string_view::starts_with(open_str, config.statement_open)) {
state = State::StatementStart;
must_lstrip = config.lstrip_blocks;
if (inja::string_view::starts_with(open_str, config.statement_open_no_lstrip)) {
state = State::StatementStartNoLstrip;
} else if (inja::string_view::starts_with(open_str, config.statement_open_force_lstrip )) {
state = State::StatementStartForceLstrip;
must_lstrip = true;
} else {
state = State::StatementStart;
must_lstrip = config.lstrip_blocks;
}
} else if (inja::string_view::starts_with(open_str, config.comment_open)) {
state = State::CommentStart;
must_lstrip = config.lstrip_blocks;
@@ -2206,11 +2247,13 @@ public:
}
nonstd::string_view text = string_view::slice(m_in, tok_start, pos);
if (must_lstrip)
if (must_lstrip) {
text = clear_final_line_if_whitespace(text);
}
if (text.empty())
if (text.empty()) {
goto again; // don't generate empty token
}
return Token(Token::Kind::Text, text);
}
case State::ExpressionStart: {
@@ -2228,6 +2271,16 @@ public:
pos += config.statement_open.size();
return make_token(Token::Kind::StatementOpen);
}
// Statement opened with the no-lstrip delimiter (`{%+` by default): advance
// past that longer delimiter and emit an ordinary StatementOpen token.
case State::StatementStartNoLstrip: {
state = State::StatementBody;
pos += config.statement_open_no_lstrip.size();
return make_token(Token::Kind::StatementOpen);
}
// Statement opened with the force-lstrip delimiter (`{%-` by default): the
// token kind is the same StatementOpen; only the delimiter length differs.
case State::StatementStartForceLstrip: {
state = State::StatementBody;
pos += config.statement_open_force_lstrip.size();
return make_token(Token::Kind::StatementOpen);
}
case State::CommentStart: {
state = State::CommentBody;
pos += config.comment_open.size();
@@ -2238,7 +2291,7 @@ public:
case State::LineBody:
return scan_body("\n", Token::Kind::LineStatementClose);
case State::StatementBody:
return scan_body(config.statement_close, Token::Kind::StatementClose, config.trim_blocks);
return scan_body(config.statement_close, Token::Kind::StatementClose, config.statement_close_force_rstrip, config.trim_blocks);
case State::CommentBody: {
// fast-scan to comment close
size_t end = m_in.substr(pos).find(config.comment_close);
@@ -2251,7 +2304,7 @@ public:
pos += end + config.comment_close.size();
Token tok = make_token(Token::Kind::CommentClose);
if (config.trim_blocks) {
skip_newline();
skip_whitespaces_and_first_newline();
}
return tok;
}
@@ -2743,6 +2796,7 @@ public:
tmpl.nodes.back().value = key_token.text;
}
tmpl.nodes.back().str = static_cast<std::string>(value_token.text);
tmpl.nodes.back().view = value_token.text;
} else if (tok.text == static_cast<decltype(tok.text)>("endfor")) {
get_next_token();
if (loop_stack.empty()) {
@@ -2810,6 +2864,7 @@ public:
last.op = Node::Op::Callback;
last.args = num_args;
last.str = static_cast<std::string>(name);
last.view = name;
return;
}
}
@@ -2817,6 +2872,7 @@ public:
// otherwise just add it to the end
tmpl.nodes.emplace_back(Node::Op::Callback, num_args);
tmpl.nodes.back().str = static_cast<std::string>(name);
tmpl.nodes.back().view = name;
}
void parse_into(Template &tmpl, nonstd::string_view path) {
@@ -3558,7 +3614,10 @@ public:
/// Configures the delimiters that open and close template statements.
/// The whitespace-control variants are derived from them: `open` + "+"
/// (no lstrip), `open` + "-" (force lstrip) and "-" + `close` (force rstrip).
/// The lexer's set of opening characters is refreshed afterwards.
void set_statement(const std::string &open, const std::string &close) {
  auto &cfg = lexer_config;

  // Opening delimiter and its two suffixed variants.
  cfg.statement_open = open;
  cfg.statement_open_no_lstrip = std::string(open).append("+");
  cfg.statement_open_force_lstrip = std::string(open).append("-");

  // Closing delimiter and its prefixed variant.
  cfg.statement_close = close;
  cfg.statement_close_force_rstrip = std::string("-").append(close);

  cfg.update_open_chars();
}

View File

@@ -428,6 +428,33 @@ TEST_CASE("templates") {
CHECK(t2.count_variables() == 3);
CHECK(t3.count_variables() == 5);
}
// Pins the behaviour of the manual whitespace markers ("{%-", "{%+", "-%}")
// on their own and combined with the lstrip_blocks / trim_blocks settings.
SUBCASE("whitespace control") {
// Fresh environment: neither lstrip_blocks nor trim_blocks is set explicitly yet.
inja::Environment env;
// Plain delimiters leave the surrounding whitespace and newlines untouched.
CHECK(env.render("{% if is_happy %}{{ name }}{% endif %}", data) == "Peter");
CHECK(env.render(" {% if is_happy %}{{ name }}{% endif %} ", data) == " Peter ");
CHECK(env.render(" {% if is_happy %}{{ name }}{% endif %}\n ", data) == " Peter\n ");
// "{%-" removes the whitespace in front of the statement on its own line; the preceding newline stays.
CHECK(env.render("Test\n {%- if is_happy %}{{ name }}{% endif %} ", data) == "Test\nPeter ");
// "{%+" keeps the leading whitespace.
CHECK(env.render(" {%+ if is_happy %}{{ name }}{% endif %}", data) == " Peter");
// "-%}" removes all whitespace after the statement, newlines included.
CHECK(env.render(" {%- if is_happy %}{{ name }}{% endif -%} \n ", data) == "Peter");
// Nothing will be stripped if there are other characters before the start of the block.
CHECK(env.render(". {%- if is_happy %}{{ name }}{% endif -%}\n", data) == ". Peter");
// From here on lstrip_blocks is enabled: leading whitespace before a statement is
// removed unless the statement is opened with "{%+".
env.set_lstrip_blocks(true);
CHECK(env.render(" {% if is_happy %}{{ name }}{% endif %}", data) == "Peter");
CHECK(env.render(" {% if is_happy %}{{ name }}{% endif %} ", data) == "Peter ");
CHECK(env.render(" {% if is_happy %}{{ name }}{% endif -%} ", data) == "Peter");
CHECK(env.render(" {%+ if is_happy %}{{ name }}{% endif %}", data) == " Peter");
CHECK(env.render("\n {%+ if is_happy %}{{ name }}{% endif -%} ", data) == "\n Peter");
// Without trim_blocks the newline after a statement is still emitted.
CHECK(env.render("{% if is_happy %}{{ name }}{% endif %}\n", data) == "Peter\n");
// trim_blocks is now enabled as well (lstrip_blocks stays on): trailing spaces and
// the first newline after a statement are removed.
env.set_trim_blocks(true);
CHECK(env.render("{% if is_happy %}{{ name }}{% endif %}", data) == "Peter");
CHECK(env.render("{% if is_happy %}{{ name }}{% endif %}\n", data) == "Peter");
CHECK(env.render("{% if is_happy %}{{ name }}{% endif %} \n.", data) == "Peter.");
CHECK(env.render("{%- if is_happy %}{{ name }}{% endif -%} \n.", data) == "Peter.");
}
}
TEST_CASE("other-syntax") {