From c4d99c94554975b862a67791650e32e0854a725a Mon Sep 17 00:00:00 2001
From: pantor <lars.berscheid@online.de>
Date: Mon, 29 Jun 2020 23:12:12 +0200
Subject: [PATCH] add finegrained whitespace control

---
 README.md                    |  8 +++-
 include/inja/config.hpp      |  9 ++++
 include/inja/environment.hpp |  3 ++
 include/inja/lexer.hpp       | 64 ++++++++++++++++++++++++-----
 include/inja/parser.hpp      |  3 ++
 single_include/inja/inja.hpp | 79 +++++++++++++++++++++++++++++++-----
 test/unit-renderer.cpp       | 27 ++++++++++++
 7 files changed, 172 insertions(+), 21 deletions(-)

diff --git a/README.md b/README.md
index 87c2a39..57341d9 100644
--- a/README.md
+++ b/README.md
@@ -260,7 +260,13 @@ env.set_trim_blocks(true);
 env.set_lstrip_blocks(true);
 ```
 
-With both `trim_blocks` and `lstrip_blocks` enabled, you can put statements on their own lines.
+With both `trim_blocks` and `lstrip_blocks` enabled, you can put statements on their own lines. Furthermore, you can also strip whitespace by hand. If you add a minus sign (`-`) to the start or end of a statement, the whitespace before or after that block will be removed:
+
+```.cpp
+render("{% if neighbour in guests -%}   I was there{% endif -%}   !", data); // The whitespace after each statement is stripped
+```
+
+Stripping after a statement also removes any newlines.
 
 ### Callbacks
 
diff --git a/include/inja/config.hpp b/include/inja/config.hpp
index 12dd94a..dc80746 100644
--- a/include/inja/config.hpp
+++ b/include/inja/config.hpp
@@ -17,7 +17,10 @@ enum class ElementNotation { Dot, Pointer };
  */
 struct LexerConfig {
   std::string statement_open {"{%"};
+  std::string statement_open_no_lstrip {"{%+"};
+  std::string statement_open_force_lstrip {"{%-"};
   std::string statement_close {"%}"};
+  std::string statement_close_force_rstrip {"-%}"};
   std::string line_statement {"##"};
   std::string expression_open {"{{"};
   std::string expression_close {"}}"};
@@ -36,6 +39,12 @@ struct LexerConfig {
     if (open_chars.find(statement_open[0]) == std::string::npos) {
       open_chars += statement_open[0];
     }
+    if (open_chars.find(statement_open_no_lstrip[0]) == std::string::npos) {
+      open_chars += statement_open_no_lstrip[0];
+    }
+    if (open_chars.find(statement_open_force_lstrip[0]) == std::string::npos) {
+      open_chars += statement_open_force_lstrip[0];
+    }
     if (open_chars.find(expression_open[0]) == std::string::npos) {
       open_chars += expression_open[0];
     }
diff --git a/include/inja/environment.hpp b/include/inja/environment.hpp
index fb9996c..23deb5d 100644
--- a/include/inja/environment.hpp
+++ b/include/inja/environment.hpp
@@ -47,7 +47,10 @@ public:
   /// Sets the opener and closer for template statements
   void set_statement(const std::string &open, const std::string &close) {
     lexer_config.statement_open = open;
+    lexer_config.statement_open_no_lstrip = open + "+";
+    lexer_config.statement_open_force_lstrip = open + "-";
     lexer_config.statement_close = close;
+    lexer_config.statement_close_force_rstrip = "-" + close;
     lexer_config.update_open_chars();
   }
 
diff --git a/include/inja/lexer.hpp b/include/inja/lexer.hpp
index e9e2930..8e77d28 100644
--- a/include/inja/lexer.hpp
+++ b/include/inja/lexer.hpp
@@ -23,6 +23,8 @@ class Lexer {
     LineStart,
     LineBody,
     StatementStart,
+    StatementStartNoLstrip,
+    StatementStartForceLstrip,
     StatementBody,
     CommentStart,
     CommentBody
@@ -36,7 +38,7 @@ class Lexer {
   size_t pos;
 
 
-  Token scan_body(nonstd::string_view close, Token::Kind closeKind, bool trim = false) {
+  Token scan_body(nonstd::string_view close, Token::Kind closeKind, nonstd::string_view close_trim = nonstd::string_view(), bool trim = false) {
   again:
     // skip whitespace (except for \n as it might be a close)
     if (tok_start >= m_in.size()) {
@@ -49,12 +51,20 @@ class Lexer {
     }
 
     // check for close
+    if (!close_trim.empty() && inja::string_view::starts_with(m_in.substr(tok_start), close_trim)) {
+      state = State::Text;
+      pos = tok_start + close_trim.size();
+      Token tok = make_token(closeKind);
+      skip_whitespaces_and_newlines();
+      return tok;
+    }
+
     if (inja::string_view::starts_with(m_in.substr(tok_start), close)) {
       state = State::Text;
       pos = tok_start + close.size();
       Token tok = make_token(closeKind);
       if (trim) {
-        skip_newline();
+        skip_whitespaces_and_first_newline();
       }
       return tok;
     }
@@ -164,8 +174,9 @@ class Lexer {
   Token scan_string() {
     bool escape {false};
     for (;;) {
-      if (pos >= m_in.size())
+      if (pos >= m_in.size()) {
         break;
+      }
       char ch = m_in[pos++];
       if (ch == '\\') {
         escape = true;
@@ -180,7 +191,21 @@ class Lexer {
 
   Token make_token(Token::Kind kind) const { return Token(kind, string_view::slice(m_in, tok_start, pos)); }
 
-  void skip_newline() {
+  void skip_whitespaces_and_newlines() {
+    if (pos < m_in.size()) {
+      while (pos < m_in.size() && (m_in[pos] == ' ' || m_in[pos] == '\t' || m_in[pos] == '\n' || m_in[pos] == '\r')) {
+        pos += 1;
+      }
+    }
+  }
+
+  void skip_whitespaces_and_first_newline() {
+    if (pos < m_in.size()) {
+      while (pos < m_in.size() && (m_in[pos] == ' ' || m_in[pos] == '\t')) {
+        pos += 1;
+      }
+    }
+
     if (pos < m_in.size()) {
       char ch = m_in[pos];
       if (ch == '\n') {
@@ -249,8 +274,15 @@ public:
       if (inja::string_view::starts_with(open_str, config.expression_open)) {
         state = State::ExpressionStart;
       } else if (inja::string_view::starts_with(open_str, config.statement_open)) {
-        state = State::StatementStart;
-        must_lstrip = config.lstrip_blocks;
+        if (inja::string_view::starts_with(open_str, config.statement_open_no_lstrip)) {
+          state = State::StatementStartNoLstrip;
+        } else if (inja::string_view::starts_with(open_str, config.statement_open_force_lstrip )) {
+          state = State::StatementStartForceLstrip;
+          must_lstrip = true;
+        } else {
+          state = State::StatementStart;
+          must_lstrip = config.lstrip_blocks;
+        }
       } else if (inja::string_view::starts_with(open_str, config.comment_open)) {
         state = State::CommentStart;
         must_lstrip = config.lstrip_blocks;
@@ -263,11 +295,13 @@ public:
       }
 
       nonstd::string_view text = string_view::slice(m_in, tok_start, pos);
-      if (must_lstrip)
+      if (must_lstrip) {
         text = clear_final_line_if_whitespace(text);
+      }
 
-      if (text.empty())
+      if (text.empty()) {
         goto again; // don't generate empty token
+      }
       return Token(Token::Kind::Text, text);
     }
     case State::ExpressionStart: {
@@ -285,6 +319,16 @@ public:
       pos += config.statement_open.size();
       return make_token(Token::Kind::StatementOpen);
     }
+    case State::StatementStartNoLstrip: {
+      state = State::StatementBody;
+      pos += config.statement_open_no_lstrip.size();
+      return make_token(Token::Kind::StatementOpen);
+    }
+    case State::StatementStartForceLstrip: {
+      state = State::StatementBody;
+      pos += config.statement_open_force_lstrip.size();
+      return make_token(Token::Kind::StatementOpen);
+    }
     case State::CommentStart: {
       state = State::CommentBody;
       pos += config.comment_open.size();
@@ -295,7 +339,7 @@ public:
     case State::LineBody:
       return scan_body("\n", Token::Kind::LineStatementClose);
     case State::StatementBody:
-      return scan_body(config.statement_close, Token::Kind::StatementClose, config.trim_blocks);
+      return scan_body(config.statement_close, Token::Kind::StatementClose, config.statement_close_force_rstrip, config.trim_blocks);
     case State::CommentBody: {
       // fast-scan to comment close
       size_t end = m_in.substr(pos).find(config.comment_close);
@@ -308,7 +352,7 @@ public:
       pos += end + config.comment_close.size();
       Token tok = make_token(Token::Kind::CommentClose);
       if (config.trim_blocks) {
-        skip_newline();
+        skip_whitespaces_and_first_newline();
       }
       return tok;
     }
diff --git a/include/inja/parser.hpp b/include/inja/parser.hpp
index 975d7b2..d2cb76f 100644
--- a/include/inja/parser.hpp
+++ b/include/inja/parser.hpp
@@ -447,6 +447,7 @@ public:
         tmpl.nodes.back().value = key_token.text;
       }
       tmpl.nodes.back().str = static_cast<std::string>(value_token.text);
+      tmpl.nodes.back().view = value_token.text;
     } else if (tok.text == static_cast<decltype(tok.text)>("endfor")) {
       get_next_token();
       if (loop_stack.empty()) {
@@ -514,6 +515,7 @@ public:
         last.op = Node::Op::Callback;
         last.args = num_args;
         last.str = static_cast<std::string>(name);
+        last.view = name;
         return;
       }
     }
@@ -521,6 +523,7 @@ public:
     // otherwise just add it to the end
     tmpl.nodes.emplace_back(Node::Op::Callback, num_args);
     tmpl.nodes.back().str = static_cast<std::string>(name);
+    tmpl.nodes.back().view = name;
   }
 
   void parse_into(Template &tmpl, nonstd::string_view path) {
diff --git a/single_include/inja/inja.hpp b/single_include/inja/inja.hpp
index b9e7da0..43691ca 100644
--- a/single_include/inja/inja.hpp
+++ b/single_include/inja/inja.hpp
@@ -1455,7 +1455,10 @@ enum class ElementNotation { Dot, Pointer };
  */
 struct LexerConfig {
   std::string statement_open {"{%"};
+  std::string statement_open_no_lstrip {"{%+"};
+  std::string statement_open_force_lstrip {"{%-"};
   std::string statement_close {"%}"};
+  std::string statement_close_force_rstrip {"-%}"};
   std::string line_statement {"##"};
   std::string expression_open {"{{"};
   std::string expression_close {"}}"};
@@ -1474,6 +1477,12 @@ struct LexerConfig {
     if (open_chars.find(statement_open[0]) == std::string::npos) {
       open_chars += statement_open[0];
     }
+    if (open_chars.find(statement_open_no_lstrip[0]) == std::string::npos) {
+      open_chars += statement_open_no_lstrip[0];
+    }
+    if (open_chars.find(statement_open_force_lstrip[0]) == std::string::npos) {
+      open_chars += statement_open_force_lstrip[0];
+    }
     if (open_chars.find(expression_open[0]) == std::string::npos) {
       open_chars += expression_open[0];
     }
@@ -1966,6 +1975,8 @@ class Lexer {
     LineStart,
     LineBody,
     StatementStart,
+    StatementStartNoLstrip,
+    StatementStartForceLstrip,
     StatementBody,
     CommentStart,
     CommentBody
@@ -1979,7 +1990,7 @@ class Lexer {
   size_t pos;
 
 
-  Token scan_body(nonstd::string_view close, Token::Kind closeKind, bool trim = false) {
+  Token scan_body(nonstd::string_view close, Token::Kind closeKind, nonstd::string_view close_trim = nonstd::string_view(), bool trim = false) {
   again:
     // skip whitespace (except for \n as it might be a close)
     if (tok_start >= m_in.size()) {
@@ -1992,12 +2003,20 @@ class Lexer {
     }
 
     // check for close
+    if (!close_trim.empty() && inja::string_view::starts_with(m_in.substr(tok_start), close_trim)) {
+      state = State::Text;
+      pos = tok_start + close_trim.size();
+      Token tok = make_token(closeKind);
+      skip_whitespaces_and_newlines();
+      return tok;
+    }
+
     if (inja::string_view::starts_with(m_in.substr(tok_start), close)) {
       state = State::Text;
       pos = tok_start + close.size();
       Token tok = make_token(closeKind);
       if (trim) {
-        skip_newline();
+        skip_whitespaces_and_first_newline();
       }
       return tok;
     }
@@ -2107,8 +2126,9 @@ class Lexer {
   Token scan_string() {
     bool escape {false};
     for (;;) {
-      if (pos >= m_in.size())
+      if (pos >= m_in.size()) {
         break;
+      }
       char ch = m_in[pos++];
       if (ch == '\\') {
         escape = true;
@@ -2123,7 +2143,21 @@ class Lexer {
 
   Token make_token(Token::Kind kind) const { return Token(kind, string_view::slice(m_in, tok_start, pos)); }
 
-  void skip_newline() {
+  void skip_whitespaces_and_newlines() {
+    if (pos < m_in.size()) {
+      while (pos < m_in.size() && (m_in[pos] == ' ' || m_in[pos] == '\t' || m_in[pos] == '\n' || m_in[pos] == '\r')) {
+        pos += 1;
+      }
+    }
+  }
+
+  void skip_whitespaces_and_first_newline() {
+    if (pos < m_in.size()) {
+      while (pos < m_in.size() && (m_in[pos] == ' ' || m_in[pos] == '\t')) {
+        pos += 1;
+      }
+    }
+
     if (pos < m_in.size()) {
       char ch = m_in[pos];
       if (ch == '\n') {
@@ -2192,8 +2226,15 @@ public:
       if (inja::string_view::starts_with(open_str, config.expression_open)) {
         state = State::ExpressionStart;
       } else if (inja::string_view::starts_with(open_str, config.statement_open)) {
-        state = State::StatementStart;
-        must_lstrip = config.lstrip_blocks;
+        if (inja::string_view::starts_with(open_str, config.statement_open_no_lstrip)) {
+          state = State::StatementStartNoLstrip;
+        } else if (inja::string_view::starts_with(open_str, config.statement_open_force_lstrip )) {
+          state = State::StatementStartForceLstrip;
+          must_lstrip = true;
+        } else {
+          state = State::StatementStart;
+          must_lstrip = config.lstrip_blocks;
+        }
       } else if (inja::string_view::starts_with(open_str, config.comment_open)) {
         state = State::CommentStart;
         must_lstrip = config.lstrip_blocks;
@@ -2206,11 +2247,13 @@ public:
       }
 
       nonstd::string_view text = string_view::slice(m_in, tok_start, pos);
-      if (must_lstrip)
+      if (must_lstrip) {
         text = clear_final_line_if_whitespace(text);
+      }
 
-      if (text.empty())
+      if (text.empty()) {
         goto again; // don't generate empty token
+      }
       return Token(Token::Kind::Text, text);
     }
     case State::ExpressionStart: {
@@ -2228,6 +2271,16 @@ public:
       pos += config.statement_open.size();
       return make_token(Token::Kind::StatementOpen);
     }
+    case State::StatementStartNoLstrip: {
+      state = State::StatementBody;
+      pos += config.statement_open_no_lstrip.size();
+      return make_token(Token::Kind::StatementOpen);
+    }
+    case State::StatementStartForceLstrip: {
+      state = State::StatementBody;
+      pos += config.statement_open_force_lstrip.size();
+      return make_token(Token::Kind::StatementOpen);
+    }
     case State::CommentStart: {
       state = State::CommentBody;
       pos += config.comment_open.size();
@@ -2238,7 +2291,7 @@ public:
     case State::LineBody:
       return scan_body("\n", Token::Kind::LineStatementClose);
     case State::StatementBody:
-      return scan_body(config.statement_close, Token::Kind::StatementClose, config.trim_blocks);
+      return scan_body(config.statement_close, Token::Kind::StatementClose, config.statement_close_force_rstrip, config.trim_blocks);
     case State::CommentBody: {
       // fast-scan to comment close
       size_t end = m_in.substr(pos).find(config.comment_close);
@@ -2251,7 +2304,7 @@ public:
       pos += end + config.comment_close.size();
       Token tok = make_token(Token::Kind::CommentClose);
       if (config.trim_blocks) {
-        skip_newline();
+        skip_whitespaces_and_first_newline();
       }
       return tok;
     }
@@ -2743,6 +2796,7 @@ public:
         tmpl.nodes.back().value = key_token.text;
       }
       tmpl.nodes.back().str = static_cast<std::string>(value_token.text);
+      tmpl.nodes.back().view = value_token.text;
     } else if (tok.text == static_cast<decltype(tok.text)>("endfor")) {
       get_next_token();
       if (loop_stack.empty()) {
@@ -2810,6 +2864,7 @@ public:
         last.op = Node::Op::Callback;
         last.args = num_args;
         last.str = static_cast<std::string>(name);
+        last.view = name;
         return;
       }
     }
@@ -2817,6 +2872,7 @@ public:
     // otherwise just add it to the end
     tmpl.nodes.emplace_back(Node::Op::Callback, num_args);
     tmpl.nodes.back().str = static_cast<std::string>(name);
+    tmpl.nodes.back().view = name;
   }
 
   void parse_into(Template &tmpl, nonstd::string_view path) {
@@ -3558,7 +3614,10 @@ public:
   /// Sets the opener and closer for template statements
   void set_statement(const std::string &open, const std::string &close) {
     lexer_config.statement_open = open;
+    lexer_config.statement_open_no_lstrip = open + "+";
+    lexer_config.statement_open_force_lstrip = open + "-";
     lexer_config.statement_close = close;
+    lexer_config.statement_close_force_rstrip = "-" + close;
     lexer_config.update_open_chars();
   }
 
diff --git a/test/unit-renderer.cpp b/test/unit-renderer.cpp
index fb2dc08..c2d347f 100644
--- a/test/unit-renderer.cpp
+++ b/test/unit-renderer.cpp
@@ -428,6 +428,33 @@ TEST_CASE("templates") {
     CHECK(t2.count_variables() == 3);
     CHECK(t3.count_variables() == 5);
   }
+
+  SUBCASE("whitespace control") {
+    inja::Environment env;
+    CHECK(env.render("{% if is_happy %}{{ name }}{% endif %}", data) == "Peter");
+    CHECK(env.render("   {% if is_happy %}{{ name }}{% endif %}   ", data) == "   Peter   ");
+    CHECK(env.render("   {% if is_happy %}{{ name }}{% endif %}\n ", data) == "   Peter\n ");
+    CHECK(env.render("Test\n   {%- if is_happy %}{{ name }}{% endif %}   ", data) == "Test\nPeter   ");
+    CHECK(env.render("   {%+ if is_happy %}{{ name }}{% endif %}", data) == "   Peter");
+    CHECK(env.render("   {%- if is_happy %}{{ name }}{% endif -%}   \n   ", data) == "Peter");
+
+    // Nothing will be stripped if there are other characters before the start of the block.
+    CHECK(env.render(".  {%- if is_happy %}{{ name }}{% endif -%}\n", data) == ".  Peter");
+
+    env.set_lstrip_blocks(true);
+    CHECK(env.render("   {% if is_happy %}{{ name }}{% endif %}", data) == "Peter");
+    CHECK(env.render("   {% if is_happy %}{{ name }}{% endif %}   ", data) == "Peter   ");
+    CHECK(env.render("   {% if is_happy %}{{ name }}{% endif -%}   ", data) == "Peter");
+    CHECK(env.render("   {%+ if is_happy %}{{ name }}{% endif %}", data) == "   Peter");
+    CHECK(env.render("\n   {%+ if is_happy %}{{ name }}{% endif -%}   ", data) == "\n   Peter");
+    CHECK(env.render("{% if is_happy %}{{ name }}{% endif %}\n", data) == "Peter\n");
+
+    env.set_trim_blocks(true);
+    CHECK(env.render("{% if is_happy %}{{ name }}{% endif %}", data) == "Peter");
+    CHECK(env.render("{% if is_happy %}{{ name }}{% endif %}\n", data) == "Peter");
+    CHECK(env.render("{% if is_happy %}{{ name }}{% endif %}   \n.", data) == "Peter.");
+    CHECK(env.render("{%- if is_happy %}{{ name }}{% endif -%}   \n.", data) == "Peter.");
+  }
 }
 
 TEST_CASE("other-syntax") {