vendor/twig/twig/src/Lexer.php line 440

Open in your IDE?
  1. <?php
  2. /*
  3.  * This file is part of Twig.
  4.  *
  5.  * (c) Fabien Potencier
  6.  * (c) Armin Ronacher
  7.  *
  8.  * For the full copyright and license information, please view the LICENSE
  9.  * file that was distributed with this source code.
  10.  */
  11. namespace Twig;
  12. use Twig\Error\SyntaxError;
  13. /**
  14.  * @author Fabien Potencier <fabien@symfony.com>
  15.  */
  16. class Lexer
  17. {
  18.     private $isInitialized false;
  19.     private $tokens;
  20.     private $code;
  21.     private $cursor;
  22.     private $lineno;
  23.     private $end;
  24.     private $state;
  25.     private $states;
  26.     private $brackets;
  27.     private $env;
  28.     private $source;
  29.     private $options;
  30.     private $regexes;
  31.     private $position;
  32.     private $positions;
  33.     private $currentVarBlockLine;
  34.     public const STATE_DATA 0;
  35.     public const STATE_BLOCK 1;
  36.     public const STATE_VAR 2;
  37.     public const STATE_STRING 3;
  38.     public const STATE_INTERPOLATION 4;
  39.     public const REGEX_NAME '/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/A';
  40.     public const REGEX_NUMBER '/[0-9]+(?:\.[0-9]+)?([Ee][\+\-][0-9]+)?/A';
  41.     public const REGEX_STRING '/"([^#"\\\\]*(?:\\\\.[^#"\\\\]*)*)"|\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'/As';
  42.     public const REGEX_DQ_STRING_DELIM '/"/A';
  43.     public const REGEX_DQ_STRING_PART '/[^#"\\\\]*(?:(?:\\\\.|#(?!\{))[^#"\\\\]*)*/As';
  44.     public const PUNCTUATION '()[]{}?:.,|';
  45.     private const SPECIAL_CHARS = [
  46.         'f' => "\f",
  47.         'n' => "\n",
  48.         'r' => "\r",
  49.         't' => "\t",
  50.         'v' => "\v",
  51.     ];
  52.     public function __construct(Environment $env, array $options = [])
  53.     {
  54.         $this->env $env;
  55.         $this->options array_merge([
  56.             'tag_comment' => ['{#''#}'],
  57.             'tag_block' => ['{%''%}'],
  58.             'tag_variable' => ['{{''}}'],
  59.             'whitespace_trim' => '-',
  60.             'whitespace_line_trim' => '~',
  61.             'whitespace_line_chars' => ' \t\0\x0B',
  62.             'interpolation' => ['#{''}'],
  63.         ], $options);
  64.     }
  65.     private function initialize()
  66.     {
  67.         if ($this->isInitialized) {
  68.             return;
  69.         }
  70.         // when PHP 7.3 is the min version, we will be able to remove the '#' part in preg_quote as it's part of the default
  71.         $this->regexes = [
  72.             // }}
  73.             'lex_var' => '{
  74.                 \s*
  75.                 (?:'.
  76.                     preg_quote($this->options['whitespace_trim'].$this->options['tag_variable'][1], '#').'\s*'// -}}\s*
  77.                     '|'.
  78.                     preg_quote($this->options['whitespace_line_trim'].$this->options['tag_variable'][1], '#').'['.$this->options['whitespace_line_chars'].']*'// ~}}[ \t\0\x0B]*
  79.                     '|'.
  80.                     preg_quote($this->options['tag_variable'][1], '#'). // }}
  81.                 ')
  82.             }Ax',
  83.             // %}
  84.             'lex_block' => '{
  85.                 \s*
  86.                 (?:'.
  87.                     preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*\n?'// -%}\s*\n?
  88.                     '|'.
  89.                     preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'// ~%}[ \t\0\x0B]*
  90.                     '|'.
  91.                     preg_quote($this->options['tag_block'][1], '#').'\n?'// %}\n?
  92.                 ')
  93.             }Ax',
  94.             // {% endverbatim %}
  95.             'lex_raw_data' => '{'.
  96.                 preg_quote($this->options['tag_block'][0], '#'). // {%
  97.                 '('.
  98.                     $this->options['whitespace_trim']. // -
  99.                     '|'.
  100.                     $this->options['whitespace_line_trim']. // ~
  101.                 ')?\s*endverbatim\s*'.
  102.                 '(?:'.
  103.                     preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*'// -%}
  104.                     '|'.
  105.                     preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'// ~%}[ \t\0\x0B]*
  106.                     '|'.
  107.                     preg_quote($this->options['tag_block'][1], '#'). // %}
  108.                 ')
  109.             }sx',
  110.             'operator' => $this->getOperatorRegex(),
  111.             // #}
  112.             'lex_comment' => '{
  113.                 (?:'.
  114.                     preg_quote($this->options['whitespace_trim'].$this->options['tag_comment'][1], '#').'\s*\n?'// -#}\s*\n?
  115.                     '|'.
  116.                     preg_quote($this->options['whitespace_line_trim'].$this->options['tag_comment'][1], '#').'['.$this->options['whitespace_line_chars'].']*'// ~#}[ \t\0\x0B]*
  117.                     '|'.
  118.                     preg_quote($this->options['tag_comment'][1], '#').'\n?'// #}\n?
  119.                 ')
  120.             }sx',
  121.             // verbatim %}
  122.             'lex_block_raw' => '{
  123.                 \s*verbatim\s*
  124.                 (?:'.
  125.                     preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '#').'\s*'// -%}\s*
  126.                     '|'.
  127.                     preg_quote($this->options['whitespace_line_trim'].$this->options['tag_block'][1], '#').'['.$this->options['whitespace_line_chars'].']*'// ~%}[ \t\0\x0B]*
  128.                     '|'.
  129.                     preg_quote($this->options['tag_block'][1], '#'). // %}
  130.                 ')
  131.             }Asx',
  132.             'lex_block_line' => '{\s*line\s+(\d+)\s*'.preg_quote($this->options['tag_block'][1], '#').'}As',
  133.             // {{ or {% or {#
  134.             'lex_tokens_start' => '{
  135.                 ('.
  136.                     preg_quote($this->options['tag_variable'][0], '#'). // {{
  137.                     '|'.
  138.                     preg_quote($this->options['tag_block'][0], '#'). // {%
  139.                     '|'.
  140.                     preg_quote($this->options['tag_comment'][0], '#'). // {#
  141.                 ')('.
  142.                     preg_quote($this->options['whitespace_trim'], '#'). // -
  143.                     '|'.
  144.                     preg_quote($this->options['whitespace_line_trim'], '#'). // ~
  145.                 ')?
  146.             }sx',
  147.             'interpolation_start' => '{'.preg_quote($this->options['interpolation'][0], '#').'\s*}A',
  148.             'interpolation_end' => '{\s*'.preg_quote($this->options['interpolation'][1], '#').'}A',
  149.         ];
  150.         $this->isInitialized true;
  151.     }
  152.     public function tokenize(Source $source): TokenStream
  153.     {
  154.         $this->initialize();
  155.         $this->source $source;
  156.         $this->code str_replace(["\r\n""\r"], "\n"$source->getCode());
  157.         $this->cursor 0;
  158.         $this->lineno 1;
  159.         $this->end \strlen($this->code);
  160.         $this->tokens = [];
  161.         $this->state self::STATE_DATA;
  162.         $this->states = [];
  163.         $this->brackets = [];
  164.         $this->position = -1;
  165.         // find all token starts in one go
  166.         preg_match_all($this->regexes['lex_tokens_start'], $this->code$matches\PREG_OFFSET_CAPTURE);
  167.         $this->positions $matches;
  168.         while ($this->cursor $this->end) {
  169.             // dispatch to the lexing functions depending
  170.             // on the current state
  171.             switch ($this->state) {
  172.                 case self::STATE_DATA:
  173.                     $this->lexData();
  174.                     break;
  175.                 case self::STATE_BLOCK:
  176.                     $this->lexBlock();
  177.                     break;
  178.                 case self::STATE_VAR:
  179.                     $this->lexVar();
  180.                     break;
  181.                 case self::STATE_STRING:
  182.                     $this->lexString();
  183.                     break;
  184.                 case self::STATE_INTERPOLATION:
  185.                     $this->lexInterpolation();
  186.                     break;
  187.             }
  188.         }
  189.         $this->pushToken(Token::EOF_TYPE);
  190.         if (!empty($this->brackets)) {
  191.             [$expect$lineno] = array_pop($this->brackets);
  192.             throw new SyntaxError(\sprintf('Unclosed "%s".'$expect), $lineno$this->source);
  193.         }
  194.         return new TokenStream($this->tokens$this->source);
  195.     }
  196.     private function lexData(): void
  197.     {
  198.         // if no matches are left we return the rest of the template as simple text token
  199.         if ($this->position == \count($this->positions[0]) - 1) {
  200.             $this->pushToken(Token::TEXT_TYPEsubstr($this->code$this->cursor));
  201.             $this->cursor $this->end;
  202.             return;
  203.         }
  204.         // Find the first token after the current cursor
  205.         $position $this->positions[0][++$this->position];
  206.         while ($position[1] < $this->cursor) {
  207.             if ($this->position == \count($this->positions[0]) - 1) {
  208.                 return;
  209.             }
  210.             $position $this->positions[0][++$this->position];
  211.         }
  212.         // push the template text first
  213.         $text $textContent substr($this->code$this->cursor$position[1] - $this->cursor);
  214.         // trim?
  215.         if (isset($this->positions[2][$this->position][0])) {
  216.             if ($this->options['whitespace_trim'] === $this->positions[2][$this->position][0]) {
  217.                 // whitespace_trim detected ({%-, {{- or {#-)
  218.                 $text rtrim($text);
  219.             } elseif ($this->options['whitespace_line_trim'] === $this->positions[2][$this->position][0]) {
  220.                 // whitespace_line_trim detected ({%~, {{~ or {#~)
  221.                 // don't trim \r and \n
  222.                 $text rtrim($text" \t\0\x0B");
  223.             }
  224.         }
  225.         $this->pushToken(Token::TEXT_TYPE$text);
  226.         $this->moveCursor($textContent.$position[0]);
  227.         switch ($this->positions[1][$this->position][0]) {
  228.             case $this->options['tag_comment'][0]:
  229.                 $this->lexComment();
  230.                 break;
  231.             case $this->options['tag_block'][0]:
  232.                 // raw data?
  233.                 if (preg_match($this->regexes['lex_block_raw'], $this->code$match0$this->cursor)) {
  234.                     $this->moveCursor($match[0]);
  235.                     $this->lexRawData();
  236.                 // {% line \d+ %}
  237.                 } elseif (preg_match($this->regexes['lex_block_line'], $this->code$match0$this->cursor)) {
  238.                     $this->moveCursor($match[0]);
  239.                     $this->lineno = (int) $match[1];
  240.                 } else {
  241.                     $this->pushToken(Token::BLOCK_START_TYPE);
  242.                     $this->pushState(self::STATE_BLOCK);
  243.                     $this->currentVarBlockLine $this->lineno;
  244.                 }
  245.                 break;
  246.             case $this->options['tag_variable'][0]:
  247.                 $this->pushToken(Token::VAR_START_TYPE);
  248.                 $this->pushState(self::STATE_VAR);
  249.                 $this->currentVarBlockLine $this->lineno;
  250.                 break;
  251.         }
  252.     }
  253.     private function lexBlock(): void
  254.     {
  255.         if (empty($this->brackets) && preg_match($this->regexes['lex_block'], $this->code$match0$this->cursor)) {
  256.             $this->pushToken(Token::BLOCK_END_TYPE);
  257.             $this->moveCursor($match[0]);
  258.             $this->popState();
  259.         } else {
  260.             $this->lexExpression();
  261.         }
  262.     }
  263.     private function lexVar(): void
  264.     {
  265.         if (empty($this->brackets) && preg_match($this->regexes['lex_var'], $this->code$match0$this->cursor)) {
  266.             $this->pushToken(Token::VAR_END_TYPE);
  267.             $this->moveCursor($match[0]);
  268.             $this->popState();
  269.         } else {
  270.             $this->lexExpression();
  271.         }
  272.     }
  273.     private function lexExpression(): void
  274.     {
  275.         // whitespace
  276.         if (preg_match('/\s+/A'$this->code$match0$this->cursor)) {
  277.             $this->moveCursor($match[0]);
  278.             if ($this->cursor >= $this->end) {
  279.                 throw new SyntaxError(\sprintf('Unclosed "%s".'self::STATE_BLOCK === $this->state 'block' 'variable'), $this->currentVarBlockLine$this->source);
  280.             }
  281.         }
  282.         // spread operator
  283.         if ('.' === $this->code[$this->cursor] && ($this->cursor $this->end) && '.' === $this->code[$this->cursor 1] && '.' === $this->code[$this->cursor 2]) {
  284.             $this->pushToken(Token::SPREAD_TYPE'...');
  285.             $this->moveCursor('...');
  286.         }
  287.         // arrow function
  288.         elseif ('=' === $this->code[$this->cursor] && ($this->cursor $this->end) && '>' === $this->code[$this->cursor 1]) {
  289.             $this->pushToken(Token::ARROW_TYPE'=>');
  290.             $this->moveCursor('=>');
  291.         }
  292.         // operators
  293.         elseif (preg_match($this->regexes['operator'], $this->code$match0$this->cursor)) {
  294.             $this->pushToken(Token::OPERATOR_TYPEpreg_replace('/\s+/'' '$match[0]));
  295.             $this->moveCursor($match[0]);
  296.         }
  297.         // names
  298.         elseif (preg_match(self::REGEX_NAME$this->code$match0$this->cursor)) {
  299.             $this->pushToken(Token::NAME_TYPE$match[0]);
  300.             $this->moveCursor($match[0]);
  301.         }
  302.         // numbers
  303.         elseif (preg_match(self::REGEX_NUMBER$this->code$match0$this->cursor)) {
  304.             $number = (float) $match[0];  // floats
  305.             if (ctype_digit($match[0]) && $number <= \PHP_INT_MAX) {
  306.                 $number = (int) $match[0]; // integers lower than the maximum
  307.             }
  308.             $this->pushToken(Token::NUMBER_TYPE$number);
  309.             $this->moveCursor($match[0]);
  310.         }
  311.         // punctuation
  312.         elseif (str_contains(self::PUNCTUATION$this->code[$this->cursor])) {
  313.             // opening bracket
  314.             if (str_contains('([{'$this->code[$this->cursor])) {
  315.                 $this->brackets[] = [$this->code[$this->cursor], $this->lineno];
  316.             }
  317.             // closing bracket
  318.             elseif (str_contains(')]}'$this->code[$this->cursor])) {
  319.                 if (empty($this->brackets)) {
  320.                     throw new SyntaxError(\sprintf('Unexpected "%s".'$this->code[$this->cursor]), $this->lineno$this->source);
  321.                 }
  322.                 [$expect$lineno] = array_pop($this->brackets);
  323.                 if ($this->code[$this->cursor] != strtr($expect'([{'')]}')) {
  324.                     throw new SyntaxError(\sprintf('Unclosed "%s".'$expect), $lineno$this->source);
  325.                 }
  326.             }
  327.             $this->pushToken(Token::PUNCTUATION_TYPE$this->code[$this->cursor]);
  328.             ++$this->cursor;
  329.         }
  330.         // strings
  331.         elseif (preg_match(self::REGEX_STRING$this->code$match0$this->cursor)) {
  332.             $this->pushToken(Token::STRING_TYPE$this->stripcslashes(substr($match[0], 1, -1), substr($match[0], 01)));
  333.             $this->moveCursor($match[0]);
  334.         }
  335.         // opening double quoted string
  336.         elseif (preg_match(self::REGEX_DQ_STRING_DELIM$this->code$match0$this->cursor)) {
  337.             $this->brackets[] = ['"'$this->lineno];
  338.             $this->pushState(self::STATE_STRING);
  339.             $this->moveCursor($match[0]);
  340.         }
  341.         // unlexable
  342.         else {
  343.             throw new SyntaxError(\sprintf('Unexpected character "%s".'$this->code[$this->cursor]), $this->lineno$this->source);
  344.         }
  345.     }
  346.     private function stripcslashes(string $strstring $quoteType): string
  347.     {
  348.         $result '';
  349.         $length \strlen($str);
  350.         $i 0;
  351.         while ($i $length) {
  352.             if (false === $pos strpos($str'\\'$i)) {
  353.                 $result .= substr($str$i);
  354.                 break;
  355.             }
  356.             $result .= substr($str$i$pos $i);
  357.             $i $pos 1;
  358.             if ($i >= $length) {
  359.                 $result .= '\\';
  360.                 break;
  361.             }
  362.             $nextChar $str[$i];
  363.             if (isset(self::SPECIAL_CHARS[$nextChar])) {
  364.                 $result .= self::SPECIAL_CHARS[$nextChar];
  365.             } elseif ('\\' === $nextChar) {
  366.                 $result .= $nextChar;
  367.             } elseif ("'" === $nextChar || '"' === $nextChar) {
  368.                 if ($nextChar !== $quoteType) {
  369.                     trigger_deprecation('twig/twig''3.12''Character "%s" at position %d should not be escaped; the "\" character is ignored in Twig v3 but will not be in v4. Please remove the extra "\" character.'$nextChar$i 1);
  370.                 }
  371.                 $result .= $nextChar;
  372.             } elseif ('#' === $nextChar && $i $length && '{' === $str[$i 1]) {
  373.                 $result .= '#{';
  374.                 ++$i;
  375.             } elseif ('x' === $nextChar && $i $length && ctype_xdigit($str[$i 1])) {
  376.                 $hex $str[++$i];
  377.                 if ($i $length && ctype_xdigit($str[$i 1])) {
  378.                     $hex .= $str[++$i];
  379.                 }
  380.                 $result .= \chr(hexdec($hex));
  381.             } elseif (ctype_digit($nextChar) && $nextChar '8') {
  382.                 $octal $nextChar;
  383.                 while ($i $length && ctype_digit($str[$i 1]) && $str[$i 1] < '8' && \strlen($octal) < 3) {
  384.                     $octal .= $str[++$i];
  385.                 }
  386.                 $result .= \chr(octdec($octal));
  387.             } else {
  388.                 trigger_deprecation('twig/twig''3.12''Character "%s" at position %d should not be escaped; the "\" character is ignored in Twig v3 but will not be in v4. Please remove the extra "\" character.'$nextChar$i 1);
  389.                 $result .= $nextChar;
  390.             }
  391.             ++$i;
  392.         }
  393.         return $result;
  394.     }
  395.     private function lexRawData(): void
  396.     {
  397.         if (!preg_match($this->regexes['lex_raw_data'], $this->code$match\PREG_OFFSET_CAPTURE$this->cursor)) {
  398.             throw new SyntaxError('Unexpected end of file: Unclosed "verbatim" block.'$this->lineno$this->source);
  399.         }
  400.         $text substr($this->code$this->cursor$match[0][1] - $this->cursor);
  401.         $this->moveCursor($text.$match[0][0]);
  402.         // trim?
  403.         if (isset($match[1][0])) {
  404.             if ($this->options['whitespace_trim'] === $match[1][0]) {
  405.                 // whitespace_trim detected ({%-, {{- or {#-)
  406.                 $text rtrim($text);
  407.             } else {
  408.                 // whitespace_line_trim detected ({%~, {{~ or {#~)
  409.                 // don't trim \r and \n
  410.                 $text rtrim($text" \t\0\x0B");
  411.             }
  412.         }
  413.         $this->pushToken(Token::TEXT_TYPE$text);
  414.     }
  415.     private function lexComment(): void
  416.     {
  417.         if (!preg_match($this->regexes['lex_comment'], $this->code$match\PREG_OFFSET_CAPTURE$this->cursor)) {
  418.             throw new SyntaxError('Unclosed comment.'$this->lineno$this->source);
  419.         }
  420.         $this->moveCursor(substr($this->code$this->cursor$match[0][1] - $this->cursor).$match[0][0]);
  421.     }
  422.     private function lexString(): void
  423.     {
  424.         if (preg_match($this->regexes['interpolation_start'], $this->code$match0$this->cursor)) {
  425.             $this->brackets[] = [$this->options['interpolation'][0], $this->lineno];
  426.             $this->pushToken(Token::INTERPOLATION_START_TYPE);
  427.             $this->moveCursor($match[0]);
  428.             $this->pushState(self::STATE_INTERPOLATION);
  429.         } elseif (preg_match(self::REGEX_DQ_STRING_PART$this->code$match0$this->cursor) && '' !== $match[0]) {
  430.             $this->pushToken(Token::STRING_TYPE$this->stripcslashes($match[0], '"'));
  431.             $this->moveCursor($match[0]);
  432.         } elseif (preg_match(self::REGEX_DQ_STRING_DELIM$this->code$match0$this->cursor)) {
  433.             [$expect$lineno] = array_pop($this->brackets);
  434.             if ('"' != $this->code[$this->cursor]) {
  435.                 throw new SyntaxError(\sprintf('Unclosed "%s".'$expect), $lineno$this->source);
  436.             }
  437.             $this->popState();
  438.             ++$this->cursor;
  439.         } else {
  440.             // unlexable
  441.             throw new SyntaxError(\sprintf('Unexpected character "%s".'$this->code[$this->cursor]), $this->lineno$this->source);
  442.         }
  443.     }
  444.     private function lexInterpolation(): void
  445.     {
  446.         $bracket end($this->brackets);
  447.         if ($this->options['interpolation'][0] === $bracket[0] && preg_match($this->regexes['interpolation_end'], $this->code$match0$this->cursor)) {
  448.             array_pop($this->brackets);
  449.             $this->pushToken(Token::INTERPOLATION_END_TYPE);
  450.             $this->moveCursor($match[0]);
  451.             $this->popState();
  452.         } else {
  453.             $this->lexExpression();
  454.         }
  455.     }
  456.     private function pushToken($type$value ''): void
  457.     {
  458.         // do not push empty text tokens
  459.         if (Token::TEXT_TYPE === $type && '' === $value) {
  460.             return;
  461.         }
  462.         $this->tokens[] = new Token($type$value$this->lineno);
  463.     }
  464.     private function moveCursor($text): void
  465.     {
  466.         $this->cursor += \strlen($text);
  467.         $this->lineno += substr_count($text"\n");
  468.     }
  469.     private function getOperatorRegex(): string
  470.     {
  471.         $operators array_merge(
  472.             ['='],
  473.             array_keys($this->env->getUnaryOperators()),
  474.             array_keys($this->env->getBinaryOperators())
  475.         );
  476.         $operators array_combine($operatorsarray_map('strlen'$operators));
  477.         arsort($operators);
  478.         $regex = [];
  479.         foreach ($operators as $operator => $length) {
  480.             // an operator that ends with a character must be followed by
  481.             // a whitespace, a parenthesis, an opening map [ or sequence {
  482.             $r preg_quote($operator'/');
  483.             if (ctype_alpha($operator[$length 1])) {
  484.                 $r .= '(?=[\s()\[{])';
  485.             }
  486.             // an operator that begins with a character must not have a dot or pipe before
  487.             if (ctype_alpha($operator[0])) {
  488.                 $r '(?<![\.\|])'.$r;
  489.             }
  490.             // an operator with a space can be any amount of whitespaces
  491.             $r preg_replace('/\s+/''\s+'$r);
  492.             $regex[] = $r;
  493.         }
  494.         return '/'.implode('|'$regex).'/A';
  495.     }
  496.     private function pushState($state): void
  497.     {
  498.         $this->states[] = $this->state;
  499.         $this->state $state;
  500.     }
  501.     private function popState(): void
  502.     {
  503.         if (=== \count($this->states)) {
  504.             throw new \LogicException('Cannot pop state without a previous state.');
  505.         }
  506.         $this->state array_pop($this->states);
  507.     }
  508. }