# coding: utf-8
"""
    Tests for the tokenizer
    -----------------------

    :copyright: (c) 2012 by Simon Sapin.
    :license: BSD, see LICENSE for more details.
"""


from __future__ import unicode_literals

import os
import sys

import pytest

from tinycss.tokenizer import (
    cython_tokenize_flat, python_tokenize_flat, regroup)


def test_speedups():
    is_pypy = hasattr(sys, 'pypy_translation_info')
    env_skip_tests = os.environ.get('TINYCSS_SKIP_SPEEDUPS_TESTS')
    if is_pypy or env_skip_tests:  # pragma: no cover
        return
    assert cython_tokenize_flat is not None, (
        'Cython speedups are not installed, related tests will '
        'be skipped. Set the TINYCSS_SKIP_SPEEDUPS_TESTS environment '
        'variable if this is expected.')


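# Every parametrized test below runs each case against both the pure-Python
# tokenizer and the optional Cython speedups.  When the compiled speedups are
# not installed, cython_tokenize_flat is imported as None and the affected
# test skips that variant (the `if tokenize is None` guards) instead of
# failing outright.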
@pytest.mark.parametrize(('tokenize', 'css_source', 'expected_tokens'), [
    (tokenize,) + test_data
    for tokenize in (python_tokenize_flat, cython_tokenize_flat)
    for test_data in [
        ('', []),
        ('red -->', [('IDENT', 'red'), ('S', ' '), ('CDC', '-->')]),
        # Longest match rule: no CDC
        ('red-->', [('IDENT', 'red--'), ('DELIM', '>')]),
        (r'p[example="foo(int x) { this.x = x;}"]', [
            ('IDENT', 'p'),
            ('[', '['),
            ('IDENT', 'example'),
            ('DELIM', '='),
            ('STRING', 'foo(int x) { this.x = x;}'),
            (']', ']')]),

        # Numbers are parsed
        ('42 .5 -4pX 1.25em 30%', [
            ('INTEGER', 42), ('S', ' '),
            ('NUMBER', .5), ('S', ' '),
            # units are normalized to lower-case:
            ('DIMENSION', -4, 'px'), ('S', ' '),
            ('DIMENSION', 1.25, 'em'), ('S', ' '),
            ('PERCENTAGE', 30, '%')]),

        # URLs are extracted
        ('url(foo.png)', [('URI', 'foo.png')]),
        ('url("foo.png")', [('URI', 'foo.png')]),

        # Escaping

        (r'/* Comment with a \ backslash */', [
            ('COMMENT', r'/* Comment with a \ backslash */')]),  # Unchanged

        # backslash followed by a newline in a string: ignored
        ('"Lorem\\\nIpsum"', [('STRING', 'LoremIpsum')]),

        # backslash followed by a newline outside a string: stands for itself
        ('Lorem\\\nIpsum', [
            ('IDENT', 'Lorem'), ('DELIM', '\\'),
            ('S', '\n'), ('IDENT', 'Ipsum')]),

        # Cancel the meaning of special characters
        (r'"Lore\m Ipsum"', [('STRING', 'Lorem Ipsum')]),  # or not special
        (r'"Lorem \49psum"', [('STRING', 'Lorem Ipsum')]),
        (r'"Lorem \49 psum"', [('STRING', 'Lorem Ipsum')]),
        (r'"Lorem\"Ipsum"', [('STRING', 'Lorem"Ipsum')]),
        (r'"Lorem\\Ipsum"', [('STRING', r'Lorem\Ipsum')]),
        (r'"Lorem\5c Ipsum"', [('STRING', r'Lorem\Ipsum')]),
        (r'Lorem\+Ipsum', [('IDENT', 'Lorem+Ipsum')]),
        (r'Lorem+Ipsum', [
            ('IDENT', 'Lorem'), ('DELIM', '+'), ('IDENT', 'Ipsum')]),
        (r'url(foo\).png)', [('URI', 'foo).png')]),

        # Unicode and backslash escaping
        ('\\26 B', [('IDENT', '&B')]),
        ('\\&B', [('IDENT', '&B')]),
        ('@\\26\tB', [('ATKEYWORD', '@&B')]),
        ('@\\&B', [('ATKEYWORD', '@&B')]),
        ('#\\26\nB', [('HASH', '#&B')]),
        ('#\\&B', [('HASH', '#&B')]),
        ('\\26\r\nB(', [('FUNCTION', '&B(')]),
        ('\\&B(', [('FUNCTION', '&B(')]),
        (r'12.5\000026B', [('DIMENSION', 12.5, '&b')]),
        (r'12.5\0000263B', [('DIMENSION', 12.5, '&3b')]),  # max 6 digits
        (r'12.5\&B', [('DIMENSION', 12.5, '&b')]),
        (r'"\26 B"', [('STRING', '&B')]),
        (r"'\000026B'", [('STRING', '&B')]),
        (r'"\&B"', [('STRING', '&B')]),
        (r'url("\26 B")', [('URI', '&B')]),
        (r'url(\26 B)', [('URI', '&B')]),
        (r'url("\&B")', [('URI', '&B')]),
        (r'url(\&B)', [('URI', '&B')]),
        (r'Lorem\110000Ipsum', [('IDENT', 'Lorem\uFFFDIpsum')]),

        # Bad strings

        # String ends at EOF without closing: no error, parsed
        ('"Lorem\\26Ipsum', [('STRING', 'Lorem&Ipsum')]),
        # Unescaped newline: ends the string, error, unparsed
        ('"Lorem\\26Ipsum\n', [
            ('BAD_STRING', r'"Lorem\26Ipsum'), ('S', '\n')]),
        # Tokenization restarts after the newline, so the second " starts
        # a new string (which ends at EOF without errors, as above.)
        ('"Lorem\\26Ipsum\ndolor" sit', [
            ('BAD_STRING', r'"Lorem\26Ipsum'), ('S', '\n'),
            ('IDENT', 'dolor'), ('STRING', ' sit')]),

]])
def test_tokens(tokenize, css_source, expected_tokens):
    if tokenize is None:  # pragma: no cover
        pytest.skip('Speedups not available')
    sources = [css_source]
    if sys.version_info[0] < 3:
        # On Python 2.x, ASCII-only bytestrings can be used
        # where Unicode is expected.
        sources.append(css_source.encode('ascii'))
    for css_source in sources:
        tokens = tokenize(css_source, ignore_comments=False)
        result = [
            (token.type, token.value) + (
                () if token.unit is None else (token.unit,))
            for token in tokens
        ]
        assert result == expected_tokens


@pytest.mark.parametrize('tokenize', [
    python_tokenize_flat, cython_tokenize_flat])
def test_positions(tokenize):
    """Test the reported line/column position of each token.

    A form feed (\\f) counts as a newline for position tracking, both between
    tokens and inside a string, which is why the tokens after each form feed
    in the source below are reported on a new line.
    """
    if tokenize is None:  # pragma: no cover
        pytest.skip('Speedups not available')
    css = '/* Lorem\nipsum */\fa {\n    color: red;\tcontent: "dolor\\\fsit" }'
    tokens = tokenize(css, ignore_comments=False)
    result = [(token.type, token.line, token.column) for token in tokens]
    assert result == [
        ('COMMENT', 1, 1), ('S', 2, 9),
        ('IDENT', 3, 1), ('S', 3, 2), ('{', 3, 3),
        ('S', 3, 4), ('IDENT', 4, 5), (':', 4, 10),
        ('S', 4, 11), ('IDENT', 4, 12), (';', 4, 15), ('S', 4, 16),
        ('IDENT', 4, 17), (':', 4, 24), ('S', 4, 25), ('STRING', 4, 26),
        ('S', 5, 5), ('}', 5, 6)]


@pytest.mark.parametrize(('tokenize', 'css_source', 'expected_tokens'), [
    (tokenize,) + test_data
    for tokenize in (python_tokenize_flat, cython_tokenize_flat)
    for test_data in [
        ('', []),
        (r'Lorem\26 "i\psum"4px', [
            ('IDENT', 'Lorem&'), ('STRING', 'ipsum'), ('DIMENSION', 4)]),

        ('not([[lorem]]{ipsum (42)})', [
            ('FUNCTION', 'not', [
                ('[', [
                    ('[', [
                        ('IDENT', 'lorem'),
                    ]),
                ]),
                ('{', [
                    ('IDENT', 'ipsum'),
                    ('S', ' '),
                    ('(', [
                        ('INTEGER', 42),
                    ])
                ])
            ])]),

        # Close everything at EOF, no error
        ('a[b{"d', [
            ('IDENT', 'a'),
            ('[', [
                ('IDENT', 'b'),
                ('{', [
                    ('STRING', 'd'),
                ]),
            ]),
        ]),

        # Any remaining ), ] or } token is a nesting error
        ('a[b{d]e}', [
            ('IDENT', 'a'),
            ('[', [
                ('IDENT', 'b'),
                ('{', [
                    ('IDENT', 'd'),
                    (']', ']'),  # The error is visible here
                    ('IDENT', 'e'),
                ]),
            ]),
        ]),
        # For reference, the same input with correct nesting:
        ('a[b{d}e]', [
            ('IDENT', 'a'),
            ('[', [
                ('IDENT', 'b'),
                ('{', [
                    ('IDENT', 'd'),
                ]),
                ('IDENT', 'e'),
            ]),
        ]),
]])
def test_token_grouping(tokenize, css_source, expected_tokens):
    if tokenize is None:  # pragma: no cover
        pytest.skip('Speedups not available')
    tokens = regroup(tokenize(css_source, ignore_comments=False))
    result = list(jsonify(tokens))
    assert result == expected_tokens


def jsonify(tokens):
    """Turn tokens into "JSON-compatible" data structures."""
    for token in tokens:
        if token.type == 'FUNCTION':
            yield (token.type, token.function_name,
                   list(jsonify(token.content)))
        elif token.is_container:
            yield token.type, list(jsonify(token.content))
        else:
            yield token.type, token.value


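# A minimal illustrative sketch added as an example (it is not part of the
# original test data): it shows the nested tuple/list shape that jsonify()
# produces for a regrouped FUNCTION token, i.e. the format used for the
# expected values in test_token_grouping above.
def test_jsonify_example():
    tokens = regroup(python_tokenize_flat('not(42)', ignore_comments=False))
    assert list(jsonify(tokens)) == [('FUNCTION', 'not', [('INTEGER', 42)])]

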
@pytest.mark.parametrize(('tokenize', 'ignore_comments', 'expected_tokens'), [
    (tokenize,) + test_data
    for tokenize in (python_tokenize_flat, cython_tokenize_flat)
    for test_data in [
        (False, [
            ('COMMENT', '/* lorem */'),
            ('S', ' '),
            ('IDENT', 'ipsum'),
            ('[', [
                ('IDENT', 'dolor'),
                ('COMMENT', '/* sit */'),
            ]),
            ('BAD_COMMENT', '/* amet')
        ]),
        (True, [
            ('S', ' '),
            ('IDENT', 'ipsum'),
            ('[', [
                ('IDENT', 'dolor'),
            ]),
        ]),
]])
def test_comments(tokenize, ignore_comments, expected_tokens):
    if tokenize is None:  # pragma: no cover
        pytest.skip('Speedups not available')
    css_source = '/* lorem */ ipsum[dolor/* sit */]/* amet'
    tokens = regroup(tokenize(css_source, ignore_comments))
    result = list(jsonify(tokens))
    assert result == expected_tokens


@pytest.mark.parametrize(('tokenize', 'css_source'), [
    (tokenize, test_data)
    for tokenize in (python_tokenize_flat, cython_tokenize_flat)
    for test_data in [
        r'p[example="foo(int x) { this.x = x;}"]',
        '"Lorem\\26Ipsum\ndolor" sit',
        '/* Lorem\nipsum */\fa {\n    color: red;\tcontent: "dolor\\\fsit" }',
        'not([[lorem]]{ipsum (42)})',
        'a[b{d]e}',
        'a[b{"d',
]])
def test_token_serialize_css(tokenize, css_source):
    if tokenize is None:  # pragma: no cover
        pytest.skip('Speedups not available')
    for _regroup in [regroup, lambda x: x]:
        tokens = _regroup(tokenize(css_source, ignore_comments=False))
        result = ''.join(token.as_css() for token in tokens)
        assert result == css_source


@pytest.mark.parametrize(('tokenize', 'css_source'), [
    (tokenize, test_data)
    for tokenize in (python_tokenize_flat, cython_tokenize_flat)
    for test_data in [
        '(8, foo, [z])', '[8, foo, (z)]', '{8, foo, [z]}', 'func(8, foo, [z])'
    ]
])
def test_token_api(tokenize, css_source):
    if tokenize is None:  # pragma: no cover
        pytest.skip('Speedups not available')
    tokens = list(regroup(tokenize(css_source)))
    assert len(tokens) == 1
    token = tokens[0]
    expected_len = 7  # 2 spaces, 2 commas, 3 others.
    assert len(token.content) == expected_len


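# Summary note (an editorial comment, not from the original module) of the
# token API exercised above: flat tokens expose .type, .value, .unit, .line,
# .column and .as_css(); regrouped container tokens additionally expose
# .is_container and .content, and FUNCTION tokens expose .function_name
# (see jsonify() above).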