mirror of
https://github.com/hak5/nano-tetra-modules.git
synced 2025-10-29 16:58:09 +00:00
190 lines
7.0 KiB
Cython
Executable File
190 lines
7.0 KiB
Cython
Executable File
# coding: utf8
|
||
"""
|
||
tinycss.speedups
|
||
----------------
|
||
|
||
Cython module for speeding up inner loops.
|
||
|
||
Right now only :func:`tokenize_flat` has a second implementation.
|
||
|
||
:copyright: (c) 2010 by Simon Sapin.
|
||
:license: BSD, see LICENSE for more details.
|
||
"""
|
||
|
||
from __future__ import unicode_literals
|
||
|
||
from .token_data import (
|
||
COMPILED_TOKEN_REGEXPS, UNICODE_UNESCAPE, NEWLINE_UNESCAPE,
|
||
SIMPLE_UNESCAPE, FIND_NEWLINES, TOKEN_DISPATCH)
|
||
|
||
|
||
COMPILED_TOKEN_INDEXES = dict(
|
||
(name, i) for i, (name, regexp) in enumerate(COMPILED_TOKEN_REGEXPS))
|
||
|
||
|
||
cdef class CToken:
|
||
"""A token built by the Cython speedups. Identical to
|
||
:class:`~.token_data.Token`.
|
||
|
||
"""
|
||
is_container = False
|
||
|
||
cdef public object type, _as_css, value, unit
|
||
cdef public Py_ssize_t line, column
|
||
|
||
def __init__(self, type_, css_value, value, unit, line, column):
|
||
self.type = type_
|
||
self._as_css = css_value
|
||
self.value = value
|
||
self.unit = unit
|
||
self.line = line
|
||
self.column = column
|
||
|
||
def as_css(self):
|
||
"""
|
||
Return as an Unicode string the CSS representation of the token,
|
||
as parsed in the source.
|
||
"""
|
||
return self._as_css
|
||
|
||
def __repr__(self):
|
||
return ('<Token {0.type} at {0.line}:{0.column} {0.value!r}{1}>'
|
||
.format(self, self.unit or ''))
|
||
|
||
|
||
def tokenize_flat(css_source, int ignore_comments=1):
|
||
"""
|
||
:param css_source:
|
||
CSS as an unicode string
|
||
:param ignore_comments:
|
||
if true (the default) comments will not be included in the
|
||
return value
|
||
:return:
|
||
An iterator of :class:`Token`
|
||
|
||
"""
|
||
# Make these local variable to avoid global lookups in the loop
|
||
tokens_dispatch = TOKEN_DISPATCH
|
||
compiled_token_indexes = COMPILED_TOKEN_INDEXES
|
||
compiled_tokens = COMPILED_TOKEN_REGEXPS
|
||
unicode_unescape = UNICODE_UNESCAPE
|
||
newline_unescape = NEWLINE_UNESCAPE
|
||
simple_unescape = SIMPLE_UNESCAPE
|
||
find_newlines = FIND_NEWLINES
|
||
|
||
# Use the integer indexes instead of string markers
|
||
cdef Py_ssize_t BAD_COMMENT = compiled_token_indexes['BAD_COMMENT']
|
||
cdef Py_ssize_t BAD_STRING = compiled_token_indexes['BAD_STRING']
|
||
cdef Py_ssize_t PERCENTAGE = compiled_token_indexes['PERCENTAGE']
|
||
cdef Py_ssize_t DIMENSION = compiled_token_indexes['DIMENSION']
|
||
cdef Py_ssize_t ATKEYWORD = compiled_token_indexes['ATKEYWORD']
|
||
cdef Py_ssize_t FUNCTION = compiled_token_indexes['FUNCTION']
|
||
cdef Py_ssize_t COMMENT = compiled_token_indexes['COMMENT']
|
||
cdef Py_ssize_t NUMBER = compiled_token_indexes['NUMBER']
|
||
cdef Py_ssize_t STRING = compiled_token_indexes['STRING']
|
||
cdef Py_ssize_t IDENT = compiled_token_indexes['IDENT']
|
||
cdef Py_ssize_t HASH = compiled_token_indexes['HASH']
|
||
cdef Py_ssize_t URI = compiled_token_indexes['URI']
|
||
cdef Py_ssize_t DELIM = -1
|
||
|
||
cdef Py_ssize_t pos = 0
|
||
cdef Py_ssize_t line = 1
|
||
cdef Py_ssize_t column = 1
|
||
cdef Py_ssize_t source_len = len(css_source)
|
||
cdef Py_ssize_t n_tokens = len(compiled_tokens)
|
||
cdef Py_ssize_t length, next_pos, type_
|
||
cdef CToken token
|
||
|
||
tokens = []
|
||
while pos < source_len:
|
||
char = css_source[pos]
|
||
if char in ':;{}()[]':
|
||
type_ = -1 # not parsed further anyway
|
||
type_name = char
|
||
css_value = char
|
||
else:
|
||
codepoint = min(ord(char), 160)
|
||
for type_, type_name, regexp in tokens_dispatch[codepoint]:
|
||
match = regexp(css_source, pos)
|
||
if match:
|
||
# First match is the longest. See comments on TOKENS above.
|
||
css_value = match.group()
|
||
break
|
||
else:
|
||
# No match.
|
||
# "Any other character not matched by the above rules,
|
||
# and neither a single nor a double quote."
|
||
# ... but quotes at the start of a token are always matched
|
||
# by STRING or BAD_STRING. So DELIM is any single character.
|
||
type_ = DELIM
|
||
type_name = 'DELIM'
|
||
css_value = char
|
||
length = len(css_value)
|
||
next_pos = pos + length
|
||
|
||
# A BAD_COMMENT is a comment at EOF. Ignore it too.
|
||
if not (ignore_comments and type_ in (COMMENT, BAD_COMMENT)):
|
||
# Parse numbers, extract strings and URIs, unescape
|
||
unit = None
|
||
if type_ == DIMENSION:
|
||
value = match.group(1)
|
||
value = float(value) if '.' in value else int(value)
|
||
unit = match.group(2)
|
||
unit = simple_unescape(unit)
|
||
unit = unicode_unescape(unit)
|
||
unit = unit.lower() # normalize
|
||
elif type_ == PERCENTAGE:
|
||
value = css_value[:-1]
|
||
value = float(value) if '.' in value else int(value)
|
||
unit = '%'
|
||
elif type_ == NUMBER:
|
||
value = css_value
|
||
if '.' in value:
|
||
value = float(value)
|
||
else:
|
||
value = int(value)
|
||
type_name = 'INTEGER'
|
||
elif type_ in (IDENT, ATKEYWORD, HASH, FUNCTION):
|
||
value = simple_unescape(css_value)
|
||
value = unicode_unescape(value)
|
||
elif type_ == URI:
|
||
value = match.group(1)
|
||
if value and value[0] in '"\'':
|
||
value = value[1:-1] # Remove quotes
|
||
value = newline_unescape(value)
|
||
value = simple_unescape(value)
|
||
value = unicode_unescape(value)
|
||
elif type_ == STRING:
|
||
value = css_value[1:-1] # Remove quotes
|
||
value = newline_unescape(value)
|
||
value = simple_unescape(value)
|
||
value = unicode_unescape(value)
|
||
# BAD_STRING can only be one of:
|
||
# * Unclosed string at the end of the stylesheet:
|
||
# Close the string, but this is not an error.
|
||
# Make it a "good" STRING token.
|
||
# * Unclosed string at the (unescaped) end of the line:
|
||
# Close the string, but this is an error.
|
||
# Leave it as a BAD_STRING, don’t bother parsing it.
|
||
# See http://www.w3.org/TR/CSS21/syndata.html#parsing-errors
|
||
elif type_ == BAD_STRING and next_pos == source_len:
|
||
type_name = 'STRING'
|
||
value = css_value[1:] # Remove quote
|
||
value = newline_unescape(value)
|
||
value = simple_unescape(value)
|
||
value = unicode_unescape(value)
|
||
else:
|
||
value = css_value
|
||
token = CToken(type_name, css_value, value, unit, line, column)
|
||
tokens.append(token)
|
||
|
||
pos = next_pos
|
||
newlines = list(find_newlines(css_value))
|
||
if newlines:
|
||
line += len(newlines)
|
||
# Add 1 to have lines start at column 1, not 0
|
||
column = length - newlines[-1].end() + 1
|
||
else:
|
||
column += length
|
||
return tokens
|