💾 Archived View for tris.fyi › pydoc › pycparser.c_lexer captured on 2022-06-11 at 21:43:24. Gemini links have been rewritten to link to archived content
⬅️ Previous capture (2022-01-08)
-=-=-=-=-=-=-
This module has no docstring.
A lexer for the C language. After building it, set the input text with input(), and call token() to get new tokens. The public attribute filename can be set to an initial filename, but the lexer will update it upon #line directives.
build(self, **kwargs) Builds the lexer from the specification. Must be called after the lexer object is created. This method exists separately because the PLY manual warns against calling lex.lex inside __init__.
find_tok_column(self, token) Find the column of the token in its line.
input(self, text)
reset_lineno(self) Resets the internal line number counter of the lexer.
t_BAD_CHAR_CONST(self, t)
t_BAD_CONST_OCT(self, t)
t_BAD_STRING_LITERAL(self, t)
t_CHAR_CONST(self, t)
t_FLOAT_CONST(self, t)
t_HEX_FLOAT_CONST(self, t)
t_ID(self, t)
t_INT_CONST_BIN(self, t)
t_INT_CONST_CHAR(self, t)
t_INT_CONST_DEC(self, t)
t_INT_CONST_HEX(self, t)
t_INT_CONST_OCT(self, t)
t_LBRACE(self, t)
t_NEWLINE(self, t) \n+
t_PPHASH(self, t) [ \t]*\#
t_RBRACE(self, t)
t_UNMATCHED_QUOTE(self, t)
t_WCHAR_CONST(self, t)
t_WSTRING_LITERAL(self, t)
t_error(self, t)
t_ppline_FILENAME(self, t)
t_ppline_LINE_NUMBER(self, t)
t_ppline_NEWLINE(self, t) \n
t_ppline_PPLINE(self, t) line
t_ppline_error(self, t)
t_pppragma_NEWLINE(self, t) \n
t_pppragma_PPPRAGMA(self, t) pragma
t_pppragma_STR(self, t) .+
t_pppragma_error(self, t)
token(self)
bad_char_const = '(\'([^\'\\\\\\n]|(\\\\(([a-wyzA-Z._~!=&\\^\\-\\\\?\'"]|x(?![0-9a-fA-F]))|(\\d+)(?!\\d)|(x[0-9a-fA-F]+)(?![0-9a-fA-F]))))[^\'\n]+\')|(\'\')|(\'([\\\\][^a-zA-Z._~^!=&\\^\\-\\\\?\'"x0-9])[^\'\\n]*\')'
bad_escape = '([\\\\][^a-zA-Z._~^!=&\\^\\-\\\\?\'"x0-9])'
bad_octal_constant = '0[0-7]*[89]'
bad_string_literal = '"([^"\\\\\\n]|(\\\\[0-9a-zA-Z._~!=&\\^\\-\\\\?\'"]))*([\\\\][^a-zA-Z._~^!=&\\^\\-\\\\?\'"x0-9])([^"\\\\\\n]|(\\\\[0-9a-zA-Z._~!=&\\^\\-\\\\?\'"]))*"'
bin_constant = '0[bB][01]+(([uU]ll)|([uU]LL)|(ll[uU]?)|(LL[uU]?)|([uU][lL])|([lL][uU]?)|[uU])?'
bin_digits = '[01]+'
bin_prefix = '0[bB]'
binary_exponent_part = '([pP][+-]?[0-9]+)'
cconst_char = '([^\'\\\\\\n]|(\\\\(([a-wyzA-Z._~!=&\\^\\-\\\\?\'"]|x(?![0-9a-fA-F]))|(\\d+)(?!\\d)|(x[0-9a-fA-F]+)(?![0-9a-fA-F]))))'
char_const = '\'([^\'\\\\\\n]|(\\\\(([a-wyzA-Z._~!=&\\^\\-\\\\?\'"]|x(?![0-9a-fA-F]))|(\\d+)(?!\\d)|(x[0-9a-fA-F]+)(?![0-9a-fA-F]))))\''
decimal_constant = '(0(([uU]ll)|([uU]LL)|(ll[uU]?)|(LL[uU]?)|([uU][lL])|([lL][uU]?)|[uU])?)|([1-9][0-9]*(([uU]ll)|([uU]LL)|(ll[uU]?)|(LL[uU]?)|([uU][lL])|([lL][uU]?)|[uU])?)'
decimal_escape = '(\\d+)(?!\\d)'
escape_sequence = '(\\\\(([a-wyzA-Z._~!=&\\^\\-\\\\?\'"]|x(?![0-9a-fA-F]))|(\\d+)(?!\\d)|(x[0-9a-fA-F]+)(?![0-9a-fA-F])))'
escape_sequence_start_in_string = '(\\\\[0-9a-zA-Z._~!=&\\^\\-\\\\?\'"])'
exponent_part = '([eE][-+]?[0-9]+)'
floating_constant = '((((([0-9]*\\.[0-9]+)|([0-9]+\\.))([eE][-+]?[0-9]+)?)|([0-9]+([eE][-+]?[0-9]+)))[FfLl]?)'
fractional_constant = '([0-9]*\\.[0-9]+)|([0-9]+\\.)'
hex_constant = '0[xX][0-9a-fA-F]+(([uU]ll)|([uU]LL)|(ll[uU]?)|(LL[uU]?)|([uU][lL])|([lL][uU]?)|[uU])?'
hex_digits = '[0-9a-fA-F]+'
hex_escape = '(x[0-9a-fA-F]+)(?![0-9a-fA-F])'
hex_floating_constant = '(0[xX]([0-9a-fA-F]+|((([0-9a-fA-F]+)?\\.[0-9a-fA-F]+)|([0-9a-fA-F]+\\.)))([pP][+-]?[0-9]+)[FfLl]?)'
hex_fractional_constant = '((([0-9a-fA-F]+)?\\.[0-9a-fA-F]+)|([0-9a-fA-F]+\\.))'
hex_prefix = '0[xX]'
identifier = '[a-zA-Z_$][0-9a-zA-Z_$]*'
integer_suffix_opt = '(([uU]ll)|([uU]LL)|(ll[uU]?)|(LL[uU]?)|([uU][lL])|([lL][uU]?)|[uU])?'
keyword = '__INT128'
keyword_map = {'_Bool': '_BOOL', '_Complex': '_COMPLEX', 'auto': 'AUTO', 'break': 'BREAK', 'case': 'CASE', 'char': 'CHAR', 'const': 'CONST', 'continue': 'CONTINUE', 'default': 'DEFAULT', 'do': 'DO', 'double': 'DOUBLE', 'else': 'ELSE', 'enum': 'ENUM', 'extern': 'EXTERN', 'float': 'FLOAT', 'for': 'FOR', 'goto': 'GOTO', 'if': 'IF', 'inline': 'INLINE', 'int': 'INT', 'long': 'LONG', 'register': 'REGISTER', 'offsetof': 'OFFSETOF', 'restrict': 'RESTRICT', 'return': 'RETURN', 'short': 'SHORT', 'signed': 'SIGNED', 'sizeof': 'SIZEOF', 'static': 'STATIC', 'struct': 'STRUCT', 'switch': 'SWITCH', 'typedef': 'TYPEDEF', 'union': 'UNION', 'unsigned': 'UNSIGNED', 'void': 'VOID', 'volatile': 'VOLATILE', 'while': 'WHILE', '__int128': '__INT128'}
keywords = ('_BOOL', '_COMPLEX', 'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE', 'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'GOTO', 'IF', 'INLINE', 'INT', 'LONG', 'REGISTER', 'OFFSETOF', 'RESTRICT', 'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT', 'SWITCH', 'TYPEDEF', 'UNION', 'UNSIGNED', 'VOID', 'VOLATILE', 'WHILE', '__INT128')
multicharacter_constant = '\'([^\'\\\\\\n]|(\\\\(([a-wyzA-Z._~!=&\\^\\-\\\\?\'"]|x(?![0-9a-fA-F]))|(\\d+)(?!\\d)|(x[0-9a-fA-F]+)(?![0-9a-fA-F])))){2,4}\''
octal_constant = '0[0-7]*(([uU]ll)|([uU]LL)|(ll[uU]?)|(LL[uU]?)|([uU][lL])|([lL][uU]?)|[uU])?'
simple_escape = '([a-wyzA-Z._~!=&\\^\\-\\\\?\'"]|x(?![0-9a-fA-F]))'
states = (('ppline', 'exclusive'), ('pppragma', 'exclusive'))
string_char = '([^"\\\\\\n]|(\\\\[0-9a-zA-Z._~!=&\\^\\-\\\\?\'"]))'
string_literal = '"([^"\\\\\\n]|(\\\\[0-9a-zA-Z._~!=&\\^\\-\\\\?\'"]))*"'
t_AND = '&'
t_ANDEQUAL = '&='
t_ARROW = '->'
t_COLON = ':'
t_COMMA = ','
t_CONDOP = '\\?'
t_DIVEQUAL = '/='
t_DIVIDE = '/'
t_ELLIPSIS = '\\.\\.\\.'
t_EQ = '=='
t_EQUALS = '='
t_GE = '>='
t_GT = '>'
t_LAND = '&&'
t_LBRACKET = '\\['
t_LE = '<='
t_LNOT = '!'
t_LOR = '\\|\\|'
t_LPAREN = '\\('
t_LSHIFT = '<<'
t_LSHIFTEQUAL = '<<='
t_LT = '<'
t_MINUS = '-'
t_MINUSEQUAL = '-='
t_MINUSMINUS = '--'
t_MOD = '%'
t_MODEQUAL = '%='
t_NE = '!='
t_NOT = '~'
t_OR = '\\|'
t_OREQUAL = '\\|='
t_PERIOD = '\\.'
t_PLUS = '\\+'
t_PLUSEQUAL = '\\+='
t_PLUSPLUS = '\\+\\+'
t_RBRACKET = '\\]'
t_RPAREN = '\\)'
t_RSHIFT = '>>'
t_RSHIFTEQUAL = '>>='
t_SEMI = ';'
t_STRING_LITERAL = '"([^"\\\\\\n]|(\\\\[0-9a-zA-Z._~!=&\\^\\-\\\\?\'"]))*"'
t_TIMES = '\\*'
t_TIMESEQUAL = '\\*='
t_XOR = '\\^'
t_XOREQUAL = '\\^='
t_ignore = ' \t'
t_ppline_ignore = ' \t'
t_pppragma_ignore = ' \t'
tokens = ('_BOOL', '_COMPLEX', 'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE', 'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'GOTO', 'IF', 'INLINE', 'INT', 'LONG', 'REGISTER', 'OFFSETOF', 'RESTRICT', 'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT', 'SWITCH', 'TYPEDEF', 'UNION', 'UNSIGNED', 'VOID', 'VOLATILE', 'WHILE', '__INT128', 'ID', 'TYPEID', 'INT_CONST_DEC', 'INT_CONST_OCT', 'INT_CONST_HEX', 'INT_CONST_BIN', 'INT_CONST_CHAR', 'FLOAT_CONST', 'HEX_FLOAT_CONST', 'CHAR_CONST', 'WCHAR_CONST', 'STRING_LITERAL', 'WSTRING_LITERAL', 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD', 'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT', 'LOR', 'LAND', 'LNOT', 'LT', 'LE', 'GT', 'GE', 'EQ', 'NE', 'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL', 'LSHIFTEQUAL', 'RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL', 'PLUSPLUS', 'MINUSMINUS', 'ARROW', 'CONDOP', 'LPAREN', 'RPAREN', 'LBRACKET', 'RBRACKET', 'LBRACE', 'RBRACE', 'COMMA', 'PERIOD', 'SEMI', 'COLON', 'ELLIPSIS', 'PPHASH', 'PPPRAGMA', 'PPPRAGMASTR')
unmatched_quote = '(\'([^\'\\\\\\n]|(\\\\(([a-wyzA-Z._~!=&\\^\\-\\\\?\'"]|x(?![0-9a-fA-F]))|(\\d+)(?!\\d)|(x[0-9a-fA-F]+)(?![0-9a-fA-F]))))*\\n)|(\'([^\'\\\\\\n]|(\\\\(([a-wyzA-Z._~!=&\\^\\-\\\\?\'"]|x(?![0-9a-fA-F]))|(\\d+)(?!\\d)|(x[0-9a-fA-F]+)(?![0-9a-fA-F]))))*$)'
wchar_const = 'L\'([^\'\\\\\\n]|(\\\\(([a-wyzA-Z._~!=&\\^\\-\\\\?\'"]|x(?![0-9a-fA-F]))|(\\d+)(?!\\d)|(x[0-9a-fA-F]+)(?![0-9a-fA-F]))))\''
wstring_literal = 'L"([^"\\\\\\n]|(\\\\[0-9a-zA-Z._~!=&\\^\\-\\\\?\'"]))*"'
TOKEN(r)