Coverage for pygments.lexers.parsers : 100%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# -*- coding: utf-8 -*- pygments.lexers.parsers ~~~~~~~~~~~~~~~~~~~~~~~
Lexers for parser generators.
:copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. """
include, bygroups, using Keyword, Name, String, Number, Whitespace
'RagelCppLexer', 'RagelObjectiveCLexer', 'RagelRubyLexer', 'RagelJavaLexer', 'AntlrLexer', 'AntlrPythonLexer', 'AntlrPerlLexer', 'AntlrRubyLexer', 'AntlrCppLexer', # 'AntlrCLexer', 'AntlrCSharpLexer', 'AntlrObjectiveCLexer', 'AntlrJavaLexer', 'AntlrActionScriptLexer', 'TreetopLexer', 'EbnfLexer']
""" A pure `Ragel <http://www.complang.org/ragel/>`_ lexer. Use this for fragments of Ragel. For ``.rl`` files, use RagelEmbeddedLexer instead (or one of the language-specific subclasses).
.. versionadded:: 1.1 """
'whitespace': [ (r'\s+', Whitespace) ], 'comments': [ (r'\#.*$', Comment), ], 'keywords': [ (r'(access|action|alphtype)\b', Keyword), (r'(getkey|write|machine|include)\b', Keyword), (r'(any|ascii|extend|alpha|digit|alnum|lower|upper)\b', Keyword), (r'(xdigit|cntrl|graph|print|punct|space|zlen|empty)\b', Keyword) ], 'numbers': [ (r'0x[0-9A-Fa-f]+', Number.Hex), (r'[+-]?[0-9]+', Number.Integer), ], 'literals': [ (r'"(\\\\|\\"|[^"])*"', String), # double quote string (r"'(\\\\|\\'|[^'])*'", String), # single quote string (r'\[(\\\\|\\\]|[^\]])*\]', String), # square bracket literals (r'/(?!\*)(\\\\|\\/|[^/])*/', String.Regex), # regular expressions ], 'identifiers': [ (r'[a-zA-Z_]\w*', Name.Variable), ], 'operators': [ (r',', Operator), # Join (r'\||&|--?', Operator), # Union, Intersection and Subtraction (r'\.|<:|:>>?', Operator), # Concatenation (r':', Operator), # Label (r'->', Operator), # Epsilon Transition (r'(>|\$|%|<|@|<>)(/|eof\b)', Operator), # EOF Actions (r'(>|\$|%|<|@|<>)(!|err\b)', Operator), # Global Error Actions (r'(>|\$|%|<|@|<>)(\^|lerr\b)', Operator), # Local Error Actions (r'(>|\$|%|<|@|<>)(~|to\b)', Operator), # To-State Actions (r'(>|\$|%|<|@|<>)(\*|from\b)', Operator), # From-State Actions (r'>|@|\$|%', Operator), # Transition Actions and Priorities (r'\*|\?|\+|\{[0-9]*,[0-9]*\}', Operator), # Repetition (r'!|\^', Operator), # Negation (r'\(|\)', Operator), # Grouping ], 'root': [ include('literals'), include('whitespace'), include('comments'), include('keywords'), include('numbers'), include('identifiers'), include('operators'), (r'\{', Punctuation, 'host'), (r'=', Operator), (r';', Punctuation), ], 'host': [ (r'(' + r'|'.join(( # keep host code in largest possible chunks r'[^{}\'"/#]+', # exclude unsafe characters r'[^\\]\\[{}]', # allow escaped { or }
# strings and comments may safely contain unsafe characters r'"(\\\\|\\"|[^"])*"', # double quote string r"'(\\\\|\\'|[^'])*'", # single quote string r'//.*$\n?', # single line comment r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment r'\#.*$\n?', # ruby comment
# regular expression: There's no reason for it to start # with a * and this stops confusion with comments. r'/(?!\*)(\\\\|\\/|[^/])*/',
# / is safe now that we've handled regex and javadoc comments r'/', )) + r')+', Other),
(r'\{', Punctuation, '#push'), (r'\}', Punctuation, '#pop'), ], }
""" A lexer for `Ragel`_ embedded in a host language file.
This will only highlight Ragel statements. If you want host language highlighting then call the language-specific Ragel lexer.
.. versionadded:: 1.1 """
'root': [ (r'(' + r'|'.join(( # keep host code in largest possible chunks r'[^%\'"/#]+', # exclude unsafe characters r'%(?=[^%]|$)', # a single % sign is okay, just not 2 of them
# strings and comments may safely contain unsafe characters r'"(\\\\|\\"|[^"])*"', # double quote string r"'(\\\\|\\'|[^'])*'", # single quote string r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment r'//.*$\n?', # single line comment r'\#.*$\n?', # ruby/ragel comment r'/(?!\*)(\\\\|\\/|[^/])*/', # regular expression
# / is safe now that we've handled regex and javadoc comments r'/', )) + r')+', Other),
# Single Line FSM. # Please don't put a quoted newline in a single line FSM. # That's just mean. It will break this. (r'(%%)(?![{%])(.*)($|;)(\n?)', bygroups(Punctuation, using(RagelLexer), Punctuation, Text)),
# Multi Line FSM. (r'(%%%%|%%)\{', Punctuation, 'multi-line-fsm'), ], 'multi-line-fsm': [ (r'(' + r'|'.join(( # keep ragel code in largest possible chunks. r'(' + r'|'.join(( r'[^}\'"\[/#]', # exclude unsafe characters r'\}(?=[^%]|$)', # } is okay as long as it's not followed by % r'\}%(?=[^%]|$)', # ...well, one %'s okay, just not two... r'[^\\]\\[{}]', # ...and } is okay if it's escaped
# allow / if it's preceded with one of these symbols # (ragel EOF actions) r'(>|\$|%|<|@|<>)/',
# specifically allow regex followed immediately by * # so it doesn't get mistaken for a comment r'/(?!\*)(\\\\|\\/|[^/])*/\*',
# allow / as long as it's not followed by another / or by a * r'/(?=[^/*]|$)',
# We want to match as many of these as we can in one block. # Not sure if we need the + sign here, # does it help performance? )) + r')+',
# strings and comments may safely contain unsafe characters r'"(\\\\|\\"|[^"])*"', # double quote string r"'(\\\\|\\'|[^'])*'", # single quote string r"\[(\\\\|\\\]|[^\]])*\]", # square bracket literal r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment r'//.*$\n?', # single line comment r'\#.*$\n?', # ruby/ragel comment )) + r')+', using(RagelLexer)),
(r'\}%%', Punctuation, '#pop'), ] }
""" A lexer for `Ragel`_ in a Ruby host file.
.. versionadded:: 1.1 """
**options)
""" A lexer for `Ragel`_ in a C host file.
.. versionadded:: 1.1 """
**options)
""" A lexer for `Ragel`_ in a D host file.
.. versionadded:: 1.1 """
""" A lexer for `Ragel`_ in a CPP host file.
.. versionadded:: 1.1 """
""" A lexer for `Ragel`_ in an Objective C host file.
.. versionadded:: 1.1 """
RagelEmbeddedLexer, **options)
""" A lexer for `Ragel`_ in a Java host file.
.. versionadded:: 1.1 """
**options)
""" Generic `ANTLR`_ Lexer. Should not be called directly; instead, use DelegatingLexer for your target language.
.. versionadded:: 1.1
.. _ANTLR: http://www.antlr.org/ """
'whitespace': [ (r'\s+', Whitespace), ], 'comments': [ (r'//.*$', Comment), (r'/\*(.|\n)*?\*/', Comment), ], 'root': [ include('whitespace'), include('comments'),
(r'(lexer|parser|tree)?(\s*)(grammar\b)(\s*)(' + _id + ')(;)', bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Class, Punctuation)), # optionsSpec (r'options\b', Keyword, 'options'), # tokensSpec (r'tokens\b', Keyword, 'tokens'), # attrScope (r'(scope)(\s*)(' + _id + ')(\s*)(\{)', bygroups(Keyword, Whitespace, Name.Variable, Whitespace, Punctuation), 'action'), # exception (r'(catch|finally)\b', Keyword, 'exception'), # action (r'(@' + _id + ')(\s*)(::)?(\s*)(' + _id + ')(\s*)(\{)', bygroups(Name.Label, Whitespace, Punctuation, Whitespace, Name.Label, Whitespace, Punctuation), 'action'), # rule (r'((?:protected|private|public|fragment)\b)?(\s*)(' + _id + ')(!)?', bygroups(Keyword, Whitespace, Name.Label, Punctuation), ('rule-alts', 'rule-prelims')), ], 'exception': [ (r'\n', Whitespace, '#pop'), (r'\s', Whitespace), include('comments'),
(r'\[', Punctuation, 'nested-arg-action'), (r'\{', Punctuation, 'action'), ], 'rule-prelims': [ include('whitespace'), include('comments'),
(r'returns\b', Keyword), (r'\[', Punctuation, 'nested-arg-action'), (r'\{', Punctuation, 'action'), # throwsSpec (r'(throws)(\s+)(' + _id + ')', bygroups(Keyword, Whitespace, Name.Label)), (r'(,)(\s*)(' + _id + ')', bygroups(Punctuation, Whitespace, Name.Label)), # Additional throws # optionsSpec (r'options\b', Keyword, 'options'), # ruleScopeSpec - scope followed by target language code or name of action # TODO finish implementing other possibilities for scope # L173 ANTLRv3.g from ANTLR book (r'(scope)(\s+)(\{)', bygroups(Keyword, Whitespace, Punctuation), 'action'), (r'(scope)(\s+)(' + _id + ')(\s*)(;)', bygroups(Keyword, Whitespace, Name.Label, Whitespace, Punctuation)), # ruleAction (r'(@' + _id + ')(\s*)(\{)', bygroups(Name.Label, Whitespace, Punctuation), 'action'), # finished prelims, go to rule alts! (r':', Punctuation, '#pop') ], 'rule-alts': [ include('whitespace'), include('comments'),
# These might need to go in a separate 'block' state triggered by ( (r'options\b', Keyword, 'options'), (r':', Punctuation),
# literals (r"'(\\\\|\\'|[^'])*'", String), (r'"(\\\\|\\"|[^"])*"', String), (r'<<([^>]|>[^>])>>', String), # identifiers # Tokens start with capital letter. (r'\$?[A-Z_]\w*', Name.Constant), # Rules start with small letter. (r'\$?[a-z_]\w*', Name.Variable), # operators (r'(\+|\||->|=>|=|\(|\)|\.\.|\.|\?|\*|\^|!|\#|~)', Operator), (r',', Punctuation), (r'\[', Punctuation, 'nested-arg-action'), (r'\{', Punctuation, 'action'), (r';', Punctuation, '#pop') ], 'tokens': [ include('whitespace'), include('comments'), (r'\{', Punctuation), (r'(' + _TOKEN_REF + r')(\s*)(=)?(\s*)(' + _STRING_LITERAL + ')?(\s*)(;)', bygroups(Name.Label, Whitespace, Punctuation, Whitespace, String, Whitespace, Punctuation)), (r'\}', Punctuation, '#pop'), ], 'options': [ include('whitespace'), include('comments'), (r'\{', Punctuation), (r'(' + _id + r')(\s*)(=)(\s*)(' + '|'.join((_id, _STRING_LITERAL, _INT, '\*')) + ')(\s*)(;)', bygroups(Name.Variable, Whitespace, Punctuation, Whitespace, Text, Whitespace, Punctuation)), (r'\}', Punctuation, '#pop'), ], 'action': [ (r'(' + r'|'.join(( # keep host code in largest possible chunks r'[^${}\'"/\\]+', # exclude unsafe characters
# strings and comments may safely contain unsafe characters r'"(\\\\|\\"|[^"])*"', # double quote string r"'(\\\\|\\'|[^'])*'", # single quote string r'//.*$\n?', # single line comment r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
# regular expression: There's no reason for it to start # with a * and this stops confusion with comments. r'/(?!\*)(\\\\|\\/|[^/])*/',
# backslashes are okay, as long as we are not backslashing a % r'\\(?!%)',
# Now that we've handled regex and javadoc comments # it's safe to let / through. r'/', )) + r')+', Other), (r'(\\)(%)', bygroups(Punctuation, Other)), (r'(\$[a-zA-Z]+)(\.?)(text|value)?', bygroups(Name.Variable, Punctuation, Name.Property)), (r'\{', Punctuation, '#push'), (r'\}', Punctuation, '#pop'), ], 'nested-arg-action': [ (r'(' + r'|'.join(( # keep host code in largest possible chunks. r'[^$\[\]\'"/]+', # exclude unsafe characters
# strings and comments may safely contain unsafe characters r'"(\\\\|\\"|[^"])*"', # double quote string r"'(\\\\|\\'|[^'])*'", # single quote string r'//.*$\n?', # single line comment r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
# regular expression: There's no reason for it to start # with a * and this stops confusion with comments. r'/(?!\*)(\\\\|\\/|[^/])*/',
# Now that we've handled regex and javadoc comments # it's safe to let / through. r'/', )) + r')+', Other),
(r'\[', Punctuation, '#push'), (r'\]', Punctuation, '#pop'), (r'(\$[a-zA-Z]+)(\.?)(text|value)?', bygroups(Name.Variable, Punctuation, Name.Property)), (r'(\\\\|\\\]|\\\[|[^\[\]])+', Other), ] }
# http://www.antlr.org/wiki/display/ANTLR3/Code+Generation+Targets
# TH: I'm not aware of any language features of C++ that will cause # incorrect lexing of C files. Antlr doesn't appear to make a distinction, # so just assume they're C++. No idea how to make Objective C work in the # future.
# class AntlrCLexer(DelegatingLexer): # """ # ANTLR with C Target # # .. versionadded:: 1.1 # """ # # name = 'ANTLR With C Target' # aliases = ['antlr-c'] # filenames = ['*.G', '*.g'] # # def __init__(self, **options): # super(AntlrCLexer, self).__init__(CLexer, AntlrLexer, **options) # # def analyse_text(text): # return re.match(r'^\s*language\s*=\s*C\s*;', text)
""" `ANTLR`_ with CPP Target
.. versionadded:: 1.1 """
re.search(r'^\s*language\s*=\s*C\s*;', text, re.M)
""" `ANTLR`_ with Objective-C Target
.. versionadded:: 1.1 """
AntlrLexer, **options)
re.search(r'^\s*language\s*=\s*ObjC\s*;', text)
""" `ANTLR`_ with C# Target
.. versionadded:: 1.1 """
**options)
re.search(r'^\s*language\s*=\s*CSharp2\s*;', text, re.M)
""" `ANTLR`_ with Python Target
.. versionadded:: 1.1 """
**options)
re.search(r'^\s*language\s*=\s*Python\s*;', text, re.M)
""" `ANTLR`_ with Java Target
.. versionadded:: 1.1 """
**options)
# Antlr language is Java by default
""" `ANTLR`_ with Ruby Target
.. versionadded:: 1.1 """
**options)
re.search(r'^\s*language\s*=\s*Ruby\s*;', text, re.M)
""" `ANTLR`_ with Perl Target
.. versionadded:: 1.1 """
**options)
re.search(r'^\s*language\s*=\s*Perl5\s*;', text, re.M)
""" `ANTLR`_ with ActionScript Target
.. versionadded:: 1.1 """
AntlrLexer, **options)
re.search(r'^\s*language\s*=\s*ActionScript\s*;', text, re.M)
""" A base lexer for `Treetop <http://treetop.rubyforge.org/>`_ grammars. Not for direct use; use TreetopLexer instead.
.. versionadded:: 1.6 """
'root': [ include('space'), (r'require[ \t]+[^\n\r]+[\n\r]', Other), (r'module\b', Keyword.Namespace, 'module'), (r'grammar\b', Keyword, 'grammar'), ], 'module': [ include('space'), include('end'), (r'module\b', Keyword, '#push'), (r'grammar\b', Keyword, 'grammar'), (r'[A-Z]\w*(?:::[A-Z]\w*)*', Name.Namespace), ], 'grammar': [ include('space'), include('end'), (r'rule\b', Keyword, 'rule'), (r'include\b', Keyword, 'include'), (r'[A-Z]\w*', Name), ], 'include': [ include('space'), (r'[A-Z]\w*(?:::[A-Z]\w*)*', Name.Class, '#pop'), ], 'rule': [ include('space'), include('end'), (r'"(\\\\|\\"|[^"])*"', String.Double), (r"'(\\\\|\\'|[^'])*'", String.Single), (r'([A-Za-z_]\w*)(:)', bygroups(Name.Label, Punctuation)), (r'[A-Za-z_]\w*', Name), (r'[()]', Punctuation), (r'[?+*/&!~]', Operator), (r'\[(?:\\.|\[:\^?[a-z]+:\]|[^\\\]])+\]', String.Regex), (r'([0-9]*)(\.\.)([0-9]*)', bygroups(Number.Integer, Operator, Number.Integer)), (r'(<)([^>]+)(>)', bygroups(Punctuation, Name.Class, Punctuation)), (r'\{', Punctuation, 'inline_module'), (r'\.', String.Regex), ], 'inline_module': [ (r'\{', Other, 'ruby'), (r'\}', Punctuation, '#pop'), (r'[^{}]+', Other), ], 'ruby': [ (r'\{', Other, '#push'), (r'\}', Other, '#pop'), (r'[^{}]+', Other), ], 'space': [ (r'[ \t\n\r]+', Whitespace), (r'#[^\n]*', Comment.Single), ], 'end': [ (r'end\b', Keyword, '#pop'), ], }
""" A lexer for `Treetop <http://treetop.rubyforge.org/>`_ grammars.
.. versionadded:: 1.6 """
""" Lexer for `ISO/IEC 14977 EBNF <http://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form>`_ grammars.
.. versionadded:: 2.0 """
'root': [ include('whitespace'), include('comment_start'), include('identifier'), (r'=', Operator, 'production'), ], 'production': [ include('whitespace'), include('comment_start'), include('identifier'), (r'"[^"]*"', String.Double), (r"'[^']*'", String.Single), (r'(\?[^?]*\?)', Name.Entity), (r'[\[\]{}(),|]', Punctuation), (r'-', Operator), (r';', Punctuation, '#pop'), (r'\.', Punctuation, '#pop'), ], 'whitespace': [ (r'\s+', Text), ], 'comment_start': [ (r'\(\*', Comment.Multiline, 'comment'), ], 'comment': [ (r'[^*)]', Comment.Multiline), include('comment_start'), (r'\*\)', Comment.Multiline, '#pop'), (r'[*)]', Comment.Multiline), ], 'identifier': [ (r'([a-zA-Z][\w \-]*)', Keyword), ], } |