class PuppetLint::Lexer
Internal: The puppet-lint lexer. Converts your manifest into its tokenised form.
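A minimal usage sketch (the manifest string and the printing loop are assumed examples, not taken from the puppet-lint documentation): construct a lexer, tokenise a manifest, and walk the resulting Token objects.

require 'puppet-lint'

# Tokenise a small manifest and inspect the token stream.
lexer  = PuppetLint::Lexer.new
tokens = lexer.tokenise("notify { 'hello': }\n")

tokens.each do |token|
  puts "#{token.type} #{token.value.inspect}"
end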
Constants
- APP_MANAGEMENT_TOKENS
Internal: A Hash whose keys are Strings representing reserved keywords in the Puppet DSL when Application Management is enabled. From github.com/puppetlabs/puppet/blob/master/lib/puppet/pops/parser/lexer2.rb#L142-L159 or thereabouts. Currently unused.
- FORMATTING_TOKENS
Internal: A Hash whose keys are Symbols representing token types which are considered to be formatting tokens (i.e. tokens that don't contain code); see the sketch after this list of constants.
- KEYWORDS
Internal: A Hash whose keys are Strings representing reserved keywords in the Puppet DSL. From github.com/puppetlabs/puppet/blob/master/lib/puppet/pops/parser/lexer2.rb#L116-L137 or thereabouts.
- KNOWN_TOKENS
Internal: An Array of Arrays containing tokens that can be described by a single regular expression. Each sub-Array contains 2 elements, the name of the token as a Symbol and a regular expression describing the value of the token.
- NAME_RE
- REGEX_PREV_TOKENS
Internal: A Hash whose keys are Symbols representing token types which a regular expression can follow.
- WHITESPACE_RE
\t == tab, \v == vertical tab, \f == form feed, \p{Zs} == ASCII + Unicode non-linebreaking whitespace
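A short sketch showing a few of these constants in use; the specific token types and sample values below are illustrative assumptions, not excerpts from the constant definitions.

require 'puppet-lint'

# FORMATTING_TOKENS keys are token types that carry no code, so a token
# stream can be reduced to "code" tokens by rejecting them (:WHITESPACE is
# assumed here to be one such key).
PuppetLint::Lexer::FORMATTING_TOKENS.include?(:WHITESPACE) # => true

# KEYWORDS drives the keyword handling in tokenise (see below): a bare word
# that is a reserved keyword is emitted under its upcased Symbol, not :NAME.
PuppetLint::Lexer::KEYWORDS.include?('class') # => true, lexed as :CLASS

# A KNOWN_TOKENS entry pairs a token type with a regexp whose first capture
# group is the token's value; this entry is illustrative, not copied from the table.
type, regex = :FARROW, %r{\A(=>)}
'=> present,'[regex, 1] # => "=>"

# WHITESPACE_RE also covers Unicode non-linebreaking whitespace.
"\u00A0".match?(PuppetLint::Lexer::WHITESPACE_RE) # non-breaking space => true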
Public Class Methods
# File lib/puppet-lint/lexer.rb, line 40
def self.heredoc_queue
  @heredoc_queue ||= []
end
# File lib/puppet-lint/lexer.rb, line 35
def initialize
  @line_no = 1
  @column = 1
end
Public Instance Methods
# File lib/puppet-lint/lexer.rb, line 44
def heredoc_queue
  self.class.heredoc_queue
end
# File lib/puppet-lint/lexer.rb, line 308
def slurp_string(string)
  dq_str_regexp = %r{(\$\{|(\A|[^\\])(\\\\)*")}m
  scanner = StringScanner.new(string)
  contents = scanner.scan_until(dq_str_regexp)

  if scanner.matched.nil?
    raise LexerError.new(@line_no, @column, 'Double quoted string missing closing quote')
  end

  until scanner.matched.end_with?('"')
    contents += scanner.scan_until(%r{\}}m)
    contents += scanner.scan_until(dq_str_regexp)
  end
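The regexp above stops at either the start of an interpolation (${) or an unescaped closing quote, and the loop then skips past each interpolation until the real closing quote is reached. A self-contained sketch of that scanning technique, using an assumed input string:

require 'strscan'

dq_str_regexp = %r{(\$\{|(\A|[^\\])(\\\\)*")}m
scanner = StringScanner.new('hello ${name}!" and whatever follows')

contents = scanner.scan_until(dq_str_regexp)
until scanner.matched.end_with?('"')
  contents += scanner.scan_until(%r{\}}m)       # skip to the end of the ${...} interpolation
  contents += scanner.scan_until(dq_str_regexp) # then look for the closing quote again
end

contents # => "hello ${name}!\""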
Internal: Convert a Puppet manifest into tokens.
code - The Puppet manifest to be tokenised as a String.
Returns an Array of PuppetLint::Lexer::Token objects. Raises PuppetLint::LexerError if it encounters unexpected characters (usually the result of syntax errors).
# File lib/puppet-lint/lexer.rb, line 188
def tokenise(code)
  i = 0

  while i < code.size
    chunk = code[i..-1]

    found = false

    # Try the token types that can be matched by a single regular expression
    # first. A bare word that is a reserved keyword is emitted under its
    # upcased Symbol (e.g. :CLASS) rather than as a :NAME token.
    KNOWN_TOKENS.each do |type, regex|
      value = chunk[regex, 1]
      next if value.nil?

      length = value.size
      tokens << if type == :NAME && KEYWORDS.include?(value)
                  new_token(value.upcase.to_sym, value)
                else
                  new_token(type, value)
                end
      i += length
      found = true
      break
    end

    next if found

    # Everything else needs more context than a single regular expression.
    if var_name = chunk[%r{\A\$((::)?(\w+(-\w+)*::)*\w+(-\w+)*(\[.+?\])*)}, 1]
      # $variable, optionally namespaced and/or indexed
      length = var_name.size + 1
      tokens << new_token(:VARIABLE, var_name)
    elsif chunk =~ %r{\A'.*?'}m
      # single quoted string
      str_content = StringScanner.new(code[i + 1..-1]).scan_until(%r{(\A|[^\\])(\\\\)*'}m)
      length = str_content.size + 1
      tokens << new_token(:SSTRING, str_content[0..-2])
    elsif chunk.start_with?('"')
      # double quoted string, tokenised by slurp_string / interpolate_string
      str_contents = slurp_string(code[i + 1..-1])
      lines_parsed = code[0..i].split("\n")
      interpolate_string(str_contents, lines_parsed.count, lines_parsed.last.length)
      length = str_contents.size + 1
    elsif heredoc_name = chunk[%r{\A@\(("?.+?"?(:.+?)?(/.*?)?)\)}, 1]
      # heredoc start tag; the body is consumed when the next newline is seen
      heredoc_queue << heredoc_name
      tokens << new_token(:HEREDOC_OPEN, heredoc_name)
      length = heredoc_name.size + 3
    elsif comment = chunk[%r{\A(#.*)}, 1]
      length = comment.size
      comment.sub!(%r{#}, '')
      tokens << new_token(:COMMENT, comment)
    elsif slash_comment = chunk[%r{\A(//.*)}, 1]
      length = slash_comment.size
      slash_comment.sub!(%r{//}, '')
      tokens << new_token(:SLASH_COMMENT, slash_comment)
    elsif mlcomment = chunk[%r{\A(/\*.*?\*/)}m, 1]
      # /* ... */ comment, with the comment markers stripped from the value
      length = mlcomment.size
      mlcomment_raw = mlcomment.dup
      mlcomment.sub!(%r{\A/\* ?}, '')
      mlcomment.sub!(%r{ ?\*/\Z}, '')
      mlcomment.gsub!(%r{^ *\*}, '')
      tokens << new_token(:MLCOMMENT, mlcomment, :raw => mlcomment_raw)
    elsif chunk.match(%r{\A/.*?/}) && possible_regex?
      # regex literal, only where the previous token allows one
      str_content = StringScanner.new(code[i + 1..-1]).scan_until(%r{(\A|[^\\])(\\\\)*/}m)
      length = str_content.size + 1
      tokens << new_token(:REGEX, str_content[0..-2])
    elsif eolindent = chunk[%r{\A((\r\n|\r|\n)#{WHITESPACE_RE}+)}m, 1]
      # newline followed by indentation; any queued heredoc body is consumed
      # immediately after the newline
      eol = eolindent[%r{\A([\r\n]+)}m, 1]
      tokens << new_token(:NEWLINE, eol)
      length = eol.size

      if heredoc_queue.empty?
        indent = eolindent[%r{\A[\r\n]+(#{WHITESPACE_RE}+)}m, 1]
        tokens << new_token(:INDENT, indent)
        length += indent.size
      else
        heredoc_tag = heredoc_queue.shift
        heredoc_name = heredoc_tag[%r{\A"?(.+?)"?(:.+?)?(/.*)?\Z}, 1]
        str_contents = StringScanner.new(code[(i + length)..-1]).scan_until(%r{\|?\s*-?\s*#{heredoc_name}})
        interpolate_heredoc(str_contents, heredoc_tag)
        length += str_contents.size
      end
    elsif whitespace = chunk[%r{\A(#{WHITESPACE_RE}+)}, 1]
      length = whitespace.size
      tokens << new_token(:WHITESPACE, whitespace)
    elsif eol = chunk[%r{\A(\r\n|\r|\n)}, 1]
      # bare newline; any queued heredoc body is consumed immediately after it
      length = eol.size
      tokens << new_token(:NEWLINE, eol)

      unless heredoc_queue.empty?
        heredoc_tag = heredoc_queue.shift
        heredoc_name = heredoc_tag[%r{\A"?(.+?)"?(:.+?)?(/.*)?\Z}, 1]
        str_contents = StringScanner.new(code[(i + length)..-1]).scan_until(%r{\|?\s*-?\s*#{heredoc_name}})
        _ = code[0..(i + length)].split("\n")
        interpolate_heredoc(str_contents, heredoc_tag)
        length += str_contents.size
      end
    elsif chunk.start_with?('/')
      length = 1
      tokens << new_token(:DIV, '/')
    elsif chunk.start_with?('@')
      length = 1
      tokens << new_token(:AT, '@')
    else
      raise PuppetLint::LexerError.new(@line_no, @column)
    end

    i += length
  end

  tokens
end
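As noted above, tokenise raises PuppetLint::LexerError when no rule matches the input; a minimal sketch of handling that case (the control character here is just an assumed example of unlexable input):

require 'puppet-lint'

begin
  PuppetLint::Lexer.new.tokenise("\x01") # assumed: no token rule matches this byte
rescue PuppetLint::LexerError
  warn 'could not tokenise manifest: unexpected character'
end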
Internal: Access the internal token storage.
Returns an Array of PuppetLint::Lexer::Token objects.
# File lib/puppet-lint/lexer.rb, line 177
def tokens
  @tokens ||= []
end