# -*- coding: utf-8 -*-
"""
    pygments.filters
    ~~~~~~~~~~~~~~~~

    Module containing filter lookup functions and default
    filters.

    :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.token import String, Comment, Keyword, Name, Error, Whitespace, \
    string_to_tokentype
from pygments.filter import Filter
from pygments.util import get_list_opt, get_int_opt, get_bool_opt, \
    get_choice_opt, ClassNotFound, OptionError, text_type, string_types
from pygments.plugin import find_plugin_filters

def find_filter_class(filtername):
    """Look up a filter by name. Return None if not found."""
    if filtername in FILTERS:
        return FILTERS[filtername]
    for name, cls in find_plugin_filters():
        if name == filtername:
            return cls
    return None

def get_filter_by_name(filtername, **options):
    """Return an instantiated filter.

    Options are passed to the filter initializer if given.
    Raise a ClassNotFound exception if no filter by that name is found.
    """
    cls = find_filter_class(filtername)
    if cls:
        return cls(**options)
    else:
        raise ClassNotFound('filter %r not found' % filtername)

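# Example use of ``get_filter_by_name`` (a sketch added for illustration, not
# part of the original module): look a filter up by name and attach it to a
# lexer.  ``PythonLexer`` is just one possible lexer; ``Lexer.add_filter``
# accepts any instantiated filter.
#
#     from pygments.lexers import PythonLexer
#     lexer = PythonLexer()
#     lexer.add_filter(get_filter_by_name('keywordcase', case='upper'))
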
def get_all_filters():
    """Return a generator of all filter names."""
    for name in FILTERS:
        yield name
    for name, _ in find_plugin_filters():
        yield name

def _replace_special(ttype, value, regex, specialttype,
                     replacefunc=lambda x: x):
    """Yield (tokentype, value) pairs for ``value``, giving every match of
    ``regex`` the token type ``specialttype`` (run through ``replacefunc``)
    and keeping ``ttype`` for the text in between."""
    last = 0
    for match in regex.finditer(value):
        start, end = match.start(), match.end()
        if start != last:
            yield ttype, value[last:start]
        yield specialttype, replacefunc(value[start:end])
        last = end
    if last != len(value):
        yield ttype, value[last:]

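# Worked example for ``_replace_special`` (added for illustration, not part
# of the original module): with ``regex = re.compile('TODO')``, the call
#
#     _replace_special(Comment, '# TODO: fix', regex, Comment.Special)
#
# yields (Comment, '# '), then (Comment.Special, 'TODO'), then
# (Comment, ': fix').
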
class CodeTagFilter(Filter):
    """Highlight special code tags in comments and docstrings.

    Options accepted:

    `codetags` : list of strings
       A list of strings that are flagged as code tags.  The default is to
       highlight ``XXX``, ``TODO``, ``BUG`` and ``NOTE``.
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        tags = get_list_opt(options, 'codetags',
                            ['XXX', 'TODO', 'BUG', 'NOTE'])
        self.tag_re = re.compile(r'\b(%s)\b' % '|'.join([
            re.escape(tag) for tag in tags if tag
        ]))

    def filter(self, lexer, stream):
        regex = self.tag_re
        for ttype, value in stream:
            if ttype in String.Doc or \
               ttype in Comment and \
               ttype not in Comment.Preproc:
                for sttype, svalue in _replace_special(ttype, value, regex,
                                                       Comment.Special):
                    yield sttype, svalue
            else:
                yield ttype, value

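# Example use of CodeTagFilter (a sketch, not part of the original module).
# The filter is registered as ``codetagify`` in FILTERS below, so given a
# lexer instance it can also be attached by name; ``FIXME`` is just an
# illustrative custom tag.
#
#     lexer.add_filter('codetagify', codetags=['TODO', 'FIXME'])
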
class KeywordCaseFilter(Filter):
    """Convert keywords to lowercase or uppercase or capitalize them
    (first letter uppercase, the rest lowercase).

    This can be useful e.g. if you highlight Pascal code and want to adapt
    the code to your style guide.

    Options accepted:

    `case` : string
       The casing to convert keywords to. Must be one of ``'lower'``,
       ``'upper'`` or ``'capitalize'``.  The default is ``'lower'``.
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        case = get_choice_opt(options, 'case',
                              ['lower', 'upper', 'capitalize'], 'lower')
        self.convert = getattr(text_type, case)

    def filter(self, lexer, stream):
        for ttype, value in stream:
            if ttype in Keyword:
                yield ttype, self.convert(value)
            else:
                yield ttype, value

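# Example use of KeywordCaseFilter (a sketch, not part of the original
# module).  DelphiLexer and TerminalFormatter are just one plausible pairing
# for the Pascal use case mentioned above.
#
#     from pygments import highlight
#     from pygments.lexers import DelphiLexer
#     from pygments.formatters import TerminalFormatter
#
#     lexer = DelphiLexer()
#     lexer.add_filter(KeywordCaseFilter(case='upper'))
#     print(highlight(u'begin writeln; end.', lexer, TerminalFormatter()))
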
class NameHighlightFilter(Filter):
    """Highlight a normal Name (and Name.*) token with a different token type.

    Example::

        filter = NameHighlightFilter(
            names=['foo', 'bar', 'baz'],
            tokentype=Name.Function,
        )

    This would highlight the names "foo", "bar" and "baz"
    as functions. `Name.Function` is the default token type.

    Options accepted:

    `names` : list of strings
      A list of names that should be given the different token type.
      There is no default.
    `tokentype` : TokenType or string
      A token type or a string containing a token type name that is
      used for highlighting the strings in `names`.  The default is
      `Name.Function`.
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        self.names = set(get_list_opt(options, 'names', []))
        tokentype = options.get('tokentype')
        if tokentype:
            self.tokentype = string_to_tokentype(tokentype)
        else:
            self.tokentype = Name.Function

    def filter(self, lexer, stream):
        for ttype, value in stream:
            if ttype in Name and value in self.names:
                yield self.tokentype, value
            else:
                yield ttype, value

class ErrorToken(Exception):
    pass

class RaiseOnErrorTokenFilter(Filter):
    """Raise an exception when the lexer generates an error token.

    Options accepted:

    `excclass` : Exception class
      The exception class to raise.
      The default is `pygments.filters.ErrorToken`.

    .. versionadded:: 0.8
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        self.exception = options.get('excclass', ErrorToken)
        try:
            # issubclass() will raise TypeError if first argument is not a class
            if not issubclass(self.exception, Exception):
                raise TypeError
        except TypeError:
            raise OptionError('excclass option is not an exception class')

    def filter(self, lexer, stream):
        for ttype, value in stream:
            if ttype is Error:
                raise self.exception(value)
            yield ttype, value

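# Example use of RaiseOnErrorTokenFilter (a sketch, not part of the original
# module).  ``MyLexingError`` is a hypothetical exception class defined only
# for the example; given a lexer instance:
#
#     class MyLexingError(Exception):
#         pass
#
#     lexer.add_filter(RaiseOnErrorTokenFilter(excclass=MyLexingError))
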
class VisibleWhitespaceFilter(Filter):
    """Convert tabs, newlines and/or spaces to visible characters.

    Options accepted:

    `spaces` : string or bool
      If this is a one-character string, spaces will be replaced by this string.
      If it is another true value, spaces will be replaced by ``·`` (unicode
      MIDDLE DOT).  If it is a false value, spaces will not be replaced.  The
      default is ``False``.
    `tabs` : string or bool
      The same as for `spaces`, but the default replacement character is ``»``
      (unicode RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK).  The default value
      is ``False``.  Note: this will not work if the `tabsize` option for the
      lexer is nonzero, as tabs will already have been expanded then.
    `tabsize` : int
      If tabs are to be replaced by this filter (see the `tabs` option), this
      is the total number of characters that a tab should be expanded to.
      The default is ``8``.
    `newlines` : string or bool
      The same as for `spaces`, but the default replacement character is ``¶``
      (unicode PILCROW SIGN).  The default value is ``False``.
    `wstokentype` : bool
      If true, give whitespace the special `Whitespace` token type.  This allows
      styling the visible whitespace differently (e.g. greyed out), but it can
      disrupt background colors.  The default is ``True``.

    .. versionadded:: 0.8
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        for name, default in [('spaces',   u'·'),
                              ('tabs',     u'»'),
                              ('newlines', u'¶')]:
            opt = options.get(name, False)
            if isinstance(opt, string_types) and len(opt) == 1:
                setattr(self, name, opt)
            else:
                setattr(self, name, (opt and default or ''))
        tabsize = get_int_opt(options, 'tabsize', 8)
        if self.tabs:
            self.tabs += ' ' * (tabsize - 1)
        if self.newlines:
            self.newlines += '\n'
        self.wstt = get_bool_opt(options, 'wstokentype', True)

    def filter(self, lexer, stream):
        if self.wstt:
            spaces = self.spaces or u' '
            tabs = self.tabs or u'\t'
            newlines = self.newlines or u'\n'
            regex = re.compile(r'\s')
            def replacefunc(wschar):
                if wschar == ' ':
                    return spaces
                elif wschar == '\t':
                    return tabs
                elif wschar == '\n':
                    return newlines
                return wschar

            for ttype, value in stream:
                for sttype, svalue in _replace_special(ttype, value, regex,
                                                       Whitespace, replacefunc):
                    yield sttype, svalue
        else:
            spaces, tabs, newlines = self.spaces, self.tabs, self.newlines
            # simpler processing: plain string replacement, keeping the
            # original token types
            for ttype, value in stream:
                if spaces:
                    value = value.replace(' ', spaces)
                if tabs:
                    value = value.replace('\t', tabs)
                if newlines:
                    value = value.replace('\n', newlines)
                yield ttype, value

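# Example use of VisibleWhitespaceFilter (a sketch, not part of the original
# module): given a lexer instance, render spaces as middle dots and newlines
# as pilcrows.
#
#     lexer.add_filter(VisibleWhitespaceFilter(spaces=True, newlines=True))
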
class GobbleFilter(Filter):
    """Gobbles source code lines (eats initial characters).

    This filter drops the first ``n`` characters off every line of code.  This
    may be useful when the source code fed to the lexer is indented by a fixed
    amount of space that isn't desired in the output.

    Options accepted:

    `n` : int
       The number of characters to gobble.

    .. versionadded:: 1.2
    """
    def __init__(self, **options):
        Filter.__init__(self, **options)
        self.n = get_int_opt(options, 'n', 0)

    def gobble(self, value, left):
        if left < len(value):
            return value[left:], 0
        else:
            return u'', left - len(value)

    def filter(self, lexer, stream):
        n = self.n
        left = n  # How many characters are left to gobble on this line.
        for ttype, value in stream:
            # Remove ``left`` characters from the first line, ``n`` from all
            # others.
            parts = value.split('\n')
            (parts[0], left) = self.gobble(parts[0], left)
            for i in range(1, len(parts)):
                (parts[i], left) = self.gobble(parts[i], n)
            value = u'\n'.join(parts)

            if value != '':
                yield ttype, value

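# Example use of GobbleFilter (a sketch, not part of the original module):
# given a lexer instance, strip a uniform four-space indent from the input.
#
#     lexer.add_filter(GobbleFilter(n=4))
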
class TokenMergeFilter(Filter):
    """Merges consecutive tokens with the same token type in the output
    stream of a lexer.

    .. versionadded:: 1.2
    """
    def __init__(self, **options):
        Filter.__init__(self, **options)

    def filter(self, lexer, stream):
        current_type = None
        current_value = None
        for ttype, value in stream:
            if ttype is current_type:
                current_value += value
            else:
                if current_type is not None:
                    yield current_type, current_value
                current_type = ttype
                current_value = value
        if current_type is not None:
            yield current_type, current_value

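# Example use of TokenMergeFilter (a sketch, not part of the original module).
# Merging adjacent same-typed tokens can shrink the output of formatters that
# emit one markup element per token; given a lexer instance:
#
#     lexer.add_filter(TokenMergeFilter())
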
FILTERS = {
    'codetagify':     CodeTagFilter,
    'keywordcase':    KeywordCaseFilter,
    'highlight':      NameHighlightFilter,
    'raiseonerror':   RaiseOnErrorTokenFilter,
    'whitespace':     VisibleWhitespaceFilter,
    'gobble':         GobbleFilter,
    'tokenmerge':     TokenMergeFilter,
}