Synopsis - Cross-Reference

File: Synopsis/Parsers/Python/SXRGenerator.py
#
# Copyright (C) 2008 Stefan Seefeld
# All rights reserved.
# Licensed to the public under the terms of the GNU LGPL (>= 2),
# see the file COPYING for details.
#
  7
  8import parser
  9import token
 10import tokenize
 11import symbol
 12import keyword
 13
 14HAVE_ENCODING_DECL = hasattr(symbol, "encoding_decl") # python 2.3
 15HAVE_IMPORT_NAME = hasattr(symbol, "import_name") # python 2.4
 16HAVE_DECORATOR = hasattr(symbol,"decorator") # python 2.4
 17
 18def num_tokens(ptree):
 19    """Count the number of leaf tokens in the given ptree."""
 20
 21    if type(ptree) == str: return 1
 22    else: return sum([num_tokens(n) for n in ptree[1:]])
 23
 24
 25class LexerDebugger:
 26
 27    def __init__(self, lexer):
 28
 29        self.lexer = lexer
 30
 31    def next(self):
 32
 33        n = self.lexer.next()
 34        print 'next is "%s" (%s)'%(n[1], n[0])
 35        return n
 36
 37header="""<sxr filename="%(filename)s">
 38<line>"""
 39
 40trailer="""</line>
 41</sxr>
 42"""
 43
 44def escape(text):
 45
 46    for p in [('&', '&amp;'), ('"', '&quot;'), ('<', '&lt;'), ('>', '&gt;'),]:
 47        text = text.replace(*p)
 48    return text
 49
 50
 51class SXRGenerator:
 52    """"""
 53
 54    def __init__(self):
 55        """"""
 56
 57        self.handlers = {}
 58        self.handlers[token.ENDMARKER] = self.handle_end_marker
 59        self.handlers[token.NEWLINE] = self.handle_newline
 60        self.handlers[token.INDENT] = self.handle_indent
 61        self.handlers[token.DEDENT] = self.handle_dedent
 62        self.handlers[token.STRING] = self.handle_string
 63        self.handlers[symbol.funcdef]= self.handle_function
 64        self.handlers[symbol.parameters] = self.handle_parameters
 65        self.handlers[symbol.classdef] = self.handle_class
 66        self.handlers[token.NAME] = self.handle_name
 67        self.handlers[symbol.expr_stmt] = self.handle_expr_stmt
 68        #self.handlers[token.OP] = self.handle_op
 69        self.handlers[symbol.power] = self.handle_power
 70        if HAVE_ENCODING_DECL:
 71            self.handlers[symbol.encoding_decl] = self.handle_encoding_decl
 72        if HAVE_IMPORT_NAME:
 73            self.handlers[symbol.import_as_names] = self.handle_import_as_names
 74            self.handlers[symbol.dotted_as_names] = self.handle_dotted_as_names
 75            self.handlers[symbol.import_from] = self.handle_import_from
 76            self.handlers[symbol.import_name] = self.handle_import_name
 77        else:
 78            self.handlers[symbol.import_stmt] = self.handle_import
 79        if HAVE_DECORATOR:
 80            self.handlers[symbol.decorator] = self.handle_decorator
 81
 82        self.col = 0
 83        self.lineno = 1
 84        self.parameters = []
 85        self.scopes = []
 86
 87    def process_file(self, scope, sourcefile, sxr):
 88
 89        self.scopes = list(scope)
 90        input = open(sourcefile.abs_name, 'r+')
 91        src = input.readlines()
 92        self.lines = len(`len(src) + 1`)
 93        ptree = parser.ast2tuple(parser.suite(''.join(src)))
 94        input.seek(0)
 95        self.lexer = tokenize.generate_tokens(input.readline)
 96        #self.lexer = LexerDebugger(tokenize.generate_tokens(input.readline))
 97        self.sxr = open(sxr, 'w+')
 98        lineno_template = '%%%ds' % self.lines
 99        lineno = lineno_template % self.lineno
100        self.sxr.write(header % {'filename': sourcefile.name})
101        try:
102            self.handle(ptree)
103        except StopIteration:
104            raise
105        self.sxr.write(trailer)
106        self.scopes.pop()
107
108    def handle(self, ptree):
109
110        if type(ptree) == tuple:
111            kind = ptree[0]
112            value = ptree[1:]
113            handler = self.handlers.get(kind, self.default_handler)
114            handler(value)
115        else:
116            raise Exception("Process error: Type is not a tuple %s" % str(ptree))
117
118
119    def default_handler(self, ptree):
120
121        for node in ptree:
122            if type(node) == tuple: self.handle(node)
123            elif type(node) == str: self.handle_token(node)
124            else: raise Exception("Invalid ptree node")
125
126
127    def next_token(self):
128        """Return the next visible token.
129        Process tokens that are not part of the parse tree silently."""
130
131        t = self.lexer.next()
132        while t[0] in [tokenize.NL, tokenize.COMMENT]:
133            if t[0] is tokenize.NL:
134                self.print_newline()
135            elif t[0] is tokenize.COMMENT:
136                self.print_token(t)
137                if t[1][-1] == '\n': self.print_newline()
138            t = self.lexer.next()
139        return t
140
141
142    def handle_token(self, item = None):
143
144        t = self.next_token()
145        if item is not None and t[1] != item:
146            raise 'Internal error in line %d: expected "%s", got "%s" (%d)'%(self.lineno, item, t[1], t[0])
147        else:
148            self.print_token(t)
149
150
151    def handle_name_as_xref(self, xref, name, from_ = None, type = None):
152
153        kind, value, (srow, scol), (erow, ecol), line = self.next_token()
154        if (kind, value) != (token.NAME, name):
155            raise 'Internal error in line %d: expected name "%s", got "%s" (%d)'%(name, self.lineno, item, t[1], t[0])
156
157        if self.col != scol:
158            self.sxr.write(' ' * (scol - self.col))
159        attrs = []
160        if from_: attrs.append('from="%s"'%from_)
161        if type: attrs.append('type="%s"'%type)
162        a = '<a href="%s" %s>%s</a>'%('.'.join(xref), ' '.join(attrs), value)
163        self.sxr.write(a)
164        self.col = ecol
165
166
167    def handle_tokens(self, ptree):
168
169        tokens = num_tokens(ptree)
170        for i in xrange(tokens):
171            self.handle_token()
172
173
174    def handle_end_marker(self, nodes): pass
175    def handle_newline(self, nodes):
176
177        self.handle_token()
178
179
180    def handle_indent(self, indent):
181
182        self.handle_token()
183
184
185    def handle_dedent(self, dedent):
186
187        self.handle_token()
188
189
190    def handle_string(self, content):
191
192        self.handle_token()
193
194
195    def handle_function(self, nodes):
196
197        if HAVE_DECORATOR:
198            if nodes[0][0] == symbol.decorators:
199                offset = 1
200                # FIXME
201                self.handle(nodes[0])
202            else:
203                offset = 0
204        else:
205            offset = 0
206
207        def_token = nodes[0 + offset]
208        self.handle_token(def_token[1])
209        name = nodes[1 + offset][1]
210        qname = tuple(self.scopes + [name])
211        self.handle_name_as_xref(qname, name, from_='.'.join(self.scopes), type='definition')
212        # Handle the parameters.
213        self.handle(nodes[2 + offset])
214
215        colon_token = nodes[3 + offset]
216        self.handle_token(colon_token[1])
217        body = nodes[4 + offset]
218        # Theoretically, we'd have to push the function scope here.
219        # Practically, however, we don't inject xrefs (yet) into function bodies.
220        self.handle_tokens(body)
221
222        # Don't traverse the function body, since the ASG doesn't handle
223        # local declarations anyways.
224
225
226    def handle_parameters(self, nodes):
227
228        self.handle_token(nodes[0][1])
229        if nodes[1][0] == symbol.varargslist:
230            args = list(nodes[1][1:])
231            while args:
232                if args[0][0] == token.COMMA:
233                    self.handle_token(args[0][1])
234                    pass
235                elif args[0][0] == symbol.fpdef:
236                    self.handle_tokens(args[0])
237                elif args[0][0] == token.EQUAL:
238                    self.handle_token(args[0][1])
239                    del args[0]
240                    self.handle_tokens(args[0])
241                elif args[0][0] == token.DOUBLESTAR:
242                    self.handle_token(args[0][1])
243                    del args[0]
244                    self.handle_token(args[0][1])
245                elif args[0][0] == token.STAR:
246                    self.handle_token(args[0][1])
247                    del args[0]
248                    self.handle_token(args[0][1])
249                else:
250                    print "Unknown symbol:",args[0]
251                del args[0]
252        self.handle_token(nodes[-1][1])
253
254
255    def handle_class(self, nodes):
256
257        class_token = nodes[0]
258        self.handle_token(class_token[1])
259        name = nodes[1][1]
260        qname = tuple(self.scopes + [name])
261        self.handle_name_as_xref(qname, name, from_='.'.join(self.scopes), type='definition')
262        base_clause = nodes[2][0] == token.LPAR and nodes[3] or None
263        self.handle_tokens(nodes[2])
264        bases = []
265        if base_clause:
266            self.handle_tokens(base_clause)
267            self.handle_token(')')
268            self.handle_token(':')
269
270            body = nodes[6]
271        else:
272            body = nodes[3]
273        self.scopes.append(name)
274        self.handle(body)
275        self.scopes.pop()
276
277
278    def handle_name(self, content):
279
280        self.handle_token(content[0])
281
282
283    def handle_expr_stmt(self, nodes):
284
285        for n in nodes: self.handle_tokens(n)
286
287
288    def handle_dotted_name(self, dname, rest):
289
290        self.handle_token(dname[0])
291        for name in dname[1:]:
292            self.handle_token('.')
293            self.handle_token(name)
294        map(self.handle, rest)
295
296
297    def handle_op(self, nodes): pass
298
299
300    def handle_power(self, content):
301
302        def get_dotted_name(content):
303            if content[0][0] != symbol.atom or content[0][1][0] != token.NAME:
304                return None
305            dotted_name = [content[0][1][1]]
306            i = 1
307            for param in content[1:]:
308                if param[0] != symbol.trailer: break
309                if param[1][0] != token.DOT: break
310                if param[2][0] != token.NAME: break
311                dotted_name.append(param[2][1])
312                i += 1
313            if i < len(content): return dotted_name, content[i:]
314            else: return dotted_name, []
315
316        name = get_dotted_name(content)
317        if name: self.handle_dotted_name(*name)
318        else: map(self.handle, content)
319
320
321    def handle_encoding_decl(self, nodes): pass
322    def handle_import_as_names(self, nodes):
323
324        for n in nodes: self.handle(n)
325
326
327    def handle_dotted_as_names(self, nodes):
328
329        for n in nodes: self.handle(n)
330
331
332    def handle_import_from(self, nodes):
333
334        self.handle_token('from')
335        self.handle(nodes[1])
336        self.handle_token('import')
337        self.handle(nodes[3])
338
339
340    def handle_import_name(self, nodes):
341
342        self.handle_token('import')
343        self.handle_dotted_as_names(nodes[1][1:])
344
345
346    def handle_import(self, nodes):
347
348        #self.handle_token('import')
349        for n in nodes: self.handle(n)
350
351
352    def handle_decorator(self, nodes): pass
353
354
355    def print_token(self, t):
356
357        kind, value, (srow, scol), (erow, ecol), line = t
358        if kind == token.NEWLINE:
359            self.print_newline()
360        else:
361            if self.col != scol:
362                self.sxr.write(' ' * (scol - self.col))
363            if keyword.iskeyword(value):
364                format = '<span class="py-keyword">%s</span>'
365            elif kind == token.STRING:
366                format = '<span class="py-string">%s</span>'
367                chunks = value.split('\n')
368                for c in chunks[:-1]:
369                    self.sxr.write(format % escape(c))
370                    self.print_newline()
371                value = chunks[-1]
372
373            elif kind == tokenize.COMMENT:
374                format = '<span class="py-comment">%s</span>'
375                if value[-1] == '\n': value = value[:-1]
376            else:
377                format = '%s'
378
379            self.sxr.write(format % escape(value))
380            self.col = ecol
381
382
383    def print_newline(self):
384
385        self.col = 0
386        self.lineno += 1
387        self.sxr.write('</line>\n')
388        self.sxr.write('<line>')
389
390
391