Synopsis - Cross-Reference
# File: Synopsis/Parsers/Python/SXRGenerator.py
#
# Copyright (C) 2008 Stefan Seefeld
# All rights reserved.
# Licensed to the public under the terms of the GNU LGPL (>= 2),
# see the file COPYING for details.
#
"""Generate an SXR (cross-reference) file from a Python source file.

The generator walks the concrete parse tree produced by the 'parser' module
in lock-step with the token stream produced by 'tokenize'.  Every token is
written to the output, wrapped in markup (keywords, strings, comments) or in
an '<a href=...>' cross-reference for function and class definitions.
"""

import parser
import token
import tokenize
import symbol
import keyword

HAVE_ENCODING_DECL = hasattr(symbol, "encoding_decl")  # python 2.3
HAVE_IMPORT_NAME = hasattr(symbol, "import_name")  # python 2.4
HAVE_DECORATOR = hasattr(symbol, "decorator")  # python 2.4


def num_tokens(ptree):
    """Count the number of leaf tokens in the given ptree.

    'ptree' is either a token string (counted as one) or a parse-tree node
    of the form (kind, child, ...); the leading 'kind' entry is skipped.
    """

    if isinstance(ptree, str):
        return 1
    return sum([num_tokens(n) for n in ptree[1:]])


class LexerDebugger:
    """Wrap a token generator and print every token that is fetched."""

    def __init__(self, lexer):

        self.lexer = lexer

    def next(self):
        """Fetch the next token from the wrapped lexer, echo it, return it."""

        n = self.lexer.next()
        print('next is "%s" (%s)' % (n[1], n[0]))
        return n


header = """<sxr filename="%(filename)s">
<line>"""

trailer = """</line>
</sxr>
"""


def escape(text):
    """Return 'text' with the XML special characters replaced by entities.

    '&' has to be replaced first, otherwise the ampersands introduced by the
    other replacements would be escaped a second time.
    """

    for p in [('&', '&amp;'), ('"', '&quot;'), ('<', '&lt;'), ('>', '&gt;')]:
        text = text.replace(*p)
    return text


class SXRGenerator:
    """Write a marked-up copy of a Python source file.

    Parse-tree nodes are dispatched by kind through 'self.handlers'; nodes
    without a dedicated handler are traversed by 'default_handler'.  Each
    handler is responsible for consuming exactly the tokens that belong to
    its node, so that the parse tree and the token stream stay in sync.
    """

    def __init__(self):
        """Set up the dispatch table and the output state."""

        self.handlers = {}
        self.handlers[token.ENDMARKER] = self.handle_end_marker
        self.handlers[token.NEWLINE] = self.handle_newline
        self.handlers[token.INDENT] = self.handle_indent
        self.handlers[token.DEDENT] = self.handle_dedent
        self.handlers[token.STRING] = self.handle_string
        self.handlers[symbol.funcdef] = self.handle_function
        self.handlers[symbol.parameters] = self.handle_parameters
        self.handlers[symbol.classdef] = self.handle_class
        self.handlers[token.NAME] = self.handle_name
        self.handlers[symbol.expr_stmt] = self.handle_expr_stmt
        #self.handlers[token.OP] = self.handle_op
        self.handlers[symbol.power] = self.handle_power
        if HAVE_ENCODING_DECL:
            self.handlers[symbol.encoding_decl] = self.handle_encoding_decl
        if HAVE_IMPORT_NAME:
            self.handlers[symbol.import_as_names] = self.handle_import_as_names
            self.handlers[symbol.dotted_as_names] = self.handle_dotted_as_names
            self.handlers[symbol.import_from] = self.handle_import_from
            self.handlers[symbol.import_name] = self.handle_import_name
        else:
            self.handlers[symbol.import_stmt] = self.handle_import
        if HAVE_DECORATOR:
            self.handlers[symbol.decorator] = self.handle_decorator

        self.col = 0         # output column reached on the current line
        self.lineno = 1      # current line number (used in error messages)
        self.parameters = []
        self.scopes = []     # names of the enclosing scopes, outermost first

    def process_file(self, scope, sourcefile, sxr):
        """Generate the SXR file for one source file.

        scope      -- sequence of name components of the enclosing scope
        sourcefile -- object providing 'abs_name' (path that is read) and
                      'name' (written into the 'filename' attribute)
        sxr        -- path of the output file; it is (re)created
        """

        self.scopes = list(scope)
        # Start every file with a clean position, so that a generator can be
        # reused for more than one file.
        self.col = 0
        self.lineno = 1
        source = open(sourcefile.abs_name, 'r')
        try:
            src = source.readlines()
            # Number of digits needed to print the largest line number.
            self.lines = len(str(len(src) + 1))
            ptree = parser.ast2tuple(parser.suite(''.join(src)))
            # The same file is read a second time by the tokenizer.
            source.seek(0)
            self.lexer = tokenize.generate_tokens(source.readline)
            #self.lexer = LexerDebugger(tokenize.generate_tokens(source.readline))
            # Only open (and thus truncate) the output once parsing succeeded.
            self.sxr = open(sxr, 'w')
            try:
                self.sxr.write(header % {'filename': escape(sourcefile.name)})
                self.handle(ptree)
                self.sxr.write(trailer)
            finally:
                self.sxr.close()
        finally:
            source.close()
        if self.scopes:
            self.scopes.pop()

    def handle(self, ptree):
        """Dispatch a parse-tree node (kind, child, ...) to its handler.

        The handler is called with the children only.  Raises Exception if
        'ptree' is not a tuple.
        """

        if not isinstance(ptree, tuple):
            raise Exception("Process error: Type is not a tuple %s" % str(ptree))
        kind = ptree[0]
        value = ptree[1:]
        handler = self.handlers.get(kind, self.default_handler)
        handler(value)

    def default_handler(self, ptree):
        """Traverse children: recurse into nodes, emit token strings."""

        for node in ptree:
            if isinstance(node, tuple):
                self.handle(node)
            elif isinstance(node, str):
                self.handle_token(node)
            else:
                raise Exception("Invalid ptree node")

    def next_token(self):
        """Return the next visible token.
        Process tokens that are not part of the parse tree silently."""

        t = self.lexer.next()
        while t[0] in (tokenize.NL, tokenize.COMMENT):
            if t[0] == tokenize.NL:
                self.print_newline()
            else:
                self.print_token(t)
                if t[1][-1] == '\n':
                    self.print_newline()
            t = self.lexer.next()
        return t

    def handle_token(self, item=None):
        """Consume the next token and write it to the output.

        If 'item' is given, the token text has to match it; a mismatch means
        parse tree and token stream got out of sync and raises Exception.
        """

        t = self.next_token()
        if item is not None and t[1] != item:
            raise Exception('Internal error in line %d: expected "%s", got "%s" (%d)'
                            % (self.lineno, item, t[1], t[0]))
        self.print_token(t)

    def handle_name_as_xref(self, xref, name, from_=None, type=None):
        """Consume the NAME token 'name' and write it as a cross-reference.

        xref  -- sequence of name components, joined with '.' into the href
        from_ -- optional value of the 'from' attribute
        type  -- optional value of the 'type' attribute

        Raises Exception if the next token is not the expected name.
        """

        kind, value, (srow, scol), (erow, ecol), line = self.next_token()
        if (kind, value) != (token.NAME, name):
            raise Exception('Internal error in line %d: expected name "%s", got "%s" (%d)'
                            % (self.lineno, name, value, kind))

        if self.col != scol:
            self.sxr.write(' ' * (scol - self.col))
        attrs = []
        if from_:
            attrs.append('from="%s"' % from_)
        if type:
            attrs.append('type="%s"' % type)
        a = '<a href="%s" %s>%s</a>' % ('.'.join(xref), ' '.join(attrs), value)
        self.sxr.write(a)
        self.col = ecol

    def handle_tokens(self, ptree):
        """Consume as many tokens as the node 'ptree' has leaves.

        'ptree' has to be a complete node (kind, child, ...), not just the
        children, since 'num_tokens' skips the first entry.
        """

        for i in range(num_tokens(ptree)):
            self.handle_token()

    def handle_end_marker(self, nodes):
        """Ignore the end marker; it has no textual representation."""

    def handle_newline(self, nodes):

        self.handle_token()

    def handle_indent(self, indent):

        self.handle_token()

    def handle_dedent(self, dedent):

        self.handle_token()

    def handle_string(self, content):

        self.handle_token()

    def handle_function(self, nodes):
        """Write a function definition, with its name as a cross-reference."""

        offset = 0
        if HAVE_DECORATOR and nodes[0][0] == symbol.decorators:
            # Grammars that list the decorators as first child of the
            # function definition: emit them, then skip over them.
            self.handle(nodes[0])
            offset = 1

        def_token = nodes[0 + offset]
        self.handle_token(def_token[1])
        name = nodes[1 + offset][1]
        qname = tuple(self.scopes + [name])
        self.handle_name_as_xref(qname, name,
                                 from_='.'.join(self.scopes), type='definition')
        # Handle the parameters.
        self.handle(nodes[2 + offset])

        colon_token = nodes[3 + offset]
        self.handle_token(colon_token[1])
        body = nodes[4 + offset]
        # Theoretically, we'd have to push the function scope here.
        # Practically, however, we don't inject xrefs (yet) into function
        # bodies: the body is not traversed, its tokens are just copied,
        # since the ASG doesn't handle local declarations anyways.
        self.handle_tokens(body)

    def handle_parameters(self, nodes):
        """Write a parameter list including the enclosing parentheses."""

        self.handle_token(nodes[0][1])
        if nodes[1][0] == symbol.varargslist:
            args = list(nodes[1][1:])
            while args:
                arg = args.pop(0)
                kind = arg[0]
                if kind == token.COMMA:
                    self.handle_token(arg[1])
                elif kind == symbol.fpdef:
                    self.handle_tokens(arg)
                elif kind == token.EQUAL:
                    # '=' is followed by the default value expression.
                    self.handle_token(arg[1])
                    self.handle_tokens(args.pop(0))
                elif kind in (token.STAR, token.DOUBLESTAR):
                    # '*' / '**' is followed by a plain name token.
                    self.handle_token(arg[1])
                    self.handle_token(args.pop(0)[1])
                else:
                    print("Unknown symbol: %s" % (arg,))
        self.handle_token(nodes[-1][1])

    def handle_class(self, nodes):
        """Write a class definition and traverse its body in the class scope."""

        class_token = nodes[0]
        self.handle_token(class_token[1])
        name = nodes[1][1]
        qname = tuple(self.scopes + [name])
        self.handle_name_as_xref(qname, name,
                                 from_='.'.join(self.scopes), type='definition')
        if nodes[2][0] == token.LPAR:
            self.handle_token('(')
            # The base list is absent in 'class A(): ...'.
            if nodes[3][0] != token.RPAR:
                self.handle_tokens(nodes[3])
            self.handle_token(')')
        self.handle_token(':')
        # The body is the last child, whatever form the base clause has.
        body = nodes[-1]
        self.scopes.append(name)
        self.handle(body)
        self.scopes.pop()

    def handle_name(self, content):

        self.handle_token(content[0])

    def handle_expr_stmt(self, nodes):
        """Copy all tokens of an expression statement without traversal."""

        for n in nodes:
            self.handle_tokens(n)

    def handle_dotted_name(self, dname, rest):
        """Write the name components 'dname' joined by '.', then 'rest'.

        'rest' is a sequence of parse-tree nodes following the dotted name.
        """

        self.handle_token(dname[0])
        for name in dname[1:]:
            self.handle_token('.')
            self.handle_token(name)
        for node in rest:
            self.handle(node)

    def handle_op(self, nodes):
        """Unused; the registration in __init__ is commented out."""

    def handle_power(self, content):
        """Write a 'power' node, treating a leading 'a.b.c' as dotted name."""

        def get_dotted_name(content):
            """Return (name components, remaining nodes), or None if the
            node does not start with a plain name."""
            if content[0][0] != symbol.atom or content[0][1][0] != token.NAME:
                return None
            dotted_name = [content[0][1][1]]
            i = 1
            for param in content[1:]:
                # Only '.NAME' trailers extend the dotted name.
                if param[0] != symbol.trailer:
                    break
                if param[1][0] != token.DOT:
                    break
                if param[2][0] != token.NAME:
                    break
                dotted_name.append(param[2][1])
                i += 1
            return dotted_name, content[i:]

        name = get_dotted_name(content)
        if name:
            self.handle_dotted_name(*name)
        else:
            for node in content:
                self.handle(node)

    def handle_encoding_decl(self, nodes):
        """Process a module that carries a source-encoding declaration.

        NOTE(review): assumes the parser wraps the whole module as
        (encoding_decl, <file_input tree>, '<encoding name>'), the layout
        the Python 2 'compiler.transformer' module expects -- confirm.
        The encoding name is not a token and is therefore skipped.
        """

        for node in nodes:
            if isinstance(node, tuple):
                self.handle(node)

    def handle_import_as_names(self, nodes):

        for n in nodes:
            self.handle(n)

    def handle_dotted_as_names(self, nodes):

        for n in nodes:
            self.handle(n)

    def handle_import_from(self, nodes):
        """Write a 'from ... import ...' statement.

        All children are dispatched generically, which covers every form of
        the statement ('*', parenthesized name lists, leading dots).
        """

        for n in nodes:
            self.handle(n)

    def handle_import_name(self, nodes):

        self.handle_token('import')
        self.handle_dotted_as_names(nodes[1][1:])

    def handle_import(self, nodes):

        #self.handle_token('import')
        for n in nodes:
            self.handle(n)

    def handle_decorator(self, nodes):
        """Copy all tokens of a decorator line without traversal.

        The tokens have to be consumed here, otherwise the token stream is
        out of sync when the decorated definition is handled.
        """

        for n in nodes:
            self.handle_tokens(n)

    def print_token(self, t):
        """Write the token 't' (a 'tokenize' 5-tuple) with suitable markup."""

        kind, value, (srow, scol), (erow, ecol), line = t
        if kind == token.NEWLINE:
            self.print_newline()
            return

        # Reproduce the whitespace between the previous token and this one.
        if self.col != scol:
            self.sxr.write(' ' * (scol - self.col))
        if keyword.iskeyword(value):
            format = '<span class="py-keyword">%s</span>'
        elif kind == token.STRING:
            format = '<span class="py-string">%s</span>'
            # A string spanning several lines is written as one span per
            # line, so that no span crosses a <line> boundary.
            chunks = value.split('\n')
            for c in chunks[:-1]:
                self.sxr.write(format % escape(c))
                self.print_newline()
            value = chunks[-1]
        elif kind == tokenize.COMMENT:
            format = '<span class="py-comment">%s</span>'
            if value[-1] == '\n':
                value = value[:-1]
        else:
            format = '%s'

        self.sxr.write(format % escape(value))
        self.col = ecol

    def print_newline(self):
        """Close the current <line> element and open the next one."""

        self.col = 0
        self.lineno += 1
        self.sxr.write('</line>\n')
        self.sxr.write('<line>')
Generated on Tue May 13 02:39:45 2008 by
synopsis (version 0.10)
synopsis (version 0.10)