File: Synopsis/Lexer.hh
  1//
  2// Copyright (C) 2004 Stefan Seefeld
  3// All rights reserved.
  4// Licensed to the public under the terms of the GNU LGPL (>= 2),
  5// see the file COPYING for details.
  6//
  7#ifndef Synopsis_Lexer_hh_
  8#define Synopsis_Lexer_hh_
  9
 10#include <Synopsis/Token.hh>
 11#include <vector>
 12#include <map>
 13#include <deque>
 14#include <stdexcept>
 15
 16namespace Synopsis
 17{
 18
 19class Buffer;
 20
 21//. a Lexer reads tokens from a stream.
 22class Lexer
 23{
 24public:
 25  //. Define sets of token that are to be recognized as special
 26  //. keywords (as opposed to identifiers). They can be or'ed.
 27  //. If CXX is not specified, the Lexer will operate in 'C mode'.
 28  enum TokenSet { C=0x0, CXX=0x01, GCC=0x02, MSVC=0x04};
 29  typedef std::vector<Token> Comments;
 30
 31  struct InvalidChar : std::runtime_error
 32  {
 33    InvalidChar(const std::string &msg) : std::runtime_error(msg) {}
 34  };
 35
 36  //. Construct a Lexer on the given Buffer using the given
 37  //. token set. The default token set is CXX with GCC extensions.
 38  Lexer(Buffer *, int tokenset = CXX|GCC);
 39  Token::Type get_token(Token &);
 40  Token::Type look_ahead(size_t);
 41  Token::Type look_ahead(size_t, Token &);
 42
 43  const char *save();
 44  void restore(const char *);
 45
 46  Comments get_comments();
 47
 48  //. Return the origin of the given pointer
 49  //. (filename and line number)
 50  unsigned long origin(const char *, std::string &) const;
 51private:
 52  //. a Queue is used to read in tokens from a stream
 53  //. without consuming them
 54  class Queue
 55  {
 56  public:
 57    typedef std::deque<Token> Container;
 58    typedef Container::size_type size_type;
 59
 60    bool empty() const { return my_container.empty();}
 61    size_type size() const { return my_container.size();}
 62    const Token &front() const { return my_container.front();}
 63    const Token &back() const { return my_container.back();}
 64    const Token &at(size_type i) const { return my_container.at(i);}
 65    void push(const Token &t) { my_container.push_back(t);}
 66    void pop() { my_container.pop_front();}
 67    void clear() { my_container.clear();}
 68  private:
 69    Container my_container;
 70  };
 71  typedef std::map<std::string, Token::Type> Dictionary;
 72
 73  void rewind(const char *);
 74
 75  Token::Type read_token(const char *&, size_t &);
 76  //. try to fill the token cache to contain
 77  //. at least o tokens. Returns false if
 78  //. there are not enough tokens.
 79  bool fill(size_t o);
 80
 81  //. skip till end of paren
 82  void skip_paren();
 83  //. skip till end of line
 84  void skip_line();
 85  //. skip __attribute__(...), ___asm__(...), ...
 86  void skip_attribute();
 87  //. skip __extension__(...).
 88  Token::Type skip_extension(const char *&, size_t &);
 89  //. skip __asm ...
 90  void skip_asm();
 91  //. skip __declspec(...).
 92  void skip_declspec();
 93  //. skip __pragma(...);.
 94  void skip_pragma();
 95
 96  char get_next_non_white_char();
 97  Token::Type read_line();
 98  bool read_char_const(unsigned long top);
 99  bool read_str_const(unsigned long top);
100  Token::Type read_number(char c, unsigned long top);
101  Token::Type read_float(unsigned long top);
102  Token::Type read_identifier(unsigned long top);
103  Token::Type screen(const char *identifier, size_t len);
104  Token::Type read_separator(char c, unsigned long top);
105  Token::Type single_char_op(unsigned char c);
106  Token::Type read_comment(char c, unsigned long top);
107
108  Buffer    *my_buffer;
109  Queue      my_tokens;
110  Dictionary my_keywords;
111  Token      my_token;
112  Comments   my_comments;
113};
114
//. True if c is a space, tab, form feed or carriage return.
//. Newline is not matched.
inline bool is_blank(char c)
{
  switch (c)
  {
    case ' ':
    case '\t':
    case '\f':
    case '\r':
      return true;
    default:
      return false;
  }
}
119
//. True if c is an ASCII letter, an underscore or a dollar sign.
inline bool is_letter(char c)
{
  if (c == '_' || c == '$') return true;
  if (c >= 'a' && c <= 'z') return true;
  return c >= 'A' && c <= 'Z';
}
124
//. True if c is a decimal digit ('0' through '9').
inline bool is_digit(char c)
{
  return !(c < '0' || c > '9');
}
126
//. True if c is 'x' or 'X'.
inline bool is_xletter(char c)
{
  switch (c)
  {
    case 'x':
    case 'X':
      return true;
    default:
      return false;
  }
}
128
//. True if c is 'e' or 'E'.
inline bool is_eletter(char c)
{
  if (c == 'e') return true;
  else return c == 'E';
}
130
131inline bool is_hexdigit(char c)
132{
133  return is_digit(c) || ('A' <= c && c <= 'F') || ('a' <= c && c <= 'f');
134}
135
//. True if c is one of the characters 'U', 'u', 'L' or 'l'.
inline bool is_int_suffix(char c)
{
  switch (c)
  {
    case 'U':
    case 'u':
    case 'L':
    case 'l':
      return true;
    default:
      return false;
  }
}
140
//. True if c is one of the characters 'F', 'f', 'L' or 'l'.
inline bool is_float_suffix(char c)
{
  if (c == 'f' || c == 'F') return true;
  if (c == 'l' || c == 'L') return true;
  return false;
}
145
146}
147
148#endif