Below is the file 'basic_io.hh' from this revision. You can also download the file.
#ifndef __BASIC_IO_HH__ #define __BASIC_IO_HH__ // copyright (C) 2004 graydon hoare <graydon@pobox.com> // all rights reserved. // licensed to the public under the terms of the GNU GPL (>= 2) // see the file COPYING for details // this file provides parsing and printing primitives used by the higher // level parser and printer routines for the two datatypes change_set and // revision_set. every revision_set contains a number of change_sets, so // their i/o routines are somewhat related. #include <iosfwd> #include <string> #include <vector> #include <map> #include "paths.hh" #include "sanity.hh" namespace basic_io { inline bool is_xdigit(char x) { return ((x >= '0' && x <= '9') || (x >= 'a' && x <= 'f') || (x >= 'A' && x <= 'F')); } inline bool is_alpha(char x) { return ((x >= 'a' && x <= 'z') || (x >= 'A' && x <= 'Z')); } inline bool is_alnum(char x) { return ((x >= '0' && x <= '9') || (x >= 'a' && x <= 'z') || (x >= 'A' && x <= 'Z')); } inline bool is_space(char x) { return (x == ' ') || (x == '\n') || (x == '\t') || (x == '\r') || (x == '\v') || (x == '\f'); } typedef enum { TOK_SYMBOL, TOK_STRING, TOK_HEX, TOK_NONE } token_type; struct input_source { size_t line, col; std::string const & in; std::string::const_iterator curr; std::string name; int lookahead; char c; input_source(std::string const & in, std::string const & nm) : line(1), col(1), in(in), curr(in.begin()), name(nm), lookahead(0), c('\0') {} inline void peek() { if (LIKELY(curr != in.end())) lookahead = *curr; else lookahead = EOF; } inline void advance() { if (LIKELY(curr != in.end())) { c = *curr; ++curr; ++col; if (c == '\n') { col = 1; ++line; } } peek(); } void err(std::string const & s); }; struct tokenizer { input_source & in; std::string::const_iterator begin; std::string::const_iterator end; tokenizer(input_source & i) : in(i), begin(in.curr), end(in.curr) {} inline void mark() { begin = in.curr; end = begin; } inline void advance() { in.advance(); end = in.curr; } inline void store(std::string & val) { val.assign(begin, end); } inline token_type get_token(std::string & val) { in.peek(); while (true) { if (UNLIKELY(in.lookahead == EOF)) return TOK_NONE; if (!is_space(in.lookahead)) break; in.advance(); } if (is_alpha(in.lookahead)) { mark(); while (is_alnum(in.lookahead) || in.lookahead == '_') advance(); store(val); return basic_io::TOK_SYMBOL; } else if (in.lookahead == '[') { in.advance(); mark(); while (static_cast<char>(in.lookahead) != ']') { if (UNLIKELY(in.lookahead == EOF)) in.err("input stream ended in hex string"); if (UNLIKELY(!is_xdigit(in.lookahead))) in.err("non-hex character in hex string"); advance(); } store(val); if (UNLIKELY(static_cast<char>(in.lookahead) != ']')) in.err("hex string did not end with ']'"); in.advance(); return basic_io::TOK_HEX; } else if (in.lookahead == '"') { in.advance(); mark(); while (static_cast<char>(in.lookahead) != '"') { if (UNLIKELY(in.lookahead == EOF)) in.err("input stream ended in string"); if (UNLIKELY(static_cast<char>(in.lookahead) == '\\')) { // Possible escape: we understand escaped quotes and // escaped backslashes. Nothing else. If we // happen to // hit an escape, we stop doing the mark/store // thing // and switch to copying and appending per-character // until the // end of the token. // So first, store what we have *before* the escape. store(val); // Then skip over the escape backslash. in.advance(); // Make sure it's an escape we recognize. if (UNLIKELY(!(static_cast<char>(in.lookahead) == '"' || static_cast<char>(in.lookahead) == '\\'))) in.err("unrecognized character escape"); // Add the escaped character onto the accumulating token. in.advance(); val += in.c; // Now enter special slow loop for remainder. while (static_cast<char>(in.lookahead) != '"') { if (UNLIKELY(in.lookahead == EOF)) in.err("input stream ended in string"); if (UNLIKELY(static_cast<char>(in.lookahead) == '\\')) { // Skip over any further escape marker. in.advance(); if (UNLIKELY(!(static_cast<char>(in.lookahead) == '"' || static_cast<char>(in.lookahead) == '\\'))) in.err("unrecognized character escape"); } in.advance(); val += in.c; } // When slow loop completes, return early. if (static_cast<char>(in.lookahead) != '"') in.err("string did not end with '\"'"); in.advance(); return basic_io::TOK_STRING; } advance(); } store(val); if (UNLIKELY(static_cast<char>(in.lookahead) != '"')) in.err("string did not end with '\"'"); in.advance(); return basic_io::TOK_STRING; } else return basic_io::TOK_NONE; } void err(std::string const & s); }; std::string escape(std::string const & s); struct stanza { stanza(); size_t indent; std::vector<std::pair<std::string, std::string> > entries; void push_hex_pair(std::string const & k, std::string const & v); void push_hex_triple(std::string const & k, std::string const & n, std::string const & v); void push_str_pair(std::string const & k, std::string const & v); void push_str_triple(std::string const & k, std::string const & n, std::string const & v); void push_file_pair(std::string const & k, file_path const & v); void push_str_multi(std::string const & k, std::vector<std::string> const & v); }; struct printer { bool empty_output; std::ostream & out; printer(std::ostream & ost); void print_stanza(stanza const & st); }; struct parser { tokenizer & tok; parser(tokenizer & t) : tok(t) { token.reserve(128); advance(); } std::string token; token_type ttype; void err(std::string const & s); std::string tt2str(token_type tt); inline void advance() { ttype = tok.get_token(token); } inline void eat(token_type want) { if (ttype != want) err("wanted " + tt2str(want) + ", got " + tt2str(ttype) + (token.empty() ? std::string("") : (std::string(" with value ") + token))); advance(); } inline void str() { eat(basic_io::TOK_STRING); } inline void sym() { eat(basic_io::TOK_SYMBOL); } inline void hex() { eat(basic_io::TOK_HEX); } inline void str(std::string & v) { v = token; str(); } inline void sym(std::string & v) { v = token; sym(); } inline void hex(std::string & v) { v = token; hex(); } inline bool symp() { return ttype == basic_io::TOK_SYMBOL; } inline bool symp(std::string const & val) { return ttype == basic_io::TOK_SYMBOL && token == val; } inline void esym(std::string const & val) { if (!(ttype == basic_io::TOK_SYMBOL && token == val)) err("wanted symbol '" + val + + "', got " + tt2str(ttype) + (token.empty() ? std::string("") : (std::string(" with value ") + token))); advance(); } }; } #endif // __BASIC_IO_HH__