00001 // Copyright (C) 2001, Compaq Computer Corporation 00002 00003 // This file is part of Vesta. 00004 00005 // Vesta is free software; you can redistribute it and/or 00006 // modify it under the terms of the GNU Lesser General Public 00007 // License as published by the Free Software Foundation; either 00008 // version 2.1 of the License, or (at your option) any later version. 00009 00010 // Vesta is distributed in the hope that it will be useful, 00011 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00013 // Lesser General Public License for more details. 00014 00015 // You should have received a copy of the GNU Lesser General Public 00016 // License along with Vesta; if not, write to the Free Software 00017 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00018 00019 // Last modified on Sun Jan 8 20:10:24 EST 2006 by ken@xorian.net 00020 // modified on Mon Mar 30 10:33:49 PST 1998 by heydon 00021 // modified on Thu Jun 27 19:46:28 PDT 1996 by mann 00022 // modified on Tue Aug 8 14:39:20 PDT 1995 by levin 00023 00024 // Text.H -- a package for text strings 00025 // 00026 // A "Text" is an immutable sequence of characters. 00027 00028 #ifndef _TEXT_H 00029 #define _TEXT_H 00030 00031 #include <string> 00032 00033 #include "Basics.H" 00034 00035 class Text { 00036 public: 00037 /* In this interface, arguments of type "char *" named "str" are 00038 assumed to be null-terminated strings. */ 00039 00040 // constructors 00041 Text() throw (); 00042 Text(const Text& t) throw (); 00043 Text(const char c) throw (); 00044 Text(const char *str, void *copy = NULL) throw (); 00045 Text(const char *bytes, int len) throw (); 00046 Text(const std::string &str) throw(); 00047 /* These initialize the text to the empty string, a copy of the text "t", 00048 a string containing the single character "c", a copy of the null- 00049 terminated string "str", and the "len" chars pointed to by "bytes", 00050 respectively. In the latter case, it is an unchecked run-time error 00051 for any of the "len" bytes pointed to by "bytes" to be '\0'. In the 00052 case of initializing a Text from a null-terminated string "str", if 00053 the bytes pointed to by "str" are immutable and long-lived, an extra 00054 copy can be avoided by passing a non-NULL value for "copy". */ 00055 00056 // init from string 00057 void Init(const char *str) { this->s = str; } 00058 /* Initialize this text to "str" without any allocations. The 00059 client *must not* modify or de-allocate "str" after this call. 00060 This method is provided for cases where it is inconvenient to 00061 use the Text(const char*, void*) constructor above, but the 00062 client has just allocated "str" and can avoid a copy. */ 00063 00064 // destructor 00065 ~Text() throw (); 00066 00067 // assignment 00068 Text& operator = (const char* str) throw (); 00069 Text& operator = (const Text& t) throw (); 00070 00071 // relational operators 00072 friend bool operator == (const Text& t1, const Text& t2) throw (); 00073 friend bool operator != (const Text& t1, const Text& t2) throw (); 00074 friend bool operator < (const Text& t1, const Text& t2) throw () 00075 { return strcmp(t1.s, t2.s) < 0; } 00076 friend bool operator <= (const Text& t1, const Text& t2) throw () 00077 { return strcmp(t1.s, t2.s) <= 0; } 00078 friend bool operator > (const Text& t1, const Text& t2) throw () 00079 { return strcmp(t1.s, t2.s) > 0; } 00080 friend bool operator >= (const Text& t1, const Text& t2) throw () 00081 { return strcmp(t1.s, t2.s) >= 0; } 00082 00083 // concatenation 00084 friend Text operator + (const Text& t1, const Text& t2) throw (); 00085 friend Text operator + (const char* str, const Text& t) throw (); 00086 friend Text operator + (const Text& t, const char* str) throw (); 00087 00088 // destructive concatenation 00089 Text& operator += (const char* str) throw (); 00090 Text& operator += (const Text& t) throw (); 00091 00092 // conversion to (const char *) and (char *) 00093 // Note that these operations are unsafe, since they return 00094 // a pointer into the argument Text, which may be destroyed 00095 // before the pointer is discarded. 00096 const char *cchars() const throw () { return s; } 00097 char *chars() const throw () { return (char *)s; } 00098 // This operation is also unsafe because it permits subsequent 00099 // modification of the Text, which is supposed to be immutable. 00100 00101 // output 00102 friend std::ostream& operator << (std::ostream& os, const Text& t) throw () 00103 /* Write the text "t" to the stream "os". */ 00104 { return os << t.s; } 00105 00106 // "MaxInt" == the largest possible integer 00107 enum { MaxInt = (int)(~(1u << ((sizeof(int) * 8) - 1))) }; 00108 00109 // Other operations 00110 00111 char operator [] (int i) const throw () { return s[i]; } 00112 /* Return character "i" of the text. It is an unchecked run-time 00113 error if "i < 0" or "i >= Length()". */ 00114 00115 int Length() const throw () { return (int)strlen(this->s); } 00116 /* Return the number of characters in the text. */ 00117 00118 bool Empty() const throw () { return *(this->s) == '\0'; } 00119 /* Return "true" iff the text is empty. */ 00120 00121 Text Sub(int start, int len = MaxInt) const throw (); 00122 /* Return a sub-sequence of the text: empty if "len = 0" or if 00123 "start >= this->Length()"; otherwise, the subsequence ranging from 00124 "start" to the minimum of "start+len-1" and "this->Length()-1". It 00125 is an unchecked run-time error for "start < 0" or "len < 0". */ 00126 00127 int FindChar(char c, int start = 0) const throw (); 00128 /* Return the smallest index "i >= start" such that "c" is the "i"th 00129 character of this text (counting from 0) if one exists; otherwise, 00130 return -1. */ 00131 00132 int FindCharR(char c, int start = MaxInt) const throw (); 00133 /* Return the largest index "i <= start" such that "c" is the "i"th 00134 character of this text (counting from 0) if one exists; otherwise, 00135 return -1. */ 00136 00137 int FindText(const Text &substr, int start = 0) const throw (); 00138 /* Return the smallest index "i >= start" such that the text 00139 "substr" occurs in this text starting at position "i" (counting 00140 from 0); otherwise, return -1. */ 00141 00142 Word Hash() const throw (); 00143 /* Return a hash function of the text's contents. */ 00144 00145 static bool GCImpl() throw (); 00146 /* Return true iff the client has linked with the Text implementation that 00147 depends on being linked with a garbage-collector. This guarantees that 00148 the underlying storage for the text is not freed until there are no 00149 references to it. That fact can be used by implementations of 00150 subclasses such as "Atom" to avoid unnecessary copying. */ 00151 00152 Text WordWrap(const Text &prefix = "", unsigned int columns = 70) 00153 const throw (); 00154 /* Wrap the content of this text string at the whitesapce between 00155 words. Useful for formatting long messages. */ 00156 00157 Text PadLeft(unsigned int toLen, const Text &padding = " ") 00158 const throw(); 00159 Text PadRight(unsigned int toLen, const Text &padding = " ") 00160 const throw(); 00161 /* Pad the string on either the left or the right to at least a 00162 certain target length using a given padding string (which 00163 defaults to a space character). (This is intended to give a 00164 capability line "%5s" in printf.) The result string will 00165 always have exactly the requested length. If the padding 00166 string is longer than one character, the result may have a 00167 partial copy at the end of the passing. If the padding is the 00168 empty string, an assertion will fail. */ 00169 00170 protected: 00171 // The text is represented by a constant null-terminated string 00172 const char *s; 00173 }; 00174 00175 /* Memory Management: 00176 00177 All methods in this interface that produce new "Text" values allocate 00178 new buffers for those texts. In particular, the methods that allocate 00179 new buffers for their results are: 00180 00181 * the single-argument constructors 00182 * the "operator =" (assignment) methods 00183 * the "operator +" (concatenation) methods 00184 * the "operator +=" (destructive concatenation) methods 00185 * the "Sub" (substring) method 00186 00187 The destructor for a "Text" deletes the buffer associated with 00188 it (if any). Moreover, the "operator =" (assignment) and "operator +=" 00189 (destructive concatenation) methods delete the original buffer associated 00190 with their first argument. 00191 00192 The (const char *) and (char *) casting operators return a pointer to the 00193 buffer associated with the text. Hence, if the Text is destroyed (via the 00194 destructor or one of the destructive operators), the pointer becomes 00195 invalid. */ 00196 00197 #endif // _TEXT_H