00001 // Copyright (C) 2001, Compaq Computer Corporation 00002 // 00003 // This file is part of Vesta. 00004 // 00005 // Vesta is free software; you can redistribute it and/or 00006 // modify it under the terms of the GNU Lesser General Public 00007 // License as published by the Free Software Foundation; either 00008 // version 2.1 of the License, or (at your option) any later version. 00009 // 00010 // Vesta is distributed in the hope that it will be useful, 00011 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00013 // Lesser General Public License for more details. 00014 // 00015 // You should have received a copy of the GNU Lesser General Public 00016 // License along with Vesta; if not, write to the Free Software 00017 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00018 00019 // 00020 // VestaLog.H 00021 // Last modified on Thu Jul 15 16:28:20 EDT 2004 by ken@xorian.net 00022 // modified on Thu Jul 6 12:03:58 PDT 2000 by mann 00023 // 00024 // Log changes to the repository state 00025 // 00026 00027 #ifndef _VLOG 00028 #define _VLOG 00029 00030 #include "Basics.H" 00031 #include "Text.H" 00032 #include <fstream> 00033 00034 // INTRODUCTION 00035 00036 // A VestaLog is a persistent bytestream with atomic append. 00037 // Checkpointing and recovery features are also provided. 00038 00039 // The log does not maintain record boundaries; it is up to the 00040 // client to write its data in a format that can be read back later. 00041 00042 // VestaLog provides no locking of any kind, not even to protect its 00043 // own data structures; clients are responsible for this. Only one 00044 // client thread should use the log at a time. 
00045 00046 // ATOMIC APPEND 00047 00048 // After calling start, a client can tentatively append bytes, then 00049 // either commit (atomically make the append permanent), or abort 00050 // (return the log to its state at the time of the start). A crash 00051 // cleanly aborts any ongoing append. A client can call start 00052 // multiple times before calling commit or abort: start increments a 00053 // counter that is decremented by commit, and the bytes are committed 00054 // only when the counter reaches zero. One abort satisfies all 00055 // pending starts, however. 00056 00057 // CHECKPOINTING 00058 00059 // A checkpoint is a file that is effectively created (including its 00060 // complete contents) atomically. The checkpointBegin operation opens 00061 // a new checkpoint for writing, while checkpointEnd marks the 00062 // checkpoint as committed. If there is a crash before checkpointEnd 00063 // is called, the uncommitted checkpoint is not seen at recovery time. 00064 // The checkpointAbort operation also marks the current checkpoint as 00065 // uncommitted; it allows you to change your mind about writing a 00066 // checkpoint. 00067 00068 // At checkpoint time, logging switches to a new, empty log file. 00069 // This supports recovering from the most recent checkpoint plus the 00070 // log records written since the checkpoint. To support fuzzy 00071 // checkpointing (that is, checkpointing in parallel with logging new 00072 // updates), the switch to a new log file is made at checkpointBegin 00073 // time, and it is permitted to write log records between 00074 // checkpointBegin and checkpointEnd. This creates an extra 00075 // complication if there is a crash during checkpointing: the log 00076 // records written since the last committed checkpoint may be spread 00077 // across multiple log files. Handling this situation during recovery 00078 // is discussed below. 
00079 00080 // By default, if there is a crash during checkpointing, the 00081 // checkpoint that was in progress is aborted. Alternatively, the 00082 // checkpointResume method lets you resume writing the same checkpoint 00083 // you were working on before the crash. 00084 00085 // Committing a new checkpoint does not delete old logs or 00086 // checkpoints. If desired, this deletion can be done by calling the 00087 // prune method. Alternatively, the old data can be retained as a 00088 // backup against damaged or corrupt checkpoints; you can choose to 00089 // recover from an older committed checkpoint plus the log records 00090 // written since, or even entirely from log records. 00091 00092 // RECOVERY 00093 00094 // When a log is first opened, it is in "recovering" state, ready for 00095 // reading. By default, open arranges to begin reading at the start 00096 // of the log file associated with the most recent committed 00097 // checkpoint, or from the 0th log file if there have been no 00098 // checkpoints as yet. The client can begin recovery at an earlier 00099 // checkpoint by passing the checkpoint's version number to the open 00100 // method (see below). 00101 00102 // The openCheckpoint method gets the checkpoint file (if any) 00103 // associated with the current log file. Normally you will call this 00104 // method once, immediately after opening the log, in order to process 00105 // the checkpoint itself before processing the subsequent log records. 00106 00107 // The reading methods read bytes from the log. They throw Eof when 00108 // there are no more records in the current log file. At this point 00109 // the client must call nextLog in case there are more log files. If 00110 // nextLog returns true, the client must continue calling reading 00111 // methods to read the rest of the log; if false, it may call 00112 // loggingBegin to enter normal operation and begin appending new 00113 // records to the log. 
00114 00115 // Pseudocode for recovery: 00116 // 00117 // VestaLog log; 00118 // log.open("."); 00119 // fstream* ckp = log.openCheckpoint(); 00120 // // Restore state from ckp (ckp is NULL if 0.log is current) 00121 // ... 00122 // if (ckp) ckp->close(); 00123 // // Apply log records beyond ckp 00124 // for (;;) { 00125 // try { 00126 // char c; 00127 // log.get(c); // or other reading method 00128 // ... 00129 // } catch (VestaLog::Eof) { 00130 // if (!log.nextLog()) break; 00131 // } 00132 // } 00133 // log.loggingBegin(); 00134 00135 // READING THE LOG DURING CHECKPOINTING 00136 00137 // If you need to read the old log in order to construct a checkpoint, 00138 // but you want to avoid reading new records written after you started 00139 // the checkpoint, use the logVersion method to determine where to 00140 // stop. This method returns the version number of the log file that 00141 // is currently open. The version number increases after a successful 00142 // call to nextLog or a call to checkpointBegin. Call logVersion on 00143 // the VestaLog object you are writing to, immediately after calling 00144 // checkpointBegin, and let lv be the value returned. Open the same 00145 // log again and read from it in the normal way, but stop as soon as 00146 // logVersion returns lv. 00147 00148 // Pseudocode for reading the log during checkpointing: 00149 // 00150 // VestaLog wl; // log object being written to 00151 // ... 
00152 // fstream* ckp = wl.checkpointBegin(); 00153 // int lv = wl.logVersion(); 00154 // VestaLog rl; // same log, object being read 00155 // rl.open(".", -1, true); 00156 // for (;;) { 00157 // try { 00158 // // read from rl, write to ckp 00159 // } catch (VestaLog::Eof) { 00160 // bool more = rl.nextLog(); 00161 // assert(more); // there is always >= 1 to read, plus one to ignore 00162 // if (rl.logVersion() >= lv) break; 00163 // } 00164 // } 00165 // rl.close(); 00166 // ckp->close(); 00167 // wl.checkpointEnd(); 00168 00169 // FINDING OLD CHECKPOINTS AND LOGS 00170 00171 // Currently no methods are provided to determine which old 00172 // checkpoint version numbers are committed (and thus useful to pass 00173 // as the optional second argument to open). One can get this 00174 // information by examining the directory where the package stores its 00175 // files. Files are named as follows, where # is a decimal digit 00176 // string: 00177 00178 // - version contains the highest checkpoint number that is committed. 00179 // - version.new is an auxiliary file used in atomic checkpointing. 00180 // - pruned contains the highest checkpoint number that has been pruned. 00181 // - pruned.new is an auxiliary file used in pruning. 00182 // - #.log is the log beginning at the #.ckp checkpoint. 00183 // - The first log is 0.log, and there is no 0.ckp checkpoint. 00184 // - If #.ckp is present and # <= version, then #.ckp is a committed 00185 // checkpoint. Any #.ckp files with # > version must be ignored. 00186 00187 // LOG AND CHECKPOINT ONLINE BACKUP 00188 00189 // You can have the log (and optionally also checkpoints) written to a 00190 // second directory, as an on-line backup against damaged or lost 00191 // files. If log backup is on, all writes go to both logs, and 00192 // commit() does not return until both logs are synced. During 00193 // recovery, both logs are read in parallel. 
Any data that did not 00194 // reach both logs is considered uncommitted and ignored. When 00195 // loggingBegin is called, the backup log is made identical to the 00196 // primary log. 00197 00198 // (A subtle consequence of this algorithm is that you may see a 00199 // shorter log when recovering from both primary and backup than if 00200 // one of the logs is lost and you recover from the other alone. This 00201 // will happen if a commit() was in progress at the time of the last 00202 // crash and data up to the commit point had been fully written to one 00203 // log but not the other.) 00204 00205 // (!!It should be possible to speed up recovery a bit by reading from 00206 // only one log except for the portion between the last two commit 00207 // points, but this would complicate the recovery code and has not 00208 // been implemented.) 00209 00210 // No methods are currently provided to deal with the loss of a log or 00211 // to copy a log to make a new backup. If the specified primary and 00212 // backup logs are not both available at recovery time, open() will 00213 // raise the Error exception. To fix the problem, you will have to 00214 // determine which log directory is intact (the message printed with 00215 // the Error exception will help here) and manually copy it to the 00216 // other log directory. A log directory must be copied only when 00217 // the log is not open for writing. If one log appears to be lost and 00218 // you choose to recover from the other, it is important to (1) copy 00219 // the remaining log before starting recovery so that you are not 00220 // running with only one, and (2) make certain that the "lost" log 00221 // cannot be found later and inadvertently used (either alone or as 00222 // part of a primary/backup pair), since it will now be outdated and 00223 // incorrect. 
00224 00225 // If you ask for checkpoints to be backed up, the checkpointEnd 00226 // method makes a copy of the checkpoint file and stores it in 00227 // the backup log directory, then updates the version files in both 00228 // directories, before returning. The backup checkpoints are never 00229 // read in normal operation; they are kept only in case you ever lose 00230 // the primary and need to promote the backup log directory to 00231 // primary. 00232 00233 class VestaLogPrivate; 00234 00235 class VestaLog { 00236 public: 00237 class Exception {}; 00238 00239 // Hit end of current log file 00240 class Eof : public Exception {}; 00241 00242 // Log format error or I/O error. When Error is thrown, the log 00243 // enters state "bad". Only the close method is valid in this state. 00244 class Error : public Exception { 00245 public: 00246 int r; // OS errno, or 0 if not an OS error 00247 Text msg; 00248 inline Error() { }; 00249 inline Error(int r, const Text &msg) { 00250 this->r = r; this->msg = msg; }; 00251 inline Error(const Error &f) { 00252 this->r = f.r; this->msg = f.msg; }; 00253 inline Error& operator=(const Error &f) { 00254 this->r = f.r; this->msg = f.msg; return *this; }; 00255 }; 00256 00257 VestaLog() throw(); 00258 00259 // Open a log. State: initial -> recovering & !checkpointing. 00260 // dir = directory holding the log files. ver = checkpoint 00261 // version to start at; -1 means the latest version with a 00262 // committed checkpoint, or 0 if there are no committed 00263 // checkpoints. readonly = true for read-only access. 00264 // If lock == true, get an advisory lock on a file in the lock 00265 // directory (a read lock if readonly, else a write lock), 00266 // throwing an error if there is a lock conflict. 00267 // If dir2 != NULL, write a backup of the log in the 00268 // given directory. If bakckp == true, also backup checkpoint 00269 // files in dir2. dir2 must != NULL if bakckp is true. 
00270 void open(char *dir, int ver =-1, bool readonly =false, 00271 bool lock =false, char* dir2 =NULL, bool bakckp =false) 00272 throw(Error); 00273 00274 // Return the version number of the logfile that is currently 00275 // open, and (if checkpointing) the checkpoint that is currently 00276 // being written. This number increases when nextLog returns true 00277 // or checkpointBegin is called. State: !initial & !bad. 00278 int logVersion() throw(Error); 00279 00280 // Open (for reading) the checkpoint that the current log file 00281 // starts from. Returns NULL if 0.log is current. Throws Error 00282 // if the checkpoint associated with the current log file was not 00283 // committed. State: recovering. 00284 std::fstream *openCheckpoint() throw(Error); 00285 00286 // Read one character. State: recovering. 00287 void get(char& c) throw(Eof, Error); 00288 00289 // Read n bytes, or read up to (but not including) the next 00290 // "term" character, whichever is less. State: recovering. 00291 void get(char* p, int n, char term='\n') throw(Eof, Error); 00292 00293 // Read n bytes or read up to Eof, whichever is less. Return 00294 // number of bytes read. State: recovering. 00295 int read(char* p, int n) throw(Error); 00296 00297 // Read exactly n bytes. State: recovering. 00298 void readAll(char* p, int n) throw(Eof, Error); 00299 00300 // Test for end of current log file. State: recovering. 00301 bool eof() throw(Error); 00302 00303 // Begin reading log records from the next log file. Before 00304 // calling this method, you must have read up to Eof with get or 00305 // read. Returns true if there is a next log file and leaves the 00306 // log object in recovering state; otherwise returns false and 00307 // leaves the object in recovered state. State: recovering -> 00308 // (recovering | recovered). 00309 bool nextLog() throw(Error); 00310 00311 // Ready to start logging. 
(Even if you know in advance that the 00312 // log is empty, you must get into the recovered state before 00313 // calling loggingBegin(), by calling get, read, readAll, or eof 00314 // at least once, then calling nextLog() once.) Illegal if you 00315 // called open with readonly=true. State: recovered -> ready. 00316 void loggingBegin() throw(Error); 00317 00318 // If the current thread has already called start(), assert state 00319 // == logging and increment the nesting level of starts. 00320 // Otherwise set nesting = 1, state = logging, and start a record. 00321 // State: (ready | logging) -> logging. 00322 void start() throw(Error); 00323 00324 // Return the nesting level of starts. State: * 00325 int nesting() throw(); 00326 00327 // Write one character. State: logging. 00328 void put(char c) throw(Error); 00329 00330 // Write a NUL-terminated string. State: logging. 00331 void put(const char *p) throw(Error); 00332 00333 // Write n bytes. State: logging. 00334 void write(const char *p, int n) throw(Error); 00335 00336 // Commit the current record if --nesting == 0. 00337 // State: logging -> (--nesting == 0) ? ready : logging. 00338 void commit() throw(Error); 00339 00340 // Abort the current record and set nesting = 0. 00341 // State: logging -> ready. 00342 void abort() throw(Error); 00343 00344 // Open (for writing) a file to receive a new checkpoint and 00345 // return it. Switch logging into a new log file. State: ready & 00346 // !checkpointing -> ready & checkpointing. 00347 std::fstream *checkpointBegin(std::ios::openmode mode =std::ios::out) throw(Error); 00348 00349 // Atomically commit the current checkpoint. 00350 // State: ready & checkpointing -> ready & !checkpointing. 00351 void checkpointEnd() throw(Error); 00352 00353 // Abort the current checkpoint. State: ready & checkpointing -> 00354 // ready & !checkpointing. 
00355 void checkpointAbort() throw(Error); 00356 00357 // Reopen (for writing) a checkpoint that was being written at 00358 // the time of the last crash but was not yet committed. Will not 00359 // resume a checkpoint that has been aborted with checkpointAbort. 00360 // Returns NULL if there is no such checkpoint. Illegal if you 00361 // called open with readonly=true. State: recovered & 00362 // !checkpointing -> recovered & checkpointing. 00363 std::fstream *checkpointResume(std::ios::openmode mode =std::ios::out|std::ios::trunc) 00364 throw(Error); 00365 00366 // Delete old committed checkpoints and logs. ckpkeep = number 00367 // of committed checkpoints to retain. The conceptual empty 0th 00368 // checkpoint can be included in this count. It is not an error 00369 // to request keeping more checkpoints than are in existence. 00370 // If logkeep = false, delete any logs whose version number precedes 00371 // the oldest checkpoint kept. If logkeep = true, keep all logs; 00372 // uniformly keeping logs permits recovery entirely from logs (i.e., 00373 // from the 0th checkpoint). It is legal to set ckpkeep = 0, but 00374 // doing so will make recovery impossible unless you have kept all 00375 // your logs. If prunebak = true and a backup log directory was 00376 // specified at open time, prune it too; otherwise don't. 00377 // State: !initial & !bad. 00378 void prune(int ckpkeep, bool logkeep =false, bool prunebak =true) 00379 throw(Error); 00380 00381 // Stop whatever we're doing. State: * -> initial. 00382 void close() throw(); 00383 00384 private: 00385 VestaLogPrivate *vlp; 00386 }; 00387 00388 #endif // _VLOG