00001 // Copyright (C) 2001, Compaq Computer Corporation 00002 // 00003 // This file is part of Vesta. 00004 // 00005 // Vesta is free software; you can redistribute it and/or 00006 // modify it under the terms of the GNU Lesser General Public 00007 // License as published by the Free Software Foundation; either 00008 // version 2.1 of the License, or (at your option) any later version. 00009 // 00010 // Vesta is distributed in the hope that it will be useful, 00011 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00013 // Lesser General Public License for more details. 00014 // 00015 // You should have received a copy of the GNU Lesser General Public 00016 // License along with Vesta; if not, write to the Free Software 00017 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00018 00019 // Last modified on Fri Apr 29 17:58:36 EDT 2005 by ken@xorian.net 00020 // modified on Fri Nov 7 16:59:46 PST 1997 by heydon 00021 00022 // CacheEntry.H -- defines the cache entry class "CE::T" (and lists thereof) 00023 00024 #ifndef _CACHE_ENTRY_H 00025 #define _CACHE_ENTRY_H 00026 00027 #include <Basics.H> 00028 #include <FS.H> 00029 #include <VestaLog.H> 00030 #include <Recovery.H> 00031 #include <FP.H> 00032 #include <BitVector.H> 00033 #include <Model.H> 00034 #include <CacheIndex.H> 00035 #include <VestaVal.H> 00036 00037 #include "IntIntTblLR.H" 00038 #include "Combine.H" 00039 00040 namespace CE 00041 { 00042 // CE::T -- A cache entry 00043 00044 /* The methods of a "CE::T" are unsynchronized. However, each entry 00045 "ce" is naturally associated with a "VPKFile". We denote this by 00046 "VPKFile(ce)". The methods of a "CE::T" that read or write 00047 potentially changing parts of the entry state require that the lock 00048 "VPKFile(SELF).mu" be held. */ 00049 00050 class T { 00051 public: 00052 // constructors/destructor 00053 T(CacheEntry::Index ci, BitVector *uncommonNames, FP::List *fps, 00054 IntIntTblLR *imap, VestaVal::T *value, CacheEntry::Indices *kids, 00055 Model::T model) throw (); 00056 /* REQUIRES VPKFile(SELF).mu IN LL */ 00057 /* Initialize the new cache entry, where "ci" is the index of the 00058 entry and "uncommonNames" is the set of uncommon free variables 00059 for this entry (relative to the names stored in "VPKFile(SELF)"). 00060 "fps[imap(i)]" is the fingerprint of the "i"th name in the list of 00061 names "VPKFile(SELF).allNames", "value" is the entry's value, 00062 "kids" are the indices of this entry's children, and "model" is the 00063 model for this entry. All pointer arguments are required to be 00064 non-NULL. 00065 00066 As opposed to the "uncommonNames" bit vector, the "fps" array is 00067 not sparse. It contains exactly as many values as there are free 00068 variables for this entry. 00069 00070 The responsibility for deleting the space for "uncommonNames", 00071 "fps", "imap", "value", and "kids" is transferred from caller to 00072 callee. */ 00073 00074 T(const T *ce) throw (); 00075 /* REQUIRES VPKFile(SELF).mu IN LL */ 00076 /* Make a copy of the entry "ce". This copies the mutable fields and 00077 uses references for the immutable fields where possible. It is like 00078 a copy constructor, but requires an explicit pointer to an existing 00079 entry as an argument, so it is not likely to be used inadvertently. 00080 The regular copy constructor is still private. */ 00081 00082 T(RecoveryReader &rd) throw (VestaLog::Error, VestaLog::Eof) 00083 { Recover(rd); } 00084 /* REQUIRES VPKFile(SELF).mu IN LL */ 00085 /* Recover the cache entry from "rd" including the ``extra'' 00086 fields "imap" and "fps". */ 00087 00088 T(std::ifstream &ifs, bool readImmutable = true) 00089 throw (FS::EndOfFile, FS::Failure) 00090 { Read(ifs, readImmutable); } 00091 /* REQUIRES VPKFile(SELF).mu IN LL */ 00092 /* Read the cache entry from "ifs". This method does not read 00093 the ``extra'' fields "imap" and "fps"; use the "ReadExtras" 00094 routine below to read them. See the "Read" method below for 00095 the meaning of the "readImmutable" argument. */ 00096 00097 // computation functions 00098 FP::Tag *CombineFP(const BitVector& bv) const throw () 00099 /* REQUIRES VPKFile(SELF).mu IN LL */ 00100 /* Returns a new fingerprint that is the result of combining the 00101 fingerprints "fps[imap(i)]" for all "i" such that "bv.Read(i)". 00102 It is an unchecked runtime error for "bv" to have a bit set 00103 whose index is not in the domain of "imap". */ 00104 { return NEW_PTRFREE_CONSTR(Combine::FPTag, (*fps, bv, imap)); } 00105 00106 bool FPMatch(const FP::List& pkFPs) throw (); 00107 /* REQUIRES VPKFile(SELF).mu IN LL */ 00108 /* Return "true" iff the fingerprints "pkFPs" constitute a match 00109 against the uncommon FP's stored in this entry. The "uncommonNames" 00110 bit vector for this entry is used to compute the combined uncommon 00111 fingerprint for the fingerprints "pkFPs", and the result is 00112 compared against the entry's combined uncommon fingerprint. */ 00113 00114 void Pack(const BitVector *packMask, const IntIntTblLR *reMap) 00115 throw (); 00116 /* REQUIRES VPKFile(SELF).mu IN LL */ 00117 /* REQUIRES (packMask == NULL) == (reMap == NULL) */ 00118 /* Pack this enty's "uncommonNames" and "imap" according to "packMask" 00119 and "reMap". If both "packMask" and "reMap" are NULL, this is a 00120 no-op. Otherwise, the entry's fields are updated to reflect the 00121 fact that names corresponding to unset bits in "packMask" have been 00122 deleted from this entry's PKFile's "allNames" list. 00123 00124 In particular, the entry's "uncommonNames" are packed according 00125 to "packMask": all bits in "uncommonNames" corresponding to unset 00126 bits in "packMask" are removed (these bits are all required to be 00127 unset in "uncommonNames" as well), and the remaining bits are 00128 packed toward the LSB. "imap" is updated according to "reMap": 00129 each entry "k -> v" is replaced by the entry "k' -> v", where 00130 "k -> k'" in "reMap". */ 00131 00132 void CycleNames(const BitVector &delBV, 00133 const IntIntTblLR &delMap) throw (); 00134 /* REQUIRES VPKFile(SELF).mu IN LL */ 00135 /* REQUIRES !delBV.IsEmpty() */ 00136 /* Update this cache entry's "uncommonNames" and "imap" fields to 00137 reflect the fact that names with indices corresponding to set bits 00138 in "delBV" are being assigned new indices according to 00139 "delMap". 00140 00141 In particular, the domain of "delMap" is precisely the set of 00142 set bits in "delBV"; the entry "old -> new" in "delMap" means 00143 that names with index "old" should now have index "new". The 00144 corresponding ``old'' bits in "uncommonNames" are required to all 00145 be set; they are reset and the corresponding ``new'' bits (which 00146 are required to all be initially reset) are set. Similarly, for 00147 each entry "old -> new" in "delMap", the enty "old -> ix" is 00148 replaced by the enty "new ->ix" in "imap". "imap" is required to 00149 have all of the ``old'' values in its initial domain, but none of 00150 the ``new'' values. */ 00151 00152 void Update(const BitVector *exCommonNames, 00153 const BitVector *exUncommonNames, const BitVector *packMask, 00154 const IntIntTblLR *reMap) throw (); 00155 /* REQUIRES VPKFile(SELF).mu IN LL */ 00156 /* REQUIRES (packMask == NULL) == (reMap == NULL) */ 00157 /* Update this entry's "uncommonNames" (and corresponding uncommon 00158 fingerprint) according to the arguments. Bits in "exCommonNames" 00159 (if it is non-NULL) are set in "uncommonNames", and bits in 00160 "exUncommonNames" (if it is non-NULL) are reset in "uncommonNames". 00161 If "uncommonNames" is changed at all, the uncommon fingerprint is 00162 recomputed. Finally, "uncommonNames" and "imap" are ``packed'' 00163 according to "packMask" and "reMap" as described by the "Pack" 00164 method above. */ 00165 00166 void XorUncommonNames(const BitVector &mask) throw (); 00167 /* REQUIRES VPKFile(SELF).mu IN LL */ 00168 /* Exclusive-OR the "mask" bits with this entry's "uncommonNames", 00169 and recompute the entry's "uncommonTag" since the set of uncommon 00170 names has changed. */ 00171 00172 void UnlazyUncommonFP() throw () 00173 { this->uncommonTag.UnlazyFPVal(*fps, *uncommonNames, imap); } 00174 /* REQUIRES VPKFile(SELF).mu IN LL */ 00175 /* Unlazy the tag of the uncommon names. */ 00176 00177 bool UncommonFPIsUnlazied() throw () 00178 { return this->uncommonNames->Size() == 0 00179 || this->uncommonTag.FPValIsUnlazied(); } 00180 00181 // Consistency checking 00182 static bool CheckUsedNames(const BitVector *commonNames, 00183 const BitVector *uncommonNames, 00184 const IntIntTblLR* imap, 00185 const FP::List *fps, 00186 /*OUT*/ unsigned int &missing) throw (); 00187 bool CheckUsedNames(const BitVector *commonNames, 00188 /*OUT*/ unsigned int &missing) 00189 const throw () 00190 { 00191 return CE::T::CheckUsedNames(commonNames, this->uncommonNames, 00192 this->imap, this->fps, 00193 missing); 00194 } 00195 /* REQUIRES VPKFile(SELF).mu IN LL */ 00196 /* Check that all names used by this entry (determined from 00197 imap/fps) are either in this entry's uncommonNames or the 00198 passed commonNames. (Note that commonNames can be NULL if 00199 this is a new uncommon entry, in which case its own 00200 uncommonNames should include all the names the entry uses.) 00201 The result indicates whether there are any names missing 00202 from the union of uncommonNames and commonNames. If it 00203 returns true (indicating a problem), "missing" is set to 00204 the index of the first name found not marked as used. The 00205 class member (static version) is provided to make it 00206 possible to perform this consistency check prior to 00207 creating a CE::T object. */ 00208 00209 // Accessor functions 00210 const BitVector* UncommonNames() const throw () 00211 { return this->uncommonNames; } 00212 /* REQUIRES VPKFile(SELF).mu IN LL */ 00213 CacheEntry::Index CI() const throw () { return this->ci; } 00214 const VestaVal::T* Value() const throw () { return this->value; } 00215 const CacheEntry::Indices *Kids() const throw () { return this->kids; } 00216 const IntIntTblLR *IMap() const throw () { return this->imap; } 00217 /* REQUIRES VPKFile(SELF).mu IN LL */ 00218 const FP::List *FPs() const throw () { return this->fps; } 00219 00220 // log/recover 00221 void Log(VestaLog& log) const throw (VestaLog::Error); 00222 /* REQUIRES VPKFile(SELF).mu IN LL */ 00223 /* Append this cache entry to "log", including the ``extra'' 00224 fields "imap" and "fps". Requires "log" to be in the 00225 "logging" state. */ 00226 void Recover(RecoveryReader &rd) 00227 throw (VestaLog::Error, VestaLog::Eof); 00228 /* REQUIRES VPKFile(SELF).mu IN LL */ 00229 /* Recover the cache entry from "rd", including the ``extra'' 00230 fields "imap" and "fps". */ 00231 00232 // write/read 00233 /* Note: neither "Write" nor "Read" writes/reads the ``extra'' fields 00234 "imap" and "fps"; use the "WriteExtras" and "ReadExtras" methods 00235 below to write/read them. */ 00236 void Write(std::ostream &ofs, std::ifstream *ifs = NULL) const 00237 throw (FS::Failure); 00238 /* REQUIRES VPKFile(SELF).mu IN LL */ 00239 /* If the immutable fields "value" and "kids" were not read into 00240 memory, but are instead represented by portions of a file, then 00241 "ifs" must be non-NULL; its bytes are written from there. */ 00242 void Read(std::istream &ifs, bool readImmutable = true) 00243 throw (FS::EndOfFile, FS::Failure); 00244 /* REQUIRES VPKFile(SELF).mu IN LL */ 00245 /* The immutable fields "value" and "kids" are read into memory by 00246 "Read" iff "readImmutable" is "true". If "readImmutable" is false, 00247 those fields are instead set to pointers to "ImmutableVal" objects 00248 that record the starting position and length of the values in 00249 "ifs". */ 00250 00251 // write/read extras 00252 void WriteExtras(std::ostream &ofs) const throw (FS::Failure); 00253 /* REQUIRES VPKFile(SELF).mu IN LL */ 00254 void ReadExtras(std::istream &ifs) throw (FS::EndOfFile, FS::Failure); 00255 /* REQUIRES VPKFile(SELF).mu IN LL */ 00256 00257 // print 00258 void Debug(std::ostream &os, int indent = 0) const throw (); 00259 /* REQUIRES VPKFile(SELF).mu IN LL */ 00260 void DebugExtras(std::ostream &os, int indent = 0) const throw (); 00261 /* REQUIRES VPKFile(SELF).mu IN LL */ 00262 00263 private: 00264 // data fields 00265 CacheEntry::Index ci; // readonly after init; see below 00266 Model::T model; // readonly after init 00267 BitVector *uncommonNames; // mutable 00268 Combine::XorFPTag uncommonTag; // mutable 00269 VestaVal::T *value; // readonly after init; see below 00270 CacheEntry::Indices *kids; // readonly after init; see below 00271 00272 // "extras" 00273 IntIntTblLR *imap; // mutable 00274 FP::List *fps; // readonly after init 00275 00276 /* "ci" is the index of this cache entry. Its most significant bit 00277 (MSB) is used to represent whether the immutable fields "value" 00278 and "kids" have been read into memory. If the MSB is *not* 00279 set, then those fields have been read into memory. If the MSB is 00280 set, then the real index of this cache entry is "ci" without the 00281 MSB set, and the fields "value" and "kids" are actually pointers 00282 "ImmutableVal" objects that record the starting position and 00283 length of the value in the existing SMultiPKFile where the actual 00284 values are stored. 00285 00286 "uncommonNames" is the bit vector of this entry's uncommon names; 00287 it is interpreted relative to all the names of this entry's PKFile 00288 (i.e., the PKFile's "allNames" field). 00289 00290 The field "uncommonTag" is the combined fingerprint of the 00291 fingerprints for this entry's uncommon names. We do not store the 00292 combined fingerprint of the common names in the cache entry. For 00293 entries that don't have all common names it is unnecessary, and 00294 for entries that do have all common names, the entry will be 00295 stored in a table indexed by its common fingerprint. 00296 00297 The fields "imap" and "fps" are ``extra'' fields that are not 00298 required to perform a lookup. However, they are required to 00299 update the entry when the PKFile's set of common names changes. 00300 The "imap" is a table mapping indices of names in this entry's 00301 PKFile to the indices of the corresponding fingerprint in the 00302 "fps" list. Hence, the *domain* of "imap" is potentially sparse, 00303 while the *range* of "imap" is the interval "[0, fps->len)". 00304 00305 The fingerprints "fps" are read-only after initialization. They 00306 are the fingerprints for all of this entry's free variables, 00307 stored in the order in which they were passed to the AddEntry 00308 method when the entry was created. 00309 00310 Hence, the free variables for an entry "ce" are those names in 00311 "VPKFile(ce).allNames" corresponding to set bits in the abstract 00312 bit vector: 00313 00314 namesBV(ce) = VPKFile(ce).commonNames + *(ce.uncommonNames). 00315 00316 The fingerprint of name "i" (i.e., "VPKFile(ce).allNames[i]") is: 00317 00318 ce.fps[ce.imap[i]]. 00319 00320 If we write "num(bv)" to represent the number of set bits 00321 in the bit vector "bv", and "domain(tbl)" and "range(tbl)" to 00322 represent the domain and range of the table "tbl", then we have the 00323 following invariants: 00324 00325 I0. ce.fps->len = num(namesBV(ce)) 00326 I1. domain(imap) = namesBV(ce) 00327 I2. range(imap) = { i : 0 <= i < ce.fps->len } 00328 00329 The "uncommonTag" is computed from the "uncommonNames", "imap", 00330 and "fps" fields. Define: 00331 00332 FPsXOR(bv, map) = 00333 { XOR i | bv.Read(i) | fps[map[i]].w } 00334 00335 FPsCombine(bv, map) = 00336 { FP::T::Extend i | bv.Read(i) | fps[map[i]].w } 00337 00338 That is, "FPsXOR(bv, map)" is the exclusive OR of the words in the 00339 entry's "fps" field for the set bits of "bv" (after adjusting the 00340 bit index via "map"). Similarly, "FPsCombine(bv, map)" is the 00341 combined fingerprint of those same words (in order of increasing 00342 bit index in "bv"). For a particular value of "ce.uncommonNames" 00343 and "ce.imap", the invariant for the "ce.uncommonTag" field is 00344 then given by: 00345 00346 I3. ce.uncommonTag.xor = FPsXOR(ce.uncommonNames, ce.imap) 00347 /\ ce.uncommonTag.fp = FPsCombine(ce.uncommonNames, ce.imap) 00348 00349 Whenever "uncommonNames" or "imap" changes, "uncommonTag" must be 00350 re-computed to establish I3. 00351 00352 On a lookup, the "imap" field is only necessary if either the "xor" 00353 or "fp" field of the "uncommonTag" must be computed. When a new 00354 entry is added, we always compute the "xor" field. But the "fp" 00355 field is computed lazily. Hence, we must keep the "imap" in memory 00356 for new entries. However, when a new entry is flushed to disk, the 00357 "fp" field is unlazied. Hence, we do not have to read the "imap" 00358 field into memory when we read an entry off the disk. */ 00359 00360 // hide copy constructor from clients 00361 T(const T&); 00362 }; 00363 00364 // A list of cache entries 00365 // 00366 // Since the "tail" field of a list cell is mutable, reads and 00367 // writes on it must be protected by a lock. This is most likely 00368 // the lock that protects the field that points to the head of 00369 // the linked list. It is denoted here by "ListLock(SELF)". 00370 class List { 00371 public: 00372 // constructor 00373 List(T *head, List *tail=(List *)NULL) throw () 00374 /* Return a new list with formed by consing together the element 00375 "head" with the list "tail". */ 00376 { this->head = head; this->tail = tail; } 00377 00378 // length 00379 static unsigned Length(const List *list) throw (); 00380 00381 // accessors 00382 T* Head() const throw () { return head; } 00383 List* Tail() const throw () 00384 /* REQUIRES ListLock(SELF) IN LL */ 00385 { return tail; } 00386 void SetTail(List *newTail) throw () 00387 /* REQUIRES ListLock(SELF) IN LL */ 00388 { tail = newTail; } 00389 private: 00390 T *head; // readonly after init 00391 List *tail; // mutable 00392 }; 00393 } 00394 00395 #endif // _CACHE_ENTRY_H