Main Page | Namespace List | Class Hierarchy | Class List | Directories | File List | Namespace Members | Class Members | File Members

CacheEntry.H

Go to the documentation of this file.
00001 // Copyright (C) 2001, Compaq Computer Corporation
00002 // 
00003 // This file is part of Vesta.
00004 // 
00005 // Vesta is free software; you can redistribute it and/or
00006 // modify it under the terms of the GNU Lesser General Public
00007 // License as published by the Free Software Foundation; either
00008 // version 2.1 of the License, or (at your option) any later version.
00009 // 
00010 // Vesta is distributed in the hope that it will be useful,
00011 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00012 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013 // Lesser General Public License for more details.
00014 // 
00015 // You should have received a copy of the GNU Lesser General Public
00016 // License along with Vesta; if not, write to the Free Software
00017 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00018 
00019 // Last modified on Fri Apr 29 17:58:36 EDT 2005 by ken@xorian.net
00020 //      modified on Fri Nov  7 16:59:46 PST 1997 by heydon
00021 
00022 // CacheEntry.H -- defines the cache entry class "CE::T" (and lists thereof)
00023 
00024 #ifndef _CACHE_ENTRY_H
00025 #define _CACHE_ENTRY_H
00026 
00027 #include <Basics.H>
00028 #include <FS.H>
00029 #include <VestaLog.H>
00030 #include <Recovery.H>
00031 #include <FP.H>
00032 #include <BitVector.H>
00033 #include <Model.H>
00034 #include <CacheIndex.H>
00035 #include <VestaVal.H>
00036 
00037 #include "IntIntTblLR.H"
00038 #include "Combine.H"
00039 
00040 namespace CE
00041 {
00042   // CE::T -- A cache entry
00043 
00044   /* The methods of a "CE::T" are unsynchronized. However, each entry
00045      "ce" is naturally associated with a "VPKFile". We denote this by
00046      "VPKFile(ce)". The methods of a "CE::T" that read or write
00047      potentially changing parts of the entry state require that the lock
00048      "VPKFile(SELF).mu" be held. */
00049 
00050   class T {
00051   public:
00052     // constructors/destructor
00053     T(CacheEntry::Index ci, BitVector *uncommonNames, FP::List *fps,
00054       IntIntTblLR *imap, VestaVal::T *value, CacheEntry::Indices *kids,
00055       Model::T model) throw ();
00056     /* REQUIRES VPKFile(SELF).mu IN LL */
00057     /* Initialize the new cache entry, where "ci" is the index of the
00058        entry and "uncommonNames" is the set of uncommon free variables
00059        for this entry (relative to the names stored in "VPKFile(SELF)").
00060        "fps[imap(i)]" is the fingerprint of the "i"th name in the list of
00061        names "VPKFile(SELF).allNames", "value" is the entry's value,
00062        "kids" are the indices of this entry's children, and "model" is the
00063        model for this entry. All pointer arguments are required to be
00064        non-NULL.
00065 
00066        As opposed to the "uncommonNames" bit vector, the "fps" array is
00067        not sparse. It contains exactly as many values as there are free
00068        variables for this entry.
00069 
00070        The responsibility for deleting the space for "uncommonNames",
00071        "fps", "imap", "value", and "kids" is transferred from caller to
00072        callee. */
00073 
00074     T(const T *ce) throw ();
00075     /* REQUIRES VPKFile(SELF).mu IN LL */
00076     /* Make a copy of the entry "ce". This copies the mutable fields and
00077        uses references for the immutable fields where possible. It is like
00078        a copy constructor, but requires an explicit pointer to an existing
00079        entry as an argument, so it is not likely to be used inadvertently.
00080        The regular copy constructor is still private. */
00081 
00082     T(RecoveryReader &rd) throw (VestaLog::Error, VestaLog::Eof)
00083     { Recover(rd); }
00084     /* REQUIRES VPKFile(SELF).mu IN LL */
00085     /* Recover the cache entry from "rd" including the ``extra''
00086        fields "imap" and "fps". */
00087 
00088     T(std::ifstream &ifs, bool readImmutable = true)
00089       throw (FS::EndOfFile, FS::Failure)
00090     { Read(ifs, readImmutable); }
00091     /* REQUIRES VPKFile(SELF).mu IN LL */
00092     /* Read the cache entry from "ifs". This method does not read
00093        the ``extra'' fields "imap" and "fps"; use the "ReadExtras"
00094        routine below to read them. See the "Read" method below for
00095        the meaning of the "readImmutable" argument. */
00096 
00097     // computation functions
00098     FP::Tag *CombineFP(const BitVector& bv) const throw ()
00099     /* REQUIRES VPKFile(SELF).mu IN LL */
00100     /* Returns a new fingerprint that is the result of combining the
00101        fingerprints "fps[imap(i)]" for all "i" such that "bv.Read(i)".
00102        It is an unchecked runtime error for "bv" to have a bit set
00103        whose index is not in the domain of "imap". */
00104     { return NEW_PTRFREE_CONSTR(Combine::FPTag, (*fps, bv, imap)); }
00105 
00106     bool FPMatch(const FP::List& pkFPs) throw ();
00107     /* REQUIRES VPKFile(SELF).mu IN LL */
00108     /* Return "true" iff the fingerprints "pkFPs" constitute a match
00109        against the uncommon FP's stored in this entry. The "uncommonNames"
00110        bit vector for this entry is used to compute the combined uncommon
00111        fingerprint for the fingerprints "pkFPs", and the result is
00112        compared against the entry's combined uncommon fingerprint. */
00113 
00114     void Pack(const BitVector *packMask, const IntIntTblLR *reMap)
00115       throw ();
00116     /* REQUIRES VPKFile(SELF).mu IN LL */
00117     /* REQUIRES (packMask == NULL) == (reMap == NULL) */
00118     /* Pack this enty's "uncommonNames" and "imap" according to "packMask"
00119        and "reMap". If both "packMask" and "reMap" are NULL, this is a
00120        no-op. Otherwise, the entry's fields are updated to reflect the
00121        fact that names corresponding to unset bits in "packMask" have been
00122        deleted from this entry's PKFile's "allNames" list.
00123 
00124        In particular, the entry's "uncommonNames" are packed according
00125        to "packMask": all bits in "uncommonNames" corresponding to unset
00126        bits in "packMask" are removed (these bits are all required to be
00127        unset in "uncommonNames" as well), and the remaining bits are
00128        packed toward the LSB. "imap" is updated according to "reMap":
00129        each entry "k -> v" is replaced by the entry "k' -> v", where
00130        "k -> k'" in "reMap". */
00131 
00132     void CycleNames(const BitVector &delBV,
00133                     const IntIntTblLR &delMap) throw ();
00134     /* REQUIRES VPKFile(SELF).mu IN LL */
00135     /* REQUIRES !delBV.IsEmpty() */
00136     /* Update this cache entry's "uncommonNames" and "imap" fields to
00137        reflect the fact that names with indices corresponding to set bits
00138        in "delBV" are being assigned new indices according to
00139        "delMap". 
00140 
00141        In particular, the domain of "delMap" is precisely the set of
00142        set bits in "delBV"; the entry "old -> new" in "delMap" means
00143        that names with index "old" should now have index "new". The
00144        corresponding ``old'' bits in "uncommonNames" are required to all
00145        be set; they are reset and the corresponding ``new'' bits (which
00146        are required to all be initially reset) are set. Similarly, for
00147        each entry "old -> new" in "delMap", the enty "old -> ix" is
00148        replaced by the enty "new ->ix" in "imap". "imap" is required to
00149        have all of the ``old'' values in its initial domain, but none of
00150        the ``new'' values. */ 
00151 
00152     void Update(const BitVector *exCommonNames,
00153                 const BitVector *exUncommonNames, const BitVector *packMask,
00154                 const IntIntTblLR *reMap) throw ();
00155     /* REQUIRES VPKFile(SELF).mu IN LL */
00156     /* REQUIRES (packMask == NULL) == (reMap == NULL) */
00157     /* Update this entry's "uncommonNames" (and corresponding uncommon
00158        fingerprint) according to the arguments. Bits in "exCommonNames"
00159        (if it is non-NULL) are set in "uncommonNames", and bits in
00160        "exUncommonNames" (if it is non-NULL) are reset in "uncommonNames".
00161        If "uncommonNames" is changed at all, the uncommon fingerprint is
00162        recomputed. Finally, "uncommonNames" and "imap" are ``packed''
00163        according to "packMask" and "reMap" as described by the "Pack"
00164        method above. */
00165 
00166     void XorUncommonNames(const BitVector &mask) throw ();
00167     /* REQUIRES VPKFile(SELF).mu IN LL */
00168     /* Exclusive-OR the "mask" bits with this entry's "uncommonNames",
00169        and recompute the entry's "uncommonTag" since the set of uncommon
00170        names has changed. */
00171 
00172     void UnlazyUncommonFP() throw ()
00173     { this->uncommonTag.UnlazyFPVal(*fps, *uncommonNames, imap); }
00174     /* REQUIRES VPKFile(SELF).mu IN LL */
00175     /* Unlazy the tag of the uncommon names. */
00176 
00177     bool UncommonFPIsUnlazied() throw ()
00178     { return this->uncommonNames->Size() == 0
00179         || this->uncommonTag.FPValIsUnlazied(); }
00180 
00181     // Consistency checking
00182     static bool CheckUsedNames(const BitVector *commonNames,
00183                                const BitVector *uncommonNames,
00184                                const IntIntTblLR* imap,
00185                                const FP::List *fps,
00186                                /*OUT*/ unsigned int &missing) throw ();
00187     bool CheckUsedNames(const BitVector *commonNames,
00188                         /*OUT*/ unsigned int &missing)
00189       const throw ()
00190     {
00191       return CE::T::CheckUsedNames(commonNames, this->uncommonNames,
00192                                    this->imap, this->fps,
00193                                    missing);
00194     }
00195     /* REQUIRES VPKFile(SELF).mu IN LL */
00196     /* Check that all names used by this entry (determined from
00197        imap/fps) are either in this entry's uncommonNames or the
00198        passed commonNames.  (Note that commonNames can be NULL if
00199        this is a new uncommon entry, in which case its own
00200        uncommonNames should include all the names the entry uses.)
00201        The result indicates whether there are any names missing
00202        from the union of uncommonNames and commonNames.  If it
00203        returns true (indicating a problem), "missing" is set to
00204        the index of the first name found not marked as used.  The
00205        class member (static version) is provided to make it
00206        possible to perform this consistency check prior to
00207        creating a CE::T object. */
00208 
00209     // Accessor functions
00210     const BitVector* UncommonNames() const throw ()
00211     { return this->uncommonNames; }
00212     /* REQUIRES VPKFile(SELF).mu IN LL */
00213     CacheEntry::Index CI() const throw () { return this->ci; }
00214     const VestaVal::T* Value() const throw () { return this->value; }
00215     const CacheEntry::Indices *Kids() const throw () { return this->kids; }
00216     const IntIntTblLR *IMap() const throw () { return this->imap; }
00217     /* REQUIRES VPKFile(SELF).mu IN LL */
00218     const FP::List *FPs() const throw () { return this->fps; }
00219 
00220     // log/recover
00221     void Log(VestaLog& log) const throw (VestaLog::Error);
00222     /* REQUIRES VPKFile(SELF).mu IN LL */
00223     /* Append this cache entry to "log", including the ``extra''
00224        fields "imap" and "fps". Requires "log" to be in the
00225        "logging" state. */
00226     void Recover(RecoveryReader &rd)
00227       throw (VestaLog::Error, VestaLog::Eof);
00228     /* REQUIRES VPKFile(SELF).mu IN LL */
00229     /* Recover the cache entry from "rd", including the ``extra''
00230        fields "imap" and "fps". */
00231 
00232     // write/read
00233     /* Note: neither "Write" nor "Read" writes/reads the ``extra'' fields
00234        "imap" and "fps"; use the "WriteExtras" and "ReadExtras" methods
00235        below to write/read them. */
00236     void Write(std::ostream &ofs, std::ifstream *ifs = NULL) const
00237       throw (FS::Failure);
00238     /* REQUIRES VPKFile(SELF).mu IN LL */
00239     /* If the immutable fields "value" and "kids" were not read into
00240        memory, but are instead represented by portions of a file, then
00241        "ifs" must be non-NULL; its bytes are written from there. */
00242     void Read(std::istream &ifs, bool readImmutable = true)
00243       throw (FS::EndOfFile, FS::Failure);
00244     /* REQUIRES VPKFile(SELF).mu IN LL */
00245     /* The immutable fields "value" and "kids" are read into memory by
00246        "Read" iff "readImmutable" is "true". If "readImmutable" is false,
00247        those fields are instead set to pointers to "ImmutableVal" objects
00248        that record the starting position and length of the values in
00249        "ifs". */
00250 
00251     // write/read extras
00252     void WriteExtras(std::ostream &ofs) const throw (FS::Failure);
00253     /* REQUIRES VPKFile(SELF).mu IN LL */
00254     void ReadExtras(std::istream &ifs) throw (FS::EndOfFile, FS::Failure);
00255     /* REQUIRES VPKFile(SELF).mu IN LL */
00256 
00257     // print
00258     void Debug(std::ostream &os, int indent = 0) const throw ();
00259     /* REQUIRES VPKFile(SELF).mu IN LL */
00260     void DebugExtras(std::ostream &os, int indent = 0) const throw ();
00261     /* REQUIRES VPKFile(SELF).mu IN LL */
00262 
00263   private:
00264     // data fields
00265     CacheEntry::Index ci;          // readonly after init; see below
00266     Model::T model;                // readonly after init
00267     BitVector *uncommonNames;      // mutable
00268     Combine::XorFPTag uncommonTag; // mutable
00269     VestaVal::T *value;            // readonly after init; see below
00270     CacheEntry::Indices *kids;     // readonly after init; see below
00271 
00272     // "extras"
00273     IntIntTblLR *imap;             // mutable
00274     FP::List *fps;                     // readonly after init
00275 
00276     /* "ci" is the index of this cache entry. Its most significant bit
00277        (MSB) is used to represent whether the immutable fields "value"
00278        and "kids" have been read into memory. If the MSB is *not*
00279        set, then those fields have been read into memory. If the MSB is
00280        set, then the real index of this cache entry is "ci" without the
00281        MSB set, and the fields "value" and "kids" are actually pointers
00282        "ImmutableVal" objects that record the starting position and
00283        length of the value in the existing SMultiPKFile where the actual
00284        values are stored.
00285 
00286        "uncommonNames" is the bit vector of this entry's uncommon names;
00287        it is interpreted relative to all the names of this entry's PKFile
00288        (i.e., the PKFile's "allNames" field).
00289 
00290        The field "uncommonTag" is the combined fingerprint of the
00291        fingerprints for this entry's uncommon names. We do not store the
00292        combined fingerprint of the common names in the cache entry. For
00293        entries that don't have all common names it is unnecessary, and
00294        for entries that do have all common names, the entry will be
00295        stored in a table indexed by its common fingerprint.
00296  
00297        The fields "imap" and "fps" are ``extra'' fields that are not
00298        required to perform a lookup. However, they are required to
00299        update the entry when the PKFile's set of common names changes.
00300        The "imap" is a table mapping indices of names in this entry's
00301        PKFile to the indices of the corresponding fingerprint in the
00302        "fps" list. Hence, the *domain* of "imap" is potentially sparse,
00303        while the *range* of "imap" is the interval "[0, fps->len)".
00304  
00305        The fingerprints "fps" are read-only after initialization. They
00306        are the fingerprints for all of this entry's free variables,
00307        stored in the order in which they were passed to the AddEntry
00308        method when the entry was created.
00309 
00310        Hence, the free variables for an entry "ce" are those names in
00311        "VPKFile(ce).allNames" corresponding to set bits in the abstract
00312        bit vector:
00313 
00314        namesBV(ce) =  VPKFile(ce).commonNames + *(ce.uncommonNames).
00315 
00316        The fingerprint of name "i" (i.e., "VPKFile(ce).allNames[i]") is:
00317 
00318        ce.fps[ce.imap[i]].
00319 
00320        If we write "num(bv)" to represent the number of set bits
00321        in the bit vector "bv", and "domain(tbl)" and "range(tbl)" to
00322        represent the domain and range of the table "tbl", then we have the
00323        following invariants:
00324 
00325        I0. ce.fps->len = num(namesBV(ce))
00326        I1. domain(imap) = namesBV(ce)
00327        I2. range(imap) = { i : 0 <= i < ce.fps->len }
00328 
00329        The "uncommonTag" is computed from the "uncommonNames", "imap",
00330        and "fps" fields. Define:
00331 
00332        FPsXOR(bv, map) =
00333        { XOR i | bv.Read(i) | fps[map[i]].w }
00334 
00335        FPsCombine(bv, map) =
00336        { FP::T::Extend i | bv.Read(i) | fps[map[i]].w }
00337 
00338        That is, "FPsXOR(bv, map)" is the exclusive OR of the words in the
00339        entry's "fps" field for the set bits of "bv" (after adjusting the
00340        bit index via "map"). Similarly, "FPsCombine(bv, map)" is the
00341        combined fingerprint of those same words (in order of increasing
00342        bit index in "bv"). For a particular value of "ce.uncommonNames"
00343        and "ce.imap", the invariant for the "ce.uncommonTag" field is
00344        then given by: 
00345 
00346        I3. ce.uncommonTag.xor = FPsXOR(ce.uncommonNames, ce.imap)
00347        /\ ce.uncommonTag.fp = FPsCombine(ce.uncommonNames, ce.imap)
00348 
00349        Whenever "uncommonNames" or "imap" changes, "uncommonTag" must be
00350        re-computed to establish I3.
00351 
00352        On a lookup, the "imap" field is only necessary if either the "xor"
00353        or "fp" field of the "uncommonTag" must be computed. When a new
00354        entry is added, we always compute the "xor" field. But the "fp"
00355        field is computed lazily. Hence, we must keep the "imap" in memory
00356        for new entries. However, when a new entry is flushed to disk, the
00357        "fp" field is unlazied. Hence, we do not have to read the "imap"
00358        field into memory when we read an entry off the disk. */
00359 
00360     // hide copy constructor from clients
00361     T(const T&);
00362   };
00363 
00364   // A list of cache entries
00365   //
00366   // Since the "tail" field of a list cell is mutable, reads and
00367   // writes on it must be protected by a lock. This is most likely
00368   // the lock that protects the field that points to the head of
00369   // the linked list. It is denoted here by "ListLock(SELF)".
00370   class List {
00371   public:
00372     // constructor
00373     List(T *head, List *tail=(List *)NULL) throw ()
00374     /* Return a new list with formed by consing together the element
00375        "head" with the list "tail". */
00376     { this->head = head; this->tail = tail; }
00377 
00378     // length
00379     static unsigned Length(const List *list) throw ();
00380 
00381     // accessors
00382     T* Head() const throw () { return head; }
00383     List* Tail() const throw ()
00384     /* REQUIRES ListLock(SELF) IN LL */
00385     { return tail; }
00386     void SetTail(List *newTail) throw ()
00387     /* REQUIRES ListLock(SELF) IN LL */
00388     { tail = newTail; }
00389   private:
00390     T *head;     // readonly after init
00391     List *tail;  // mutable
00392   };
00393 }
00394 
00395 #endif // _CACHE_ENTRY_H

Generated on Mon May 8 00:48:33 2006 for Vesta by  doxygen 1.4.2