Main Page | Namespace List | Class Hierarchy | Class List | Directories | File List | Namespace Members | Class Members | File Members

Weeder.C

Go to the documentation of this file.
00001 // Copyright (C) 2001, Compaq Computer Corporation
00002 // 
00003 // This file is part of Vesta.
00004 // 
00005 // Vesta is free software; you can redistribute it and/or
00006 // modify it under the terms of the GNU Lesser General Public
00007 // License as published by the Free Software Foundation; either
00008 // version 2.1 of the License, or (at your option) any later version.
00009 // 
00010 // Vesta is distributed in the hope that it will be useful,
00011 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00012 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013 // Lesser General Public License for more details.
00014 // 
00015 // You should have received a copy of the GNU Lesser General Public
00016 // License along with Vesta; if not, write to the Free Software
00017 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00018 
00019 // Created on Mon Mar  3 16:48:41 PST 1997 by heydon
00020 
00021 #include <time.h>
00022 #include <stdio.h>
00023 #include <Basics.H>
00024 #include <SRPC.H>
00025 #include <VestaLog.H>
00026 #include <Recovery.H>
00027 #include <VestaLogSeq.H>
00028 #include <SourceOrDerived.H>
00029 #include <CacheIntf.H>
00030 #include <CacheConfig.H>
00031 #include <BitVector.H>
00032 #include <GraphLog.H>
00033 #include <WeederC.H> // cache server weeder-client interface
00034 #include <Debug.H>
00035 #include <BufStream.H>
00036 #include <AtomicFile.H>
00037 
00038 #include "PkgBuild.H"
00039 #include "CommonErrors.H"
00040 #include "WeedArgs.H"
00041 #include "WeederConfig.H"
00042 #include "RootTbl.H"
00043 #include "GLNode.H"
00044 #include "GLNodeBuffer.H"
00045 #include "GatherWeedRoots.H"
00046 #include "ReposRoots.H"
00047 #include "Weeder.H"
00048 
00049 using std::ios;
00050 using std::ifstream;
00051 using std::ofstream;
00052 using std::cout;
00053 using std::cin;
00054 using std::cerr;
00055 using std::endl;
00056 using std::hex;
00057 using std::dec;
00058 using Basics::OBufStream;
00059 
00060 // Misc -----------------------------------------------------------------------
00061 
00062 void Weeder::TimedMsg(char *msg, bool blankline, int level) throw ()
00063 {
00064     if (this->debug >= level) {
00065         cout << Debug::Timestamp() << msg << endl;
00066         if (blankline) cout << endl;
00067     }
00068 }
00069 
00070 // Derived ShortIds -----------------------------------------------------------
00071 
00072 /* To reduce the number of redundant ShortIds written to the file of derived
00073    files to keep, we keep a table of ShortIds written recently. The hope is
00074    that there will be some locality to the redundant ShortIds.
00075 
00076    The table size is bounded. If the table is full and we need to add a new
00077    entry, we first remove the oldest entry in the table. This is done by
00078    keeping a queue that records the order in which DIs were added to the
00079    table. */
00080 
00081 typedef Table<IntKey,char>::Default ShortIdTbl;
00082 
00083 typedef Sequence<ShortId,true> ShortIdSeq;
00084 
00085 static ShortIdTbl *shortIdTbl;
00086 static ShortIdSeq *shortIdSeq;
00087 
00088 static void WriteShortId(FILE *fp, ShortId id) throw (FS::Failure)
00089 /* Write the ShortId "id" to "fp" on its own line in a format understood
00090    by the repository's derived weeder. */
00091 {
00092     // put the entry in the table; return if the entry was already there
00093     IntKey key(id);
00094     if (shortIdTbl->Put(key, (char)true)) {
00095         return;
00096     }
00097 
00098     // remove an entry from the table if it is full
00099     if (shortIdTbl->Size() >= Config_DIBuffSize) {
00100         assert(shortIdTbl->Size() == Config_DIBuffSize);
00101         char dummy;
00102         ShortId oldId = shortIdSeq->remlo();
00103         bool inTbl = shortIdTbl->Delete(IntKey(oldId), /*OUT*/ dummy);
00104         assert(inTbl);
00105     }
00106     shortIdSeq->addhi(id);
00107 
00108     // write the entry
00109     int res = fprintf(fp, "%08x\n", id);
00110     if (res < 0) throw (FS::Failure("fprintf(3)"));
00111 } // WriteShortId
00112 
00113 static FILE *OpenShortIdFile(/*OUT*/ ShortId &disShortId)
00114   throw (FS::Failure, ReposError)
00115 /* Create a new derived file, write the ShortId into the file itself, and
00116    return a "FILE *" on the opened file. Set "disShortId" to the ShortId of
00117    the new file. */
00118 {
00119     // open file
00120     FILE *res = (FILE *)NULL;
00121     int fd;
00122     try {
00123         fd = SourceOrDerived::fdcreate(
00124           /*OUT*/ disShortId, /*leafflag=*/ true);
00125     } catch (SourceOrDerived::Fatal &f) {
00126         throw ReposError(VestaSource::noPermission, "fdcreate",f.msg.cchars());
00127     }
00128     if (fd == -1) {
00129         throw FS::Failure("SourceOrDerived::fdcreate");
00130     }
00131     if ((res = fdopen(fd, "w")) == (FILE *)NULL) {
00132         throw FS::Failure("fdopen(3)");
00133     }
00134 
00135     // initialize global data structures
00136     shortIdTbl =
00137       NEW_CONSTR(ShortIdTbl, (/*sizeHint=*/ Config_DIBuffSize, /*useGC=*/ true));
00138     shortIdSeq = NEW_CONSTR(ShortIdSeq, (/*sizeHint=*/ Config_DIBuffSize));
00139 
00140     // write the ShortId of the file itself to the file
00141     WriteShortId(res, disShortId);
00142     return res;
00143 } // OpenShortIdFile
00144 
00145 static void CloseShortIdFile(FILE *fp) throw (FS::Failure)
00146 {
00147     if (fflush(fp) != 0) throw FS::Failure("fflush(3)");
00148     int fd = fileno(fp); assert(fd != -1);
00149     if (fsync(fd) != 0) throw FS::Failure("fsync(2)");
00150     if (fclose(fp) != 0) throw FS::Failure("fclose(3)");
00151 }
00152 
00153 // Weeding --------------------------------------------------------------------
00154 
00155 Weeder::Weeder(CacheIntf::DebugLevel debug) throw (SRPC::failure, FS::Failure)
00156   : debug(debug), weeded(NULL), initCIs(NULL), marked(NULL),
00157     wLeases(NULL), glCIs(NULL), graphLogSeq(Config_GraphLogPath.chars())
00158 {
00159   // initialize the cache client
00160   this->cache = NEW_CONSTR(WeederC, (debug));
00161 
00162   // recover
00163   this->Recover();
00164 }
00165 
00166 void Weeder::Recover() throw (SRPC::failure, FS::Failure)
00167 {
00168     // recover stable state
00169     RecoverWeeded();
00170     if (!(this->weeded->IsEmpty())) {
00171         RecoverMiscVars();
00172     }
00173 
00174     // tell cache server we have recovered
00175     if (this->cache->WeederRecovering(!(this->weeded->IsEmpty()))) {
00176         cerr << "Fatal error: another weed is already in progress!" << endl;
00177         exit(1);
00178     }
00179 }
00180 
00181 static void Weeder_QueryDelStatus(/*INOUT*/ WeedArgs::DeletionStatus &stat)
00182   throw ()
00183 /* If "stat" is "QueryDeletions", ask the user for confirmation, and set
00184    "stat" to either "WeedArgs::NoDeletions" or "WeedArgs::DoDeletions"
00185    depending on the response. */
00186 {
00187     while (stat == WeedArgs::QueryDeletions) {
00188         cout << "Proceed to weeder deletion phase (yes/no)? ";
00189         cout.flush();
00190         const int BuffLen = 20; char buff[BuffLen];
00191         cin.getline(buff, BuffLen);
00192         if (strcmp(buff, "yes") == 0) {
00193             stat = WeedArgs::DoDeletions;
00194             cout << endl;
00195         } else if (strcmp(buff, "no") == 0) {
00196             stat = WeedArgs::NoDeletions;
00197             cout << endl;
00198         } else {
00199             cout << "Please answer \"yes\" or \"no\"." << endl << endl;
00200         }
00201     }
00202 } // Weeder_QueryDelStatus
00203 
00204 bool Weeder::Weed(WeedArgs &args) throw (SRPC::failure,
00205   FS::Failure, FS::DoesNotExist, VestaLog::Error, VestaLog::Eof,
00206   InputError, SysError, ReposError)
00207 {
00208     bool resumed = false;
00209     this->args = &args;
00210 
00211     // mark phase
00212     if (this->weeded->IsEmpty()) {
00213         if (args.noNew) {
00214             cout << "No new weed instructions given; exiting" << endl;
00215             exit(0);
00216         }
00217         if (this->debug >= CacheIntf::StatusMsgs) {
00218             TimedMsg("Starting weeder mark phase");
00219         }
00220         this->MarkPhase();
00221         if (this->debug >= CacheIntf::StatusMsgs) {
00222             TimedMsg("Finished weeder mark phase", /*blankline=*/ false);
00223             cout << "  total entries marked = "
00224                  << this->markedCntTotal << endl;
00225             cout << "  total entries to be weeded = "
00226                  << this->weeded->Cardinality() << endl;
00227             cout << endl;
00228         }
00229 
00230         // write misc vars to see disShortId
00231         WriteMiscVars(); // must be done *before* "WriteWeeded"
00232 
00233         // ask for confirmation if necessary
00234         Weeder_QueryDelStatus(/*INOUT*/ args.delStatus);
00235 
00236         if (args.delStatus == WeedArgs::DoDeletions) {
00237             // stably log weeded set
00238             WriteWeeded();   // commit point for end of mark phase
00239         }
00240     } else {
00241         cout <<"*** NOTE: Resuming previous failed weed."<<endl;
00242         resumed = true;
00243 
00244         // ask for confirmation if necessary
00245         Weeder_QueryDelStatus(/*INOUT*/ args.delStatus);
00246     }
00247 
00248     // deletion phase
00249     if (args.delStatus == WeedArgs::DoDeletions) {
00250         if (this->debug >= CacheIntf::StatusMsgs) {
00251             TimedMsg("Starting weeder deletion phase");
00252         }
00253         this->DeletionPhase();
00254         if (this->debug >= CacheIntf::StatusMsgs) {
00255             TimedMsg("Finished weeder deletion phase");
00256         }
00257     } else {
00258         // If there was an incomplete weed, it's still incomplete
00259         resumed = false;
00260     }
00261     return resumed;
00262 } // Weed
00263 
00264 // Marking phase --------------------------------------------------------------
00265 
00266 void Weeder::MarkPhase() throw (SRPC::failure, FS::Failure,
00267   FS::DoesNotExist, VestaLog::Error, VestaLog::Eof, InputError,
00268   SysError, ReposError)
00269 {
00270     // record & log start time for purposes of derived weeding
00271     (void) time(&(this->startTime)); // must be done *before* "StartMark"
00272 
00273     // start the marking phase
00274     this->initCIs = cache->StartMark(/*OUT*/ this->newLogVer);
00275 
00276     // record the keep time
00277     (void) time(&(this->keepTime)); // must be done *after* "StartMark"
00278     this->keepTime -= this->args->keepSecs;
00279 
00280     // read the weeder instructions
00281     this->instrRoots = NEW(RootTbl);
00282     try {
00283         GatherWeedRoots::P(this->args->instrFile,
00284           this->args->globInstrs, /*OUT*/ this->instrRoots);
00285     } catch (...) { this->cache->ResumeLeaseExp(); throw; }
00286 
00287     // do requested work on graphLog
00288     this->MarkWork();
00289 } // MarkPhase
00290 
00291 static void UnlinkFile(const Text &filename) throw ()
00292 /* Invoke the unlink(2) command on "filename". Errors
00293    are (intentionally) ignored. */
00294 { (void) unlink(filename.chars()); }
00295 
00296 void Weeder::MarkWork() throw (SRPC::failure, FS::Failure, VestaLog::Error,
00297   VestaLog::Eof, SourceOrDerived::Fatal, ReposError)
00298 {
00299     BitVector *leasedCIs;
00300     try {
00301       this->marked = NEW_CONSTR(BitVector, (/*sizeHint=*/ this->initCIs->Size()));
00302       this->nodeBuff =
00303         NEW_CONSTR(GLNodeBuffer, (/*maxSize=*/ Config_GLNodeBuffSize));
00304       this->markedCntTotal = 0;
00305 
00306       // open file to which derived ShortId's to keep will be written
00307       this->disFP = OpenShortIdFile(/*OUT*/ this->disShortId);
00308 
00309       // scan the repository for all models if requested
00310       PkgTbl *pkgTbl = (PkgTbl *)NULL;
00311       if (this->args->printRoots) {
00312         pkgTbl = ReposRoots::Scan(this->debug >= CacheIntf::StatusMsgs);
00313       }
00314 
00315       // pre debugging
00316       if (this->debug >= CacheIntf::StatusMsgs) {
00317         TimedMsg("Starting to mark cache entries reachable from roots");
00318       }
00319 
00320       // copy graphLog to "pendingGL", marking "roots"
00321       this->markedRoots = NEW(RootTbl);
00322       this->CopyGLtoPending(pkgTbl);
00323 
00324       // we no longer need the "instrRoots", so drop them on the floor
00325       this->instrRoots = (RootTbl *)NULL;
00326 
00327       // mark everything reachable from the roots
00328       while (this->markedCnt > 0) {
00329         this->markedCntTotal += this->markedCnt;
00330         this->ScanLogOnce();
00331       }
00332 
00333       // post debugging
00334       if (this->debug >= CacheIntf::StatusMsgs) {
00335         TimedMsg("Finished marking cache entries reachable from roots",
00336                  /*blankline=*/ false);
00337         cout << "  entries marked = " << this->markedCntTotal << endl;
00338         cout << endl;
00339       }
00340 
00341       // set the cache's hitFilter
00342       BitVector *toDelete = NEW_CONSTR(BitVector, (this->initCIs));
00343       *(toDelete) -= *(this->marked);
00344       this->cache->SetHitFilter(*toDelete);
00345       toDelete = (BitVector *)NULL; // drop on floor for GC
00346     
00347       // get currently leased entries
00348       leasedCIs = this->cache->GetLeases();
00349     } catch (...) {
00350         // resume lease expiration in the event of a failure
00351         this->cache->ResumeLeaseExp();
00352         throw;
00353     }
00354 
00355     // pre debugging
00356     int preLeaseTotal = this->markedCntTotal;
00357     if (this->debug >= CacheIntf::StatusMsgs) {
00358         TimedMsg("Started marking entries reachable from leased entries");
00359     }
00360 
00361     // mark everything reachable from leased entries
00362     this->cache->ResumeLeaseExp();
00363     if (this->debug >= CacheIntf::StatusMsgs) {
00364         TimedMsg("Started marking leased cache entries");
00365     }
00366     this->markedCnt = 0;
00367     {   // new scope for locals
00368         BVIter it(*leasedCIs);
00369         CacheEntry::Index ci;
00370         while (it.Next(/*OUT*/ ci)) {
00371             this->MarkNode(ci);
00372         }
00373     }
00374     if (this->debug >= CacheIntf::StatusMsgs) {
00375         TimedMsg("Finished marking leased cache entries", /*blankline=*/false);
00376         cout << "  entries marked = " << this->markedCnt << endl << endl;
00377     }
00378     while (this->markedCnt > 0) {
00379         this->markedCntTotal += this->markedCnt;
00380         this->ScanLogOnce();
00381     }
00382 
00383     // post debugging
00384     if (this->debug >= CacheIntf::StatusMsgs) {
00385         TimedMsg("Finished marking entries reachable from leased entries",
00386                  /*blankline=*/ false);
00387         int numMarked = this->markedCntTotal - preLeaseTotal;
00388         cout << "  entries marked = " << numMarked << endl << endl;
00389     }
00390 
00391     // Check for non-leased marked CIs which didn't have graph log
00392     // entries.  If there are any, it's big trouble because we can't
00393     // protect their child CIs, derived files, etc.  (It's OK for
00394     // leased entries to not have GL entries processed by the weeder,
00395     // as they may be new.  However, we wait until after processing
00396     // the leased entries becuase non-leased entries reachable from
00397     // leased entries *must* have GL entries.)
00398     {
00399       BitVector *marked_not_leased = *(this->marked) - *leasedCIs;
00400       leasedCIs = (BitVector *)NULL; // drop on floor for GC
00401       BitVector *marked_missing_gl = *marked_not_leased - *(this->glCIs);
00402       marked_not_leased = (BitVector *)NULL; // drop on floor for GC
00403       this->glCIs = (BitVector *)NULL;       // drop on floor for GC
00404       if(marked_missing_gl->Cardinality() > 0)
00405         {
00406           cerr << Debug::Timestamp() << "Fatal error:" << endl
00407                << "  Cache inconsistency detected: non-leased marked CI(s)"
00408                << endl
00409                << "   with no GL entry!" << endl
00410                << endl
00411                << "   cis missing GL entries = " << endl;
00412           marked_missing_gl->PrintAll(cerr, 6);
00413           cerr << "     (" << marked_missing_gl->Cardinality() << " total)"
00414                << endl << endl;
00415           exit(1);
00416         }
00417       marked_missing_gl = (BitVector *)NULL; // drop on floor for GC
00418     }
00419 
00420     // compute "this->weeded"
00421     this->weeded = this->initCIs;       // alias to "this->initCIs"
00422     this->initCIs = (BitVector *)NULL;  // drop on floor for GC
00423     *(this->weeded) -= *(this->marked); // subtract off marked entries
00424     this->marked = (BitVector *)NULL;   // drop on floor for GC
00425 
00426     // close derived ShortId file
00427     CloseShortIdFile(this->disFP);
00428 
00429     // drop structures on floor for GC
00430     shortIdTbl = (ShortIdTbl *)NULL;
00431     shortIdSeq = (ShortIdSeq *)NULL;
00432     this->disFP = (FILE *)NULL;
00433 
00434     // free file and memory resources
00435     this->nodeBuff = (GLNodeBuffer *)NULL;
00436     UnlinkFile(Config_PendingGLFile);
00437     UnlinkFile(Config_WorkingGLFile);
00438 } // MarkWork
00439 
00440 void Weeder::ScanLogOnce() throw (FS::Failure)
00441 {
00442     // pre debugging
00443     if (this->debug >= CacheIntf::WeederScans) {
00444         TimedMsg("Starting scan of pending graph log");
00445     }
00446     int nodesRead = 0;
00447 
00448     // open "workingGL" on current "pendingGL"
00449     ifstream workingGL;
00450     char *pendingGLFile = Config_PendingGLFile.chars();
00451     char *workingGLFile = Config_WorkingGLFile.chars();
00452     if (rename(pendingGLFile, workingGLFile) < 0) {
00453         throw FS::Failure("rename(2)", Config_PendingGLFile);
00454     }
00455     FS::OpenReadOnly(workingGLFile, /*OUT*/ workingGL);
00456     try {
00457 
00458     // open new "pendingGL" for writing
00459     FS::OpenForWriting(pendingGLFile, /*OUT*/ this->pendingGL);
00460     try {
00461 
00462     // scan
00463     this->markedCnt = 0;
00464     this->nodeBuff->flushedCnt = 0;
00465     while (!FS::AtEOF(workingGL)) {
00466         GLNode node(workingGL);
00467         nodesRead++;
00468         if (this->marked->Read(node.ci)) {
00469             this->ProcessNode(node);
00470         } else {
00471             this->nodeBuff->Put(node, this->pendingGL);
00472         }
00473     }
00474 
00475     // close "pendingGL"
00476     } catch (...) { FS::Close(pendingGL); throw; }
00477     FS::Close(pendingGL);
00478 
00479     // close "workingGL"
00480     } catch (...) { FS::Close(workingGL); throw; }
00481     FS::Close(workingGL);
00482 
00483     // post debugging
00484     if (this->debug >= CacheIntf::WeederScans) {
00485         TimedMsg("Finished scan of pending graph log", /*blankline=*/ false);
00486         cout << "  nodes read = " << nodesRead << endl;
00487         cout << "  entries marked = " << this->markedCnt << endl;
00488         cout << "  nodes written = " << this->nodeBuff->flushedCnt << endl;
00489         cout << endl;
00490     }
00491 } // ScanLogOnce
00492 
00493 void Weeder::CopyGLtoPending(const PkgTbl *pkgTbl)
00494   throw (FS::Failure, VestaLog::Error)
00495 {
00496     int writtenCnt = 0;
00497     bool unused_cis = false;
00498     this->markedCnt = 0;
00499 
00500     // pre debugging
00501     if (this->debug >= CacheIntf::WeederScans) {
00502         TimedMsg("Starting scan of graph log");
00503     }
00504     int rootCnt = 0, nodeCnt = 0;
00505     if (this->args->printRoots) {
00506         cout << "Disposition of GraphLog roots:" << endl;
00507         assert(pkgTbl != (PkgTbl *)NULL);
00508     }
00509 
00510     // Allocate a new BitVector to collect the set of CIs in the graph
00511     // log.
00512     this->glCIs = NEW_CONSTR(BitVector, (/*sizeHint=*/ this->initCIs->Size()));
00513 
00514     // open "pendingGL" file (for writing)
00515     FS::OpenForWriting(Config_PendingGLFile, /*OUT*/ this->pendingGL);
00516     try {
00517 
00518     // open log
00519     this->graphLogSeq.Open(/*version=*/ -1, /*readonly=*/ true);
00520     try {
00521 
00522     /* roots whose dispositions have been printed; only used if
00523        "this->args->printRoots". Note that we have to keep two tables
00524        because a root may be printed with both a '-' early in the
00525        log and a '>' (freshness indicator) later in the log. */
00526     RootTbl unkeptPrintedRootsTbl, keptPrintedRootsTbl;
00527 
00528     // copy "logSeq" to "pendingGL"
00529     RecoveryReader *rd;
00530     while ((rd = this->graphLogSeq.Next(this->newLogVer)) != NULL) {
00531         while (!rd->eof()) {
00532             GraphLog::Entry *entry = GraphLog::Entry::Recover(*rd);
00533             switch (entry->kind) {
00534               case GraphLog::RootKind:
00535                 { // see if this root is supposed to be kept
00536                   GraphLog::Root *root = (GraphLog::Root *)entry;
00537                   PkgBuild pkgBuild(root->pkgVersion, root->model);
00538                   bool dummy;
00539                   bool inTbl = this->instrRoots->Get(pkgBuild, /*OUT*/ dummy);
00540                   bool fresh = (root->ts >= this->keepTime);
00541                   bool keptRoot = (inTbl || fresh);
00542                   if (this->args->printRoots) {
00543                       // print it if it has not yet been printed
00544                       RootTbl *tbl = keptRoot
00545                         ? &keptPrintedRootsTbl
00546                         : &unkeptPrintedRootsTbl;
00547                       if (!tbl->Get(pkgBuild, /*OUT*/ dummy)) {
00548                           this->PrintRoot(pkgBuild, *pkgTbl, inTbl, fresh);
00549                           bool inTbl2 = tbl->Put(pkgBuild, false);
00550                           assert(!inTbl2);
00551                       }
00552                   }
00553 
00554                   // roots are kept either because they are in the
00555                   // instructions or because they are deemed fresh enough
00556                   if (keptRoot)
00557                     {
00558                       // if so, mark its corresponding CIs
00559                       CacheEntry::Indices *cis = root->cis;
00560                       for (int i = 0; i < cis->len; i++) {
00561                           this->MarkNode(cis->index[i]);
00562                       }
00563 
00564                       // add it to the table of marked roots, noting
00565                       // whether it was explicitly requested.
00566                       (void) this->markedRoots->Put(pkgBuild, inTbl);
00567                     }
00568 
00569                   // Check whether any of this root's CIs are
00570                   // unused.  If they are, this indicates an
00571                   // inconsistency (probably a cache or weeder
00572                   // bug).
00573                   for (int i = 0; i < root->cis->len; i++)
00574                     {
00575                       if(!this->initCIs->Read(root->cis->index[i]))
00576                         {
00577                           // Print the "fatal error" banner only if we
00578                           // haven't already done so.
00579                           if(!unused_cis)
00580                             {
00581                               cerr << Debug::Timestamp() << "Fatal error:"
00582                                    << endl;
00583                             }
00584                           // Print a text message about the problem(s)
00585                           // detected.
00586                           cerr << ("  Cache inconsistency detected: graph"
00587                                    " log root with unused CI(s)!")
00588                                << endl << endl;
00589                           // Dump the complete entry.
00590                           entry->DebugFull(cerr);
00591                           cerr << endl;
00592                           // Remember that we've encountered an entry
00593                           // wih unused CIs.
00594                           unused_cis = true;
00595                           // Don't bother checking the rest of the CIs
00596                           // referenced by this root.
00597                           break;
00598                         }
00599                     }
00600                 }
00601                 rootCnt++;
00602                 break;
00603               case GraphLog::NodeKind:
00604                 {
00605                   // write out the relevant fields as a "GLNode"
00606                   GraphLog::Node *node_entry = (GraphLog::Node *)entry;
00607                   GLNode node(*node_entry);
00608                   node.Write(this->pendingGL);
00609                   // Remember that there was a GL entry for this CI.
00610                   this->glCIs->Set(node.ci);
00611                   writtenCnt++;
00612                   nodeCnt++;
00613 
00614                   // Check whether this node's CI is unused, and
00615                   // whether any of this node's child CIs are unused.
00616                   // Unused CIs indicates an inconsistency (probably a
00617                   // cache or weeder bug).
00618                   bool self_unused = !this->initCIs->Read(node.ci),
00619                     kids_unused = false;
00620                   for (int i = 0; i < node.kids->len; i++)
00621                     {
00622                       if(!this->initCIs->Read(node.kids->index[i]))
00623                         {
00624                           kids_unused = true;
00625                           break;
00626                         }
00627                     }
00628                   if(self_unused || kids_unused)
00629                     {
00630                       // Print the "fatal error" banner only if we
00631                       // haven't already done so.
00632                       if(!unused_cis)
00633                         {
00634                           cerr << Debug::Timestamp() << "Fatal error:"
00635                                << endl;
00636                         }
00637                       // Print a text message about the problem(s)
00638                       // detected.
00639                       cerr << ("  Cache inconsistency detected: graph log"
00640                                " node with") << endl;
00641                       if(self_unused)
00642                         {
00643                           cerr << "an unused CI";
00644                         }
00645                       if(self_unused && kids_unused)
00646                         {
00647                           cerr << " and ";
00648                         }
00649                       if(kids_unused)
00650                         {
00651                           cerr << "unused child CI(s)";
00652                         }
00653                       cerr << "!" << endl << endl;
00654                       // Dump the complete entry.
00655                       entry->DebugFull(cerr);
00656                       cerr << endl;
00657                       // Remember that we've encountered an entry wih
00658                       // unused CIs.
00659                       unused_cis = true;
00660                     }
00661                 }
00662                 break;
00663               default:
00664                 assert(false);
00665             }
00666         }
00667     }
00668     // If we found entries with unused CIs, die.  We wait until now so
00669     // that if there's more than one we will print them all.
00670     if(unused_cis)
00671       {
00672         exit(1);
00673       }
00674 
00675     if (this->args->printRoots) {
00676         if (rootCnt == 0) cout << "  NONE FOUND" << endl;
00677         cout << endl;
00678     }
00679 
00680     // close log
00681     } catch (...) {
00682         if (this->args->printRoots) {
00683             if (rootCnt == 0) cout << "  NONE FOUND SO FAR" << endl;
00684             cout << endl;
00685         }
00686         this->graphLogSeq.Close();
00687         throw;
00688     }
00689     this->graphLogSeq.Close();
00690 
00691     // close "pendingGL" file
00692     } catch (...) { FS::Close(pendingGL); throw; }
00693     FS::Close(pendingGL);
00694 
00695     // post debugging
00696     if (this->debug >= CacheIntf::WeederScans) {
00697         TimedMsg("Finished scan of graph log", /*blankline=*/ false);
00698         cout << "  roots read = " << rootCnt << endl;
00699         cout << "  nodes read = " << nodeCnt << endl;
00700         cout << "  entries marked = " << this->markedCnt << endl;
00701         cout << "  nodes written = " << writtenCnt << endl;
00702         cout << endl;
00703     }
00704 } // CopyGLtoPending
00705 
00706 void Weeder::MarkNode(CacheEntry::Index ci) throw (FS::Failure)
00707 {
00708     if (!(this->marked->Set(ci))) {
00709         // this node was previously unmarked
00710         this->markedCnt++;
00711 
00712         // if the node for "ci" is in the buffer, remove & process it
00713         GLNode node;
00714         if (this->nodeBuff->Delete(ci, /*OUT*/ node)) {
00715             this->ProcessNode(node);
00716         }
00717     }
00718 }
00719 
00720 void Weeder::ProcessNode(const GLNode &node) throw (FS::Failure)
00721 {
00722     register int i;
00723 
00724     // mark each of the child entries (if any)
00725     CacheEntry::Indices *cis = node.kids;
00726     for (i = 0; i < cis->len; i++) {
00727         this->MarkNode(cis->index[i]);
00728     }
00729 
00730     // write the deriveds reachable from this node (if any)
00731     Derived::Indices *dis = node.refs;
00732     for (i = 0; i < dis->len; i++) {
00733         WriteShortId(this->disFP, dis->index[i]);
00734     }
00735 }
00736 
00737 void Weeder::PrintRoot(const PkgBuild &root, const PkgTbl &pkgTbl,
00738   bool chosenRoot, bool freshRoot) throw ()
00739 {
00740     // print prefix
00741     cout << "  " << (chosenRoot ? '+' : (freshRoot ? '>' : '-')) << ' ';
00742 
00743     // print result
00744     Pathname::T path;
00745     if (pkgTbl.Get(root, /*OUT*/ path)) {
00746         Pathname::Print(cout, path);
00747     } else {
00748         // no match for an exact model; see if we can find a directory
00749         PkgBuild dir(root.pkgDirFP, NullShortId);
00750         if (pkgTbl.Get(dir, /*OUT*/ path)) {
00751             Pathname::Print(cout, path);
00752             char buffer[20];
00753             int spres = sprintf(buffer, "%08x", root.modelShortId);
00754             assert(spres >= 0);
00755             cout << "; modelShortId = 0x" << buffer;
00756         } else {
00757             // we don't have a pathname for this model, so print a "raw" form
00758             root.Print(cout);
00759         }
00760     }
00761     cout << endl;
00762 }
00763 
00764 // Deleting phase -------------------------------------------------------------
00765 
00766 void Weeder::DeletionPhase()
00767   throw (SRPC::failure, FS::Failure, VestaLog::Error, VestaLog::Eof)
00768 {
00769     /* Interaction with the cache is necessary only if there are
00770        some cache entries to weed. */
00771 
00772     if (!(this->weeded->IsEmpty())) {
00773         // form table of PKPrefixes containing "weeded" entries
00774         PKPrefixTbl *pfxTbl = WeededPrefixes();
00775 
00776         // call cache server's "EndMark" method
00777         this->newLogVer = this->cache->EndMark(*(this->weeded), *pfxTbl);
00778         pfxTbl = (PKPrefixTbl *)NULL; // drop on floor for GC
00779     }
00780 
00781     // weed derived files -------------------------------------------------
00782 
00783     // start repository source/derived weed
00784     TimedMsg("Started deleting dead repository files");
00785     int derRes =
00786       SourceOrDerived::keepDerived(this->disShortId, this->startTime);
00787     TimedMsg("Finished deleting dead repository files");
00788 
00789     if (!(this->weeded->IsEmpty())) {
00790         // write graphLog checkpoint & commit it
00791         this->PruneGraphLog();
00792         try
00793           {
00794             bool l_chkpt_accepted =
00795               this->cache->CommitChkpt(this->logChkptFName);
00796 
00797             // If the cache server rejects our graph log checkpoint,
00798             // something is very wrong.
00799             if(!l_chkpt_accepted)
00800               {
00801                 cerr << Debug::Timestamp() << "Fatal error:" << endl;
00802                 cerr << "  graph log checkpoint rejected by cache server"
00803                      << endl;
00804                 cerr << endl;
00805                 exit(1);
00806               }
00807           }
00808         // If anything goes wrong with committing the checkpoint we
00809         // wrote, delete it.
00810         catch(...)
00811           {
00812             try
00813               { FS::Delete(this->logChkptFName); }
00814             catch(FS::Failure)
00815               { /* We don't really care if that operation failed. */ }
00816 
00817             // Re-throw whatever went wrong with committing the
00818             // checkpoint
00819             throw;
00820           }
00821 
00822         // reset "weeded"
00823         this->weeded->ResetAll(/*freeMem=*/ true);
00824         this->WriteWeeded();
00825     }
00826 
00827     // if there were no errors, checkpoint the repository
00828     if (derRes != 0) {
00829         cerr << Debug::Timestamp() << "Fatal repository error" << endl;
00830         cerr << "  Derived weed error = " << derRes << endl;
00831         cerr << endl;
00832     } else {
00833         TimedMsg("Started checkpointing the repository");
00834         SourceOrDerived::checkpoint();
00835         TimedMsg("Finished checkpointing the repository");
00836     }
00837 }
00838 
00839 PKPrefixTbl *Weeder::WeededPrefixes() throw (VestaLog::Error, FS::Failure)
00840 {
00841   // allocate result
00842   PKPrefixTbl *res = NEW_CONSTR(PKPrefixTbl, (/*sizeHint=*/ 100));
00843 
00844   // open graphLog
00845   this->graphLogSeq.Open(/*ver=*/ -1, /*readOnly=*/ true);
00846   try {
00847 
00848     // copy "logSeq" to "pendingGL"
00849     RecoveryReader *rd;
00850     while ((rd = this->graphLogSeq.Next(this->newLogVer)) != NULL) {
00851       while (!rd->eof()) {
00852         GraphLog::Entry *entry = GraphLog::Entry::Recover(*rd);
00853         if (entry->kind == GraphLog::NodeKind) {
00854           GraphLog::Node *node = (GraphLog::Node *)entry;
00855           if (this->weeded->Read(node->ci)) {
00856             PKPrefix::T pfx(*(node->loc));
00857             (void) res->Put(pfx, (char)true);
00858           }
00859         }
00860       }
00861     }
00862 
00863     // close graphLog
00864   } catch (...) { this->graphLogSeq.Close(); throw; }
00865   this->graphLogSeq.Close();
00866   return res;
00867 }
00868 
00869 void Weeder::PruneGraphLog()
00870   throw (FS::Failure, VestaLog::Error, VestaLog::Eof)
00871 {
00872     // pre debugging
00873     if (this->debug >= CacheIntf::LogFlush) {
00874         TimedMsg("Started pruning graph log");
00875     }
00876     int nodeCnt = 0, rootCnt = 0;
00877 
00878     // Figure out the name we will write the checkpoint to.  We append
00879     // a suffix to the "real" checkpoint filename in order to cover a
00880     // potentially problematic situation: two weeders simultaneously
00881     // trying to checkpoint the same cache server's graph log.
00882     Text chkptFileName, rel_chkptFileName;
00883     time_t now = time(0);
00884     bool l_exists;
00885     do
00886       {
00887         OBufStream l_temp_fname;
00888         l_temp_fname << dec << this->newLogVer << ".ckp"
00889                      << AtomicFile::reserved_char << hex << now;
00890 
00891         rel_chkptFileName = l_temp_fname.str();
00892         chkptFileName = Config_GraphLogPath + '/' + rel_chkptFileName;
00893         
00894         // We may need to create a different temporary filename, so
00895         // bump the number we're using.
00896 
00897         now++;
00898       }
00899     // Test to make sure that there isn't already a file with this
00900     // name.  This is a little on the paranoid side, but since
00901     // we're trying to prevent a problem with two weeders doing
00902     // this simultaneously, it seems worthwhile.
00903     while(FS::Exists(chkptFileName));
00904 
00905     // table of roots already written to the graph log, with the value
00906     // set to true when we've written the 'done' root.
00907     RootTbl writtenRootsTbl;
00908 
00909     // open the checkpoint
00910     ofstream chkpt;
00911     FS::OpenForWriting(chkptFileName, /*OUT*/ chkpt);
00912     // Remember the checkpoint filename, as we'll need it to pass to
00913     // the cache server when we call CommitChkpt.
00914     this->logChkptFName = rel_chkptFileName;
00915     try {
00916 
00917     // open the graphLog
00918     this->graphLogSeq.Open(/*ver=*/ -1, /*readOnly=*/ true);
00919     try {
00920 
00921     // read the graphLog, copying relevant nodes to "chkpt"
00922     RecoveryReader *rd;
00923     while ((rd = this->graphLogSeq.Next(this->newLogVer)) != NULL) {
00924         while (!rd->eof()) {
00925             GraphLog::Entry *entry = GraphLog::Entry::Recover(*rd);
00926             switch (entry->kind) {
00927               case GraphLog::RootKind:
00928                 { // start new scope
00929                   GraphLog::Root *root = (GraphLog::Root *)entry;
00930                   PkgBuild pkgBuild(root->pkgVersion, root->model);
00931                   // The value associated with each PkgBuild in
00932                   // this->markedRoots tells us whether it was
00933                   // explicitly kept or by a keep duration.
00934                   bool explicitlyKept = false;
00935                   bool inRootTbl = this->markedRoots->Get(pkgBuild,
00936                                                           explicitlyKept);
00937                   // The value in writtenRootsTbl represents whether
00938                   // we've written the final root for this pkgBuild
00939                   // (the one with done = true).  (It's possible that
00940                   // a pkgBuild might have no such root, if the
00941                   // evaluator never completed.)
00942                   bool doneWritingRoot = false;
00943                   bool inWrittenTbl = writtenRootsTbl.Get(pkgBuild,
00944                                                           doneWritingRoot);
00945                   // We will keep this root if it was explicitly kept
00946                   // or it was kept by freshness and is within the
00947                   // keep duration, as long as we haven't already
00948                   // finished writing the full set of roots for this
00949                   // build.
00950                   if ((inRootTbl && (explicitlyKept ||
00951                                      (root->ts >= this->keepTime))) &&
00952                       (!inWrittenTbl || !doneWritingRoot)) {
00953                       // make sure none of its kids was deleted
00954                       CacheEntry::Indices *cis = root->cis;
00955                       for (int i = 0; i < cis->len; i++) {
00956                           assert(!this->weeded->Read(cis->index[i]));
00957                       }
00958 
00959                       // write it out
00960                       root->Write(chkpt);
00961                       rootCnt++;
00962 
00963                       // add it to the writtenRootsTbl if neccessary,
00964                       // noting whether it is done
00965                       if (root->done)
00966                         {
00967                           writtenRootsTbl.Put(pkgBuild, true);
00968                         }
00969                       else if(!inWrittenTbl)
00970                         {
00971                           inWrittenTbl = writtenRootsTbl.Put(pkgBuild, false);
00972                           assert(!inWrittenTbl);
00973                         }
00974                   }
00975                 }
00976                 break;
00977               case GraphLog::NodeKind:
00978                 { // local scope
00979                   GraphLog::Node *node = (GraphLog::Node *)entry;
00980                   if (!(this->weeded->Read(node->ci))) {
00981                       // this entry was not weeded, so write it
00982                       /* Note: once cutoffs are supported, we have to
00983                          form a new node that does not include the children
00984                          that were weeded. For now, though, so long as the
00985                          node itself was not weeded, none of its children
00986                          was weeded either. */
00987                       node->Write(chkpt);
00988                       nodeCnt++;
00989                   }
00990                 }
00991                 break;
00992               default:
00993                 assert(false);
00994             }
00995         }
00996     }
00997 
00998     // close graphLog
00999     } catch (...) { this->graphLogSeq.Close(); throw; }
01000     this->graphLogSeq.Close();
01001    
01002     // close the checkpoint
01003     } catch (...) { FS::Close(chkpt); throw; }
01004     FS::Close(chkpt);
01005 
01006     // Now that we've finished, we need to do a little sanity
01007     // checking.  It's possible that someone erased the cache but
01008     // *not* the weeder's metadata.  If that's the case the marked
01009     // roots stored stably won't have been in the graph log we're
01010     // pruning, and in fact we may have just erroneously pruned almost
01011     // everything from the graph log.  Before comitting the
01012     // checkpoint, we check for this case.
01013     if (this->debug >= CacheIntf::LogFlush) {
01014         TimedMsg("Checking marked vs. written roots", /*blankline=*/ true);
01015     }
01016 
01017     Table<PkgBuild,bool>::Iterator marked_iterator(this->markedRoots);
01018     PkgBuild pkgBuild;
01019     bool dummy;
01020     // Loop over all the marked roots
01021     while(marked_iterator.Next(pkgBuild, dummy))
01022     {
01023       // Check whether this one was written to the graph log
01024       // checkpoint.
01025       bool inWrittenTbl = writtenRootsTbl.Get(pkgBuild, dummy);
01026       // If this one was never written, and thus never seen in the
01027       // graph log...
01028       if(!inWrittenTbl)
01029         {
01030           // Print out the non-matching sets of roots
01031           TimedMsg("Mismatch ebtween marked and written roots",
01032                    /*blankline=*/ true);
01033           cout << "  marked roots = " << endl;
01034           this->markedRoots->Print(cout, /*indent=*/ 4,
01035                                    "[explicit]", "[fresh]");
01036           cout << "  written roots = " << endl;
01037           writtenRootsTbl.Print(cout, /*indent=*/ 4,
01038                                 "[done]", "[not done]");
01039           cout << endl;
01040 
01041           // Print out an explanation and then exit with error status.
01042           cerr << Debug::Timestamp() << "Fatal error:" << endl
01043                << "  some marked roots never seen while" << endl
01044                << "  checkpointing graph log!" << endl
01045                << endl
01046                << "  *** This may mean that the cache metadata was"   << endl
01047                << "      erased, but the weeder metadata was not."    << endl
01048                << "      In other words, this may be the resumption"  << endl
01049                << "      of a previously failed weed against a"       << endl
01050                << "      different cache than the original weed"      << endl
01051                << "      was run against."                            << endl
01052                << endl
01053                << "  *** If this was not the resumption of a "        << endl
01054                << "      previously failed weed, then there's a bug." << endl
01055                << endl
01056                << "  *** If you don't understand that, you should"    << endl
01057                << "      erase your weeder metadata with:"            << endl
01058                << endl
01059                << "      rm " << Config_WeederMDPath << "/*" << endl
01060                << endl
01061                << "      and re-run the weeder." << endl;
01062           exit(1);
01063           
01064         }
01065     }
01066 
01067     // post debugging
01068     if (this->debug >= CacheIntf::LogFlush) {
01069         TimedMsg("Finished pruning graph log", /*blankline=*/ false);
01070         cout << "  roots written = " << rootCnt << endl;
01071         cout << "  nodes written = " << nodeCnt << endl;
01072         cout << endl;
01073     }
01074 }
01075 
01076 // Stable variables -----------------------------------------------------------
01077 
01078 void Weeder::RecoverWeeded() throw (FS::Failure)
01079 {
01080     // debugging start
01081     if (this->debug >= CacheIntf::LogRecover) {
01082         TimedMsg("Recovering `weeded'", /*blankline=*/ false);
01083     }
01084 
01085     // actual work
01086     ifstream ifs;
01087     try {
01088         FS::OpenReadOnly(Config_WeededFile, /*OUT*/ ifs);
01089         this->weeded = NEW_CONSTR(BitVector, (ifs));
01090         FS::Close(ifs);
01091     }
01092     catch (FS::DoesNotExist) {
01093       this->weeded = NEW(BitVector);
01094     }
01095     catch (FS::EndOfFile) {
01096         // programming error
01097         assert(false);
01098     }
01099 
01100     // debugging end
01101     if (this->debug >= CacheIntf::LogRecover) {
01102         cout << "  weeded = " << *(this->weeded) << "" << endl;
01103         cout << endl;
01104     }
01105 }
01106 
01107 void Weeder::WriteWeeded() throw (FS::Failure)
01108 {
01109     // debugging start
01110     if (this->debug >= CacheIntf::LogFlush) {
01111         TimedMsg("Logging `weeded'", /*blankline=*/ false);
01112         cout << "  weeded = " << endl;
01113         this->weeded->PrintAll(cout, 6);
01114         cout << "    (" << this->weeded->Cardinality() << " total)" << endl
01115              << endl;
01116     }
01117 
01118     // actual work
01119     AtomicFile ofs;
01120     ofs.open(Config_WeededFile.chars(), ios::out);
01121     if (ofs.fail()) {
01122         throw (FS::Failure(Text("Weeder::WriteWeeded"), Config_WeededFile));
01123     }
01124     this->weeded->Write(ofs);
01125     FS::Close(ofs); // swing file pointer atomically
01126 }
01127 
01128 void Weeder::RecoverMiscVars() throw (FS::Failure)
01129 {
01130     // debugging start
01131     if (this->debug >= CacheIntf::LogRecover) {
01132         TimedMsg("Recovering miscellaneous variables", /*blankline=*/ false);
01133     }
01134 
01135     // actual work
01136     ifstream ifs;
01137     try {
01138         FS::OpenReadOnly(Config_MiscVarsFile, /*OUT*/ ifs);
01139 
01140         // Read the startTime/keepTime timestamps as 32-bit integers
01141         // to maintain compatibility between 32-bit and 64-bit
01142         // platforms.
01143         Basics::int32 l_ts;
01144         FS::Read(ifs, (char *)(&l_ts), sizeof_assert(l_ts, 4));
01145         this->startTime = l_ts;
01146         FS::Read(ifs, (char *)(&l_ts), sizeof_assert(l_ts, 4));
01147         this->keepTime = l_ts;
01148 
01149         FS::Read(ifs, (char *)(&(this->disShortId)), sizeof(this->disShortId));
01150         this->markedRoots = NEW_CONSTR(RootTbl, (ifs));
01151         FS::Close(ifs);
01152     }
01153     catch (FS::DoesNotExist) {
01154         this->startTime = -1;
01155     }
01156     catch (FS::EndOfFile) {
01157         // programming error
01158         assert(false);
01159     }
01160 
01161     // debugging end
01162     if (this->debug >= CacheIntf::LogRecover) {
01163         cout << "  startTime = " << this->startTime << endl;
01164         cout << "  disShortId = 0x" << hex << this->disShortId << dec << endl;
01165         cout << "  markedRoots =" << endl;
01166         cout << "    size = " << this->markedRoots->Size() << endl;
01167         if (this->markedRoots->Size() > 0) {
01168             cout << "    content =" << endl;
01169             this->markedRoots->Print(cout, /*indent=*/ 6,
01170                                      "[explicit]", "[fresh]");
01171         }
01172         cout << endl;
01173     }
01174 } // Weeder::RecoverMiscVars
01175 
01176 void Weeder::WriteMiscVars() throw (FS::Failure)
01177 {
01178     // debugging start
01179     if (this->debug >= CacheIntf::LogFlush) {
01180         TimedMsg("Logging miscellaneous variables", /*blankline=*/ false);
01181         cout << "  startTime = " << this->startTime << endl;
01182         cout << "  disShortId = 0x" << hex << this->disShortId << dec << endl;
01183         cout << "  markedRoots =" << endl;
01184         cout << "    size = " << this->markedRoots->Size() << endl;
01185         if (this->markedRoots->Size() > 0) {
01186             cout << "    content =" << endl;
01187             this->markedRoots->Print(cout, /*indent=*/ 6,
01188                                      "[explicit]", "[fresh]");
01189         }
01190         cout << endl;
01191     }
01192 
01193     // actual work
01194     AtomicFile ofs;
01195     ofs.open(Config_MiscVarsFile.chars(), ios::out);
01196     if (ofs.fail()) {
01197         throw FS::Failure(Text("Weeder::WriteMiscVars"), Config_MiscVarsFile);
01198     }
01199 
01200     // Write the startTime/keepTime timestamps as 32-bit integers to
01201     // maintain compatibility between 32-bit and 64-bit platforms.
01202     Basics::int32 l_ts = this->startTime;
01203     FS::Write(ofs, (char *)(&l_ts), sizeof_assert(l_ts, 4));
01204     l_ts = this->keepTime;
01205     FS::Write(ofs, (char *)(&l_ts), sizeof_assert(l_ts, 4));
01206 
01207     FS::Write(ofs, (char *)(&(this->disShortId)), sizeof(this->disShortId));
01208     this->markedRoots->Write(ofs);
01209 
01210     // swing file pointer atomically
01211     FS::Close(ofs);
01212 } // Weeder::WriteMiscVars

Generated on Mon May 8 00:48:59 2006 for Vesta by  doxygen 1.4.2