Main Page | Namespace List | Class Hierarchy | Class List | Directories | File List | Namespace Members | Class Members | File Members

QuickWeed.C

Go to the documentation of this file.
00001 // Copyright (C) 2001, Compaq Computer Corporation
00002 // 
00003 // This file is part of Vesta.
00004 // 
00005 // Vesta is free software; you can redistribute it and/or
00006 // modify it under the terms of the GNU Lesser General Public
00007 // License as published by the Free Software Foundation; either
00008 // version 2.1 of the License, or (at your option) any later version.
00009 // 
00010 // Vesta is distributed in the hope that it will be useful,
00011 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00012 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013 // Lesser General Public License for more details.
00014 // 
00015 // You should have received a copy of the GNU Lesser General Public
00016 // License along with Vesta; if not, write to the Free Software
00017 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00018 
00019 // Last modified on Mon Aug  9 17:50:19 EDT 2004 by ken@xorian.net   
00020 //      modified on Mon Jun 26 19:27:27 PDT 2000 by mann   
00021 //      modified on Sat Jan 29 15:03:27 PST 2000 by heydon 
00022 
00023 /* This program does a quick weed of derived files. It does so
00024    by treating all of the cache entries in the cache server as
00025    roots of the weed. Hence, it does not cause the cache server
00026    to delete any cache entries, and it keeps all derived files
00027    stored in all cache entries in the cache.
00028 
00029    The program works by recording the current time, invoking
00030    the cache server's checkpoint method to flush the graphLog,
00031    and then reading the graphLog to select out all the ShortIds
00032    of all derived files mentioned in values in the graphLog. It
00033    then calls the appropriate repository methods to weed all
00034    deriveds except for the ones it found. */
00035 
00036 /* Note: The quick weeder does not call several of the cache server's
00037    weeding methods, such as "SetHitFilter", "GetLeases", "EndMark", and
00038    "CommitChkpt". Mainly this is because the quick weeder does not
00039    actually delete any cache entries (or deriveds reachable from them)
00040    that appear in the graph log. As a result, the quick weeder does not
00041    have to re-write the graph log. Since it does not call the "CommitChkpt"
00042    method, the checkpoint started by "StartMark" will never be committed,
00043    which is desired, since the previous checkpoint plus all subsequent logs
00044    should still be taken to constitute the entire graph log after QuickWeed
00045    has run. The next time the cache server's "StartMark" method is called,
00046    any outstanding graph log checkpoint is aborted. */
00047 
00048 #include <Basics.H>
00049 #include <Generics.H>
00050 #include <VestaConfig.H>
00051 #include <SRPC.H>
00052 #include <VestaLog.H>
00053 #include <Recovery.H>
00054 #include <VestaLogSeq.H>
00055 #include <FP.H>
00056 #include <SourceOrDerived.H>
00057 #include <ReadConfig.H>
00058 #include <BitVector.H>
00059 #include <PKPrefix.H>
00060 #include <WeederC.H>
00061 #include <Derived.H>
00062 #include <ParCacheC.H>
00063 #include <Debug.H>
00064 #include <GraphLog.H>
00065 
00066 using std::ostream;
00067 using std::ifstream;
00068 using std::cout;
00069 using std::cerr;
00070 using std::endl;
00071 
/* FSFailure: exception thrown when creating or writing the file of
   derived ShortIds to keep fails. It records the "errno" value supplied
   by the thrower so that the handler can print it. */
class FSFailure {
  public:
    /* Remember "sys_errno" as the error code to report. The constructor
       is "explicit" so that a bare integer is never silently converted
       into an exception object. */
    explicit FSFailure(int sys_errno) throw ()
      : sys_errno(sys_errno) { /*SKIP*/ }
    friend std::ostream& operator<<(std::ostream &os, const FSFailure &f)
      throw ();
  private:
    int sys_errno;  // error code captured when the failure was detected
};

/* Write "f" to "os" in the form "errno = <code>" and return "os". */
std::ostream& operator<<(std::ostream &os, const FSFailure &f) throw ()
{
    return os << "errno = " << f.sys_errno;
}
00086 
00087 static void WriteShortId(FILE *fp, ShortId id) throw (FSFailure)
00088 /* Write the ShortId "id" to "fp" on its own line in a format understood
00089    by the repository's derived weeder. */
00090 {
00091     int res = fprintf(fp, "%08x\n", id);
00092     if (res < 0) throw (FSFailure(errno));
00093 }
00094 
00095 static void TimedMessage(char *msg) throw ()
00096 {
00097     Debug::Lock();
00098     cerr << Debug::Timestamp() << msg << endl;
00099     Debug::Unlock();
00100 }
00101 
00102 static void ScanGraphLogReader(RecoveryReader &rd, FILE *fp,
00103   /*INOUT*/ int &numEntries, /*INOUT*/ int &numDIs)
00104   throw (VestaLog::Error, VestaLog::Eof, FSFailure)
00105 {
00106     while (!rd.eof()) {
00107         GraphLog::Entry *entry = GraphLog::Entry::Recover(rd);
00108         if (entry->kind == GraphLog::NodeKind) {
00109             GraphLog::Node *node = (GraphLog::Node *)entry;
00110             numEntries++;
00111             Derived::Indices *refs = node->refs;
00112             for (int i = 0; i < refs->len; i++) {
00113                 WriteShortId(fp, refs->index[i]);
00114             }
00115             numDIs += refs->len;
00116         }
00117         delete entry;
00118     }
00119 }
00120 
00121 const char *CacheSection = "CacheServer";
00122 
00123 static Text ReadGraphLogPath() throw ()
00124 {
00125     const Text MDRoot(     ReadConfig::TextVal(CacheSection, "MetaDataRoot"));
00126     const Text MDDir(      ReadConfig::TextVal(CacheSection, "MetaDataDir"));
00127     const Text GraphLogDir(ReadConfig::TextVal(CacheSection, "GraphLogDir"));
00128     return MDRoot +'/'+ MDDir +'/'+ GraphLogDir;
00129 }
00130 
00131 static void ScanGraphLog(FILE *fp, const Text &host, const Text &port)
00132   throw (SRPC::failure, VestaLog::Error, VestaLog::Eof, FSFailure)
00133 /* Scan the graphLog, writing any deriveds appearing in it to "fp". */
00134 {
00135     Text graphLogPath(ReadGraphLogPath());
00136 
00137     // pre debugging
00138     TimedMessage("Started graph log scan");
00139     Debug::Lock();
00140     cerr << "  Configuration file: " << VestaConfig::get_location() << endl;
00141     cerr << "  Hostname:port: " << host << ':' << port << endl;
00142     cerr << "  Graph log: " << graphLogPath << endl;
00143     Debug::Unlock();
00144 
00145     // initialize and make sure another weed is not running
00146     WeederC weeder;  // binds to the cache server
00147     try {
00148       ifstream ifs;
00149       Text weededFile = 
00150         ReadConfig::TextVal("CacheServer", "MetaDataRoot") + "/" +
00151         ReadConfig::TextVal("Weeder", "MetaDataDir") + "/" +
00152         ReadConfig::TextVal("Weeder", "Weeded");
00153       FS::OpenReadOnly(weededFile, /*OUT*/ ifs);
00154       BitVector weeded(ifs);
00155       if (!weeded.IsEmpty()) {
00156         cerr <<
00157           "Fatal error: a VestaWeed is already running or in need of recovery!"
00158              << endl;
00159         cerr << "  If VestaWeed is running, wait for it to finish." <<endl;
00160         cerr << "  If VestaWeed is not running, either run it, allowing"<<endl;
00161         cerr << "  the prior weed to finish, or use EraseCache to" <<endl;
00162         cerr << "  completely delete the cache and the weeding state." << endl;
00163         exit(1);
00164       }
00165       FS::Close(ifs);
00166     }
00167     catch (FS::DoesNotExist) {
00168       // good, no weed to recover
00169     }
00170     
00171     bool conflict = weeder.WeederRecovering(/*doneMarking=*/ false);
00172     if (conflict) {
00173       cerr <<
00174         "Fatal error: a VestaWeed or QuickWeed is already running!" << endl;
00175       exit(1);
00176     }
00177 
00178     // checkpoint the cache server
00179     int newLogVer;
00180     (void) weeder.StartMark(/*OUT*/ newLogVer);
00181     weeder.ResumeLeaseExp();
00182 
00183     // open & read "graphLog"
00184     VestaLogSeq graphLogSeq(graphLogPath.chars());
00185     graphLogSeq.Open(/*ver=*/ -1, /*readonly=*/ true);
00186     RecoveryReader *rd;
00187     int numEntries = 0, numDIs = 0;
00188     while ((rd = graphLogSeq.Next(newLogVer)) != (RecoveryReader *)NULL) {
00189         ScanGraphLogReader(*rd, fp, /*INOUT*/ numEntries, /*INOUT*/ numDIs);
00190     }
00191 
00192     // post debugging
00193     TimedMessage("Finished graph log scan");
00194 
00195     // print stats
00196     Debug::Lock();
00197     cerr << "  Graph log entries processed = " << numEntries << endl;
00198     cerr << "  Derived files found = " << numDIs << endl;
00199     Debug::Unlock();
00200 }
00201 
00202 static void ScanFromConfig(FILE *fp)
00203   throw (SRPC::failure, VestaLog::Error, VestaLog::Eof, FSFailure)
00204 {
00205     // specify the machine on which the cache server is running
00206     Text host(ReadConfig::TextVal(CacheSection, "Host"));
00207     Text port(ReadConfig::TextVal(CacheSection, "Port"));
00208     ParCacheC::SetServerHost(host.chars());
00209 
00210     // scan the graphLog
00211     ScanGraphLog(fp, host, port);
00212 }
00213 
00214 static void SyntaxError(char *msg, char *arg = NULL) throw ()
00215 {
00216     cerr << "Error: " << msg;
00217     if (arg != NULL) cerr << ": \"" << arg << "\"";
00218     cerr << "; exiting..." << endl;
00219     cerr << "Syntax: QuickWeed [ -n ] [ -i ] [ -cf config-file ] ..." << endl;
00220     exit(1);
00221 }
00222 
00223 int main(int argc, char *argv[]) 
00224 {
00225     TextSeq configFiles;   // configuration files to scan
00226     bool doCentral = true; // scan graphLog of default config file?
00227     bool doWeeds = true;   // do the source and derived weeds?
00228     bool ckptRepos = true; // checkpoint repository after weeding?
00229     bool noCache = false;  // ignore the cache altogether
00230 
00231     // process command-line
00232     for (int arg = 1; arg < argc; arg++) {
00233         if (*argv[arg] == '-') {
00234             if (strcmp(argv[arg], "-n") == 0) {
00235                 doWeeds = false;
00236             } else if (strcmp(argv[arg], "-i") == 0) {
00237                 doCentral = false;
00238             } else if (strcmp(argv[arg], "-p") == 0) {
00239                 ckptRepos = false;
00240             } else if (strcmp(argv[arg], "-no-cache") == 0) {
00241                 noCache = true;
00242             } else if (strcmp(argv[arg], "-cf") == 0) {
00243                 if (++arg < argc) {
00244                     Text path(argv[arg]);
00245                     if (path[path.Length()-1] == '/') {
00246                         path += "vesta.cfg";
00247                     }
00248                     configFiles.addhi(path);
00249                 } else {
00250                     SyntaxError("expecting config-file after \"-cf\"");
00251                 }
00252             } else {
00253                 SyntaxError("unrecognized switch", argv[arg]);
00254             }
00255         } else {
00256             SyntaxError("unrecognized argument", argv[arg]);
00257         }
00258     }
00259 
00260     if (!doCentral && configFiles.size() == 0)
00261       {
00262         if(noCache)
00263           {
00264             cerr << "NOTE: ignoring all caches at your request." << endl
00265                  << "      (If there are any caches using this repository," << endl
00266                  << "      this will invalidate them!)" << endl;
00267           }
00268         else
00269           {
00270             cerr << "Error: -i specified without -cf; exiting..." << endl;
00271             exit(1);
00272           }
00273       }
00274     else if(noCache)
00275       {
00276         cerr << "Error: -no-cache specified without -i or with -cf; exiting..."
00277              << endl;
00278         exit(1);
00279       }
00280 
00281     try {
00282         FILE *fp = (FILE *)NULL;
00283         ShortId disShortId;
00284 
00285         if (doWeeds) {
00286             // create file for writing ShortIds to
00287             int fd = SourceOrDerived::fdcreate(
00288               /*OUT*/ disShortId, /*leafflag=*/ true);
00289             if (fd == -1) throw FSFailure(errno);
00290             fp = fdopen(fd, "w");
00291             Debug::Lock();
00292             fprintf(stderr, "Writing ShortIds of deriveds to keep to file ");
00293             fprintf(stderr, "%08x\n", disShortId);
00294             Debug::Unlock();
00295         } else {
00296             fp = stdout;
00297         }
00298 
00299         if (doWeeds) {
00300             TimedMessage("Started marking derived files to keep");
00301             // write the ShortId of the file itself
00302             WriteShortId(fp, disShortId);
00303         }
00304 
00305         // record start time (for later call to "keepDerived")
00306         time_t startT;
00307         (void) time(&startT);
00308 
00309         // scan all relevant graphLog files
00310         if (doCentral) ScanFromConfig(fp);
00311         while (configFiles.size() > 0) {
00312             VestaConfig::set_location(configFiles.remlo());
00313             ScanFromConfig(fp);
00314         }
00315 
00316         if (doWeeds) {
00317             // close the output file
00318             if (fflush(fp) == EOF || fclose(fp) == EOF) {
00319                 throw (FSFailure(errno));
00320             }
00321             TimedMessage("Finished marking derived files to keep");
00322 
00323             // delete the derived files
00324             TimedMessage(
00325                 "Started marking sources and deleting unreachable files");
00326             int derRes =
00327                 SourceOrDerived::keepDerived(disShortId, startT);
00328             TimedMessage(
00329                 "Finished marking sources and deleting unreachable files");
00330             if (derRes != 0) {
00331                 cerr << "Derived weed error = " << derRes << endl;
00332             } else {
00333                 if (ckptRepos) {
00334                     TimedMessage("Started checkpointing the repository");
00335                     SourceOrDerived::checkpoint();
00336                     TimedMessage("Finished checkpointing the repository");
00337                 }
00338             }
00339         }
00340     }
00341     catch (SRPC::failure &f) {
00342         cerr << "SRPC failure: " << f.msg << "; exiting..." << endl;
00343         exit(1);
00344     }
00345     catch (VestaLog::Error &err) {
00346         cerr << "VestaLog fatal error -- failed reading graph log:" << endl;
00347         cerr << "  " << err.msg << endl;
00348         cerr << "Exiting..." << endl;
00349         exit(1);
00350     }
00351     catch (VestaLog::Eof) {
00352         cerr << "VestaLog fatal error: ";
00353         cerr << "unexpected EOF while reading graph log; exiting..." << endl;
00354         exit(1);
00355     }
00356     catch (FSFailure &f) {
00357         cerr << "Error creating/writing derived keep file, "
00358           << f << "; exiting..." << endl;
00359         exit(1);
00360     }
00361 }

Generated on Mon May 8 00:48:59 2006 for Vesta by  doxygen 1.4.2