Main Page | Namespace List | Class Hierarchy | Class List | Directories | File List | Namespace Members | Class Members | File Members

glue.C

Go to the documentation of this file.
00001 // Copyright (C) 2001, Compaq Computer Corporation
00002 // 
00003 // This file is part of Vesta.
00004 // 
00005 // Vesta is free software; you can redistribute it and/or
00006 // modify it under the terms of the GNU Lesser General Public
00007 // License as published by the Free Software Foundation; either
00008 // version 2.1 of the License, or (at your option) any later version.
00009 // 
00010 // Vesta is distributed in the hope that it will be useful,
00011 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00012 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013 // Lesser General Public License for more details.
00014 // 
00015 // You should have received a copy of the GNU Lesser General Public
00016 // License along with Vesta; if not, write to the Free Software
00017 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00018 
00019 //
00020 // glue.C
00021 //
00022 // Glue code between nfsd.C and repository implementation
00023 //
00024 
00025 #if __linux__
00026 #include <stdint.h>
00027 #endif
00028 #include <pthread.h>
00029 #include <errno.h>
00030 #include <iomanip>
00031 #include "VestaSource.H"
00032 #include "ShortIdBlock.H"
00033 #include "FdCache.H"
00034 #include "VRConcurrency.H"
00035 #include "VLogHelp.H"
00036 #include "VestaConfig.H"
00037 #include "Mastership.H"
00038 #include "nfsd.H"
00039 #include "logging.H"
00040 #include <assert.h>
00041 #include "VestaSourceImpl.H"
00042 #include "CharsSeq.H"
00043 #include "CopyShortId.H"
00044 
00045 #include "timing.H"
00046 #include "lock_timing.H"
00047 
00048 #include <BufStream.H>
00049 
00050 using std::ostream;
00051 using std::endl;
00052 using std::setw;
00053 using std::setfill;
00054 using std::hex;
00055 using std::dec;
00056 using Basics::OBufStream;
00057 
00058 #if __digital__
00059 extern "C" int statfs(const char*, struct statfs*);
00060 #else
00061 #if __linux__
00062 #include <sys/vfs.h>
00063 #endif
00064 #endif
00065 
00066 
00067 // Import
00068 extern int nfs_port;  // defined outside this file; reused below as the reported fsid
00069 
00070 #define VR_FSID nfs_port          // arbitrary value, reported as fattr.fsid
00071 #define VR_RDEV 9999              // arbitrary value, reported as fattr.rdev except for devices
00072 #define VR_BLOCKSIZE NFS_MAXDATA  // optimal block size for i/o, reported as fattr.blocksize for synthesized attributes
00073 #define VR_BLOCKUNIT 512          // unit for st_blocks (POSIX standard); used to derive fattr.blocks from fattr.size
00074 #define VR_DEFAULT_COW_MAX 4      // max copy-on-writes in progress at once; initial value of cowMax
00075 
00076 // Global
00077 bool shortIdSymlink = false;  // implement shortids as symlinks (see any_fattr)
00078 Text shortIdSymlinkPrefix;  // NOTE(review): presumably the link-target prefix for shortid symlinks; not used in the code visible here
00079 int shortIdSymlinkLength = 0;  // size that any_fattr reports for a shortid manifested as a symlink
00080 bool allowSymlink = false;  // allow symlink() through NFS interface
00081 bool bsdChown = false;  // if true, apply_sattr refuses a uid change (NFSERR_PERM) unless cred passes the administrative access check
00082 
00083 // Ensures the same file is not copied twice by different threads (see do_cow).
00084 static Basics::mutex cowLock;  // protects the cowInProgress table
00085 static Basics::cond cowCond;   // broadcast by do_cow whenever a table slot is released
00086 int cowMax = VR_DEFAULT_COW_MAX;  // number of slots do_cow scans in cowInProgress
00087 struct CowInProgress {
00088   bool active;    // true while some thread is using this slot for a copy
00089   LongId longid;  // object being copied; only compared by do_cow when active is true
00090 };
00091 CowInProgress *cowInProgress;  // indexed 0..cowMax-1 by do_cow; allocated outside the code visible here
00092 
00093 const struct {  // errno <-> NFS status pairs; xlate_errno scans them in order and returns the first errno match
00094         nfsstat error;  // NFS status code
00095         int errno_val;  // UNIX errno value paired with it
00096 } nfs_errtbl[]= {
00097         { NFS_OK,               0               },
00098         { NFSERR_PERM,          EPERM           },
00099         { NFSERR_NOENT,         ENOENT          },
00100         { NFSERR_IO,            EIO             },
00101         { NFSERR_NXIO,          ENXIO           },
00102         { NFSERR_ACCES,         EACCES          },
00103         { NFSERR_EXIST,         EEXIST          },
00104         { NFSERR_NODEV,         ENODEV          },
00105         { NFSERR_NOTDIR,        ENOTDIR         },
00106         { NFSERR_ISDIR,         EISDIR          },
00107         { NFSERR_INVAL,         EINVAL          },
00108         { NFSERR_FBIG,          EFBIG           },
00109         { NFSERR_NOSPC,         ENOSPC          },
00110         { NFSERR_ROFS,          EROFS           },
00111         { NFSERR_NAMETOOLONG,   ENAMETOOLONG    },
00112         { NFSERR_NOTEMPTY,      ENOTEMPTY       },
00113 #ifdef EDQUOT
00114         { NFSERR_DQUOT,         EDQUOT          },
00115 #endif
00116         { NFSERR_STALE,         ESTALE          },
00117         { NFSERR_WFLUSH,        EIO             },  // never produced by xlate_errno: the NFSERR_IO row above matches EIO first
00118         { (nfsstat) -1,         EIO             }   // sentinel: error == -1 ends the scan in xlate_errno
00119 };
00120 
00121 /* Lookup a UNIX error code and return NFS equivalent. */
00122 nfsstat
00123 xlate_errno(int errno_val)
00124 {
00125     int i;
00126 
00127     for (i = 0; nfs_errtbl[i].error != -1; i++) {
00128         if (nfs_errtbl[i].errno_val == errno_val)
00129           return (nfs_errtbl[i].error);
00130     }
00131     Text etxt = Basics::errno_Text(errno_val);
00132     Repos::dprintf(DBG_ALWAYS, "non-standard errno: %d (%s)\n",
00133                    errno_val, etxt.cchars());
00134     return (NFSERR_IO);
00135 }
00136 
00137 nfsstat
00138 xlate_vserr(VestaSource::errorCode err)
00139 {
00140     switch (err) {
00141       case VestaSource::ok:
00142         return NFS_OK;
00143       case VestaSource::notFound:
00144         return NFSERR_NOENT;
00145       case VestaSource::noPermission:
00146         return NFSERR_ACCES;
00147       case VestaSource::nameInUse:
00148         return NFSERR_EXIST;
00149       case VestaSource::inappropriateOp:
00150       case VestaSource::notMaster:
00151         // Perhaps questionable message choice.  Rationale: these
00152         // errors are usually an attempt to mutate something immutable.
00153         return NFSERR_ROFS;
00154       case VestaSource::nameTooLong:
00155         return NFSERR_NAMETOOLONG;
00156       case VestaSource::rpcFailure:
00157         return NFSERR_IO;
00158       case VestaSource::notADirectory:
00159         return NFSERR_NOTDIR;
00160       case VestaSource::isADirectory:
00161         return NFSERR_ISDIR;
00162       case VestaSource::invalidArgs:
00163       case VestaSource::longIdOverflow:
00164         return NFSERR_INVAL;
00165       case VestaSource::outOfSpace:
00166         return NFSERR_NOSPC;
00167       default:
00168         return NFSERR_NXIO;
00169     }
00170 }
00171 
00172 
00173 struct ToUnixIdClosure {  // in/out state for toUnixUidCallback and toUnixGidCallback
00174   uid_t id;  // out: local Unix id found (also holds group ids); callers preset it to the "foreign" default
00175   bool setid;  // out: true if the matched value is also listed under #setuid / #setgid
00176   VestaSource* vs;  // in: object whose #setuid / #setgid attribute is consulted
00177 };
00178 
00179 static bool
00180 toUnixUidCallback(void* closure, const char* value)
00181 {
00182   ToUnixIdClosure* cl = (ToUnixIdClosure*) closure;
00183   const char* at = strchr(value, '@');
00184   if (at && strcasecmp(at + 1, AccessControl::realm) == 0) {
00185     cl->id = AccessControl::globalToUnixUser(value);
00186     cl->setid = cl->vs->inAttribs("#setuid", value);
00187     return false;
00188   }
00189   return true;
00190 }
00191 
00192 static bool
00193 toUnixGidCallback(void* closure, const char* value)
00194 {
00195   ToUnixIdClosure* cl = (ToUnixIdClosure*) closure;
00196   const char* at = strchr(value, '@');
00197   if (at && strcasecmp(at + 1, AccessControl::realm) == 0) {
00198     cl->id = AccessControl::globalToUnixGroup(value);
00199     cl->setid = cl->vs->inAttribs("#setgid", value);
00200     return false;
00201   }
00202   return true;
00203 }
00204 
00205 
00206 void
00207 file_fattr(fattr* attr, struct stat* st, VestaSource* vs)
00208 {
00209     // Kludge: some side effects on vs->ac
00210     if (RootLongId.isAncestorOf(vs->longid)) {
00211         // Immutable; turn off write access
00212         vs->ac.mode &= ~0222;
00213     } else if (FileShortIdRootLongId.isAncestorOf(vs->longid) &&
00214                (st->st_mode & 0222) == 0) {
00215         // Old file in volatileROEDirectory, immutable
00216         vs->ac.mode &= ~0222;
00217         vs->type = VestaSource::immutableFile;
00218     }
00219     attr->mode = NFSMODE_REG | vs->ac.mode;
00220     if (st->st_mode & 0111) {
00221         // Set execute access same as read access
00222         attr->mode = (attr->mode & ~0111) | ((attr->mode & 0444) >> 2);
00223     } else {
00224         // Turn off execute access
00225         attr->mode &= ~0111;
00226     }
00227 
00228     // Find local #owner if any, and see if it is on #setuid list
00229     ToUnixIdClosure cl;
00230     cl.id = AccessControl::vforeignUser;
00231     cl.setid = false;
00232     cl.vs = vs;
00233     vs->ac.owner.getAttrib("#owner", toUnixUidCallback, &cl);
00234     attr->uid = cl.id;
00235     if (cl.setid) attr->mode |= 04000;
00236 
00237     // Find local #group if any, and see if it is on #setgid list
00238     cl.id = AccessControl::vforeignGroup;
00239     cl.setid = false;
00240     cl.vs = vs;
00241     vs->ac.group.getAttrib("#group", toUnixGidCallback, &cl);
00242     attr->gid = cl.id;
00243     if (cl.setid) attr->mode |= 02000;
00244 
00245     attr->type = NFREG;
00246     attr->nlink = vs->linkCount();
00247     attr->size = st->st_size;
00248     attr->blocksize = st->st_blksize;
00249     attr->rdev = VR_RDEV;
00250     attr->blocks = st->st_blocks;
00251     attr->fsid = VR_FSID;
00252     attr->fileid = vs->pseudoInode;
00253     attr->atime.seconds = st->st_atime;
00254     attr->mtime.seconds = st->st_mtime;
00255     attr->ctime.seconds = st->st_ctime;
00256 #if __digital__
00257     attr->atime.useconds = st->st_spare1;
00258     attr->mtime.useconds = st->st_spare2;
00259     attr->ctime.useconds = st->st_spare3;
00260 #else
00261     attr->atime.useconds = 0;
00262     attr->mtime.useconds = 0;
00263     attr->ctime.useconds = 0;
00264 #endif
00265 }
00266 
00267 // Utility to get last modified time of attributes: closure shared by attrModTimeCallback and attrModTime
00268 typedef struct {
00269     time_t time;  // largest timestamp seen so far; attrModTime seeds it with 2
00270 } AttrModTimeClosure;
00271 
00272 bool
00273 attrModTimeCallback(void* closure, VestaSource::attribOp op, const char* name,
00274                     const char* value, time_t timestamp)
00275 {
00276     AttrModTimeClosure *cl = (AttrModTimeClosure *) closure;
00277     if (cl->time < timestamp) cl->time = timestamp;
00278     return true;
00279 }
00280 
00281 time_t
00282 attrModTime(VestaSource *vs)
00283 {
00284     AttrModTimeClosure cl;
00285     cl.time = 2;  // arbitrary nonzero value
00286     vs->getAttribHistory(attrModTimeCallback, &cl);
00287     return cl.time;
00288 }
00289 
00290 // If vs is a file, fd may be either -1 or an open file descriptor
00291 nfsstat
00292 any_fattr(fattr* attr, VestaSource* vs, int fd)
00293 {
00294     struct stat st;
00295     int res;
00296     char *path;
00297     const char *linkval;
00298     ToUnixIdClosure cl;
00299     
00300     switch (vs->type) {
00301       case VestaSource::immutableFile:
00302       case VestaSource::mutableFile:
00303         {
00304           bool close_fd = false;
00305           FdCache::OFlag ofl;
00306           if (fd == -1)
00307             {
00308               fd = FdCache::tryopen(vs->shortId(), FdCache::any, &ofl);
00309               close_fd = (fd != -1);
00310             }
00311           if (fd == -1)
00312             {
00313               // No file descriptor provided and none in the FdCache:
00314               // use stat.
00315               char *sid_fname = ShortIdBlock::shortIdToName(vs->shortId());
00316               RECORD_TIME_POINT;
00317               res = stat(sid_fname, &st);
00318               RECORD_TIME_POINT;
00319               delete [] sid_fname;
00320             }
00321           else
00322             {
00323               // We have a file descriptor: use fstat
00324               RECORD_TIME_POINT;
00325               res = fstat(fd, &st);
00326               RECORD_TIME_POINT;
00327             }
00328           // Return the file descriptor to the FdCache if we got it
00329           // from there.
00330           if(close_fd)
00331             FdCache::close(vs->shortId(), fd, ofl);
00332         }
00333         if (res < 0) {
00334             return xlate_errno(errno);
00335         }
00336 
00337         if (shortIdSymlink &&
00338             FileShortIdRootLongId.isAncestorOf(vs->longid) &&
00339             (st.st_mode & 0222) == 0) {
00340             // Manifest as symlink
00341             attr->type = NFLNK;
00342             attr->mode = NFSMODE_LNK | 0444;
00343             attr->nlink = 1;
00344             attr->uid = 0;
00345             attr->gid = 0;
00346             attr->size = shortIdSymlinkLength;
00347             attr->blocksize = VR_BLOCKSIZE;
00348             attr->rdev = VR_RDEV;
00349             attr->blocks = (attr->size + VR_BLOCKUNIT - 1)/VR_BLOCKUNIT;
00350             attr->fsid = VR_FSID;
00351             attr->fileid = vs->pseudoInode;
00352             attr->atime.seconds = attr->mtime.seconds = 
00353               attr->ctime.seconds = 2;    // arbitrary nonzero value
00354             attr->atime.useconds = attr->mtime.useconds =
00355               attr->ctime.useconds = 0;
00356             break;
00357         }
00358         file_fattr(attr, &st, vs);
00359         break;
00360         
00361       case VestaSource::device:
00362         attr->type = NFCHR;
00363         attr->mode = NFSMODE_CHR | 0666;
00364         attr->nlink = 1;
00365         attr->uid = 0;
00366         attr->gid = 0;
00367         attr->size = 0;
00368         attr->blocksize = VR_BLOCKSIZE;
00369         attr->rdev = vs->shortId(); // device number, not a real shortId
00370         attr->blocks = 0;
00371         attr->fsid = VR_FSID;
00372         attr->fileid = vs->pseudoInode;
00373         attr->atime.seconds = attr->mtime.seconds = 
00374           attr->ctime.seconds = 2;    // arbitrary nonzero value
00375         attr->atime.useconds = attr->mtime.useconds =
00376           attr->ctime.useconds = 0;
00377         break;
00378         
00379       case VestaSource::immutableDirectory:
00380       case VestaSource::appendableDirectory:
00381       case VestaSource::mutableDirectory:
00382       case VestaSource::volatileDirectory:
00383       case VestaSource::volatileROEDirectory:
00384       case VestaSource::evaluatorDirectory:
00385       case VestaSource::evaluatorROEDirectory:
00386         attr->type = NFDIR;
00387         attr->mode = NFSMODE_DIR | vs->ac.mode;
00388         if (vs->type == VestaSource::immutableDirectory
00389             && RootLongId.isAncestorOf(vs->longid)) {
00390             // Turn off write access
00391             attr->mode &= ~0222;  
00392         }
00393         attr->nlink = 1;
00394 
00395         // Find local #owner if any
00396         cl.id = AccessControl::vforeignUser;
00397         cl.setid = false;
00398         cl.vs = vs;
00399         vs->ac.owner.getAttrib("#owner", toUnixUidCallback, &cl);
00400         attr->uid = cl.id;
00401 
00402         // Find local #group if any
00403         cl.id = AccessControl::vforeignGroup;
00404         cl.setid = false;
00405         cl.vs = vs;
00406         vs->ac.group.getAttrib("#group", toUnixGidCallback, &cl);
00407         attr->gid = cl.id;
00408 
00409         attr->size = 1 * VR_BLOCKUNIT;  // arbitrary value
00410         attr->blocksize = VR_BLOCKSIZE;
00411         attr->rdev = VR_RDEV;
00412         attr->blocks = 1;               // must be consistent with attr->size
00413         attr->fsid = VR_FSID;
00414         attr->fileid = vs->pseudoInode;
00415         attr->atime.seconds = attr->mtime.seconds = 
00416           attr->ctime.seconds = vs->timestamp();
00417         attr->atime.useconds = attr->mtime.useconds =
00418           attr->ctime.useconds = 0;
00419         break;
00420         
00421       case VestaSource::ghost:
00422       case VestaSource::stub:
00423         if ((linkval = vs->getAttribConst("symlink-to")) != NULL) {
00424             // Manifest as symlink.  Make mode bits reflect the right
00425             // to read/change attributes.
00426             attr->type = NFLNK;
00427             attr->mode = NFSMODE_LNK | 0444 |
00428               (vs->ac.mode & (S_IWUSR|S_IWGRP|S_IWOTH));
00429             if (strcmp(linkval, "$LAST") == 0) {
00430                 // Allow for $LAST to be expanded to a 32-bit decimal
00431                 // number.  Set the timestamps to those of the parent
00432                 // directory to help prevent outdated values from
00433                 // being cached.
00434                 attr->size = 10;
00435                 VestaSource* parent = vs->longid.getParent().lookup();
00436                 attr->atime.seconds = attr->mtime.seconds =
00437                     attr->ctime.seconds = parent->timestamp();
00438                 attr->atime.useconds = attr->mtime.useconds =
00439                     attr->ctime.useconds = 0;
00440                 delete parent;
00441             } else {
00442                 attr->size = strlen(linkval);
00443                 attr->atime.seconds = attr->mtime.seconds =
00444                     attr->ctime.seconds = attrModTime(vs);
00445                 attr->atime.useconds = attr->mtime.useconds =
00446                     attr->ctime.useconds = 0;
00447             }
00448         } else if (vs->type == VestaSource::ghost) {
00449             // Manifest as file with mode ---------T
00450             // Should ghosts be optionally invisible?
00451             attr->type = NFREG;
00452             attr->mode = NFSMODE_REG | S_ISVTX;
00453             attr->size = 0;
00454             attr->atime.seconds = attr->mtime.seconds = 
00455               attr->ctime.seconds = 2;    // arbitrary nonzero value
00456             attr->atime.useconds = attr->mtime.useconds =
00457               attr->ctime.useconds = 0;
00458         } else if (vs->master) {
00459             // Manifest as file with mode --wS-w--w-
00460             // using real write perms.
00461             attr->type = NFREG;
00462             attr->mode = NFSMODE_REG |
00463               (vs->ac.mode & (S_IWUSR|S_IWGRP|S_IWOTH)) | S_ISUID;
00464             attr->size = 0;
00465             attr->atime.seconds = attr->mtime.seconds = 
00466               attr->ctime.seconds = 2;    // arbitrary nonzero value
00467             attr->atime.useconds = attr->mtime.useconds =
00468               attr->ctime.useconds = 0;
00469         } else {
00470             // Manifest as file with mode ------S---
00471             attr->type = NFREG;
00472             attr->mode = NFSMODE_REG | S_ISGID;
00473             attr->size = 0;
00474             attr->atime.seconds = attr->mtime.seconds = 
00475               attr->ctime.seconds = 2;    // arbitrary nonzero value
00476             attr->atime.useconds = attr->mtime.useconds =
00477               attr->ctime.useconds = 0;
00478         }
00479         attr->nlink = 1;
00480 
00481         // Find local #owner if any
00482         cl.id = AccessControl::vforeignUser;
00483         cl.setid = false;
00484         cl.vs = vs;
00485         vs->ac.owner.getAttrib("#owner", toUnixUidCallback, &cl);
00486         attr->uid = cl.id;
00487 
00488         // Find local #group if any
00489         cl.id = AccessControl::vforeignGroup;
00490         cl.setid = false;
00491         cl.vs = vs;
00492         vs->ac.group.getAttrib("#group", toUnixGidCallback, &cl);
00493         attr->gid = cl.id;
00494 
00495         attr->blocksize = VR_BLOCKSIZE;
00496         attr->rdev = VR_RDEV;
00497         attr->blocks = (attr->size + VR_BLOCKUNIT - 1)/VR_BLOCKUNIT;
00498         attr->fsid = VR_FSID;
00499         attr->fileid = vs->pseudoInode;
00500         break;
00501         
00502       default:
00503         assert(false);
00504         break;
00505     }
00506     if (!vs->master) {
00507         // Turn off write access
00508         attr->mode &= ~0222;  
00509     }
00510     return NFS_OK;
00511 }
00512 
00513 static void
00514 stalemsg(const char* what, const LongId* longid)
00515 {
00516     if (Repos::isDebugLevel(DBG_STALENFS)) {
00517         char lid[256];
00518         OBufStream ost(lid, sizeof(lid));
00519         ost << what << " on stale handle " << *longid << endl;
00520         Repos::dprintf(DBG_ALWAYS, "%s", ost.str());
00521     }
00522 }
00523 
00524 // Copy-on-write: the immutableFile vs needs to be copied before it can
00525 // be written; do so.  Caller does not hold a lock, but vs is known to be
00526 // a VLeaf, so its fields do not need to be protected.  On success *status
00527 // is NFS_OK and a new VestaSource is returned, again with no lock held;
00528 // on failure *status holds the error and NULL is returned.  len is passed
00529 // to CopyShortId as both input and output (its meaning is defined there).
00530 VestaSource* do_cow(VestaSource* vs, /*OUT*/nfsstat* status, 
00531                     Basics::uint64 len= ((Basics::uint64)-1))
00532 {
00533   VestaSource* tmpvs1 = NULL;  // result of the read-locked lookup
00534   VestaSource* tmpvs2 = NULL;  // result of the write-locked lookup
00535   VestaSource* newvs = NULL;   // object handed back to the caller
00536   nfsstat st = NFS_OK;
00537   ReadersWritersLock* lock = NULL;  // repository lock currently held, if any
00538   int cowi = -1;  // index of the cowInProgress slot this thread claims
00539 
00540   // Enter the longid into the cowInProgress structure to ensure that
00541   // only one thread tries to do the copy.  This logical lock must be
00542   // acquired before the readLock or writeLock to avoid deadlock.
00543   cowLock.lock();
00544   for (;;) {
00545     int i;
00546     for (i=0; i<cowMax; i++) {
00547       if (cowInProgress[i].active) {
00548         if (cowInProgress[i].longid == vs->longid) {
00549           cowi = -1;  // this file is already being copied: discard any free slot found and wait
00550           break;
00551         }
00552       } else {
00553         if (cowi == -1) cowi = i;  // remember the first free slot
00554       }
00555     }
00556     if (cowi != -1) break;  // got a slot and nobody else is copying this file
00557     cowCond.wait(cowLock);  // table full or file busy: wait for a slot to be released, then rescan
00558   }
00559   cowInProgress[cowi].active = true;
00560   cowInProgress[cowi].longid = vs->longid;
00561   cowLock.unlock();
00562 
00563   // Acquire the read lock to check that the copy is still needed
00564   // and the file is still in the directory structure.
00565   tmpvs1 = vs->longid.lookup(LongId::readLock, &lock);
00566   if (tmpvs1 == NULL) {
00567     st = NFSERR_STALE;
00568     stalemsg("write", &vs->longid);
00569     goto error;
00570   }
00571 
00572   RWLOCK_LOCKED_REASON(lock, "do_cow:checking");
00573 
00574   // COW still needed?
00575   if (tmpvs1->type == VestaSource::immutableFile) {
00576     // Still needed.  Drop the readLock while doing the copy.
00577     lock->releaseRead();
00578     lock = NULL;
00579 
00580     // Copy to a new sid
00581     int copy_errno;
00582     ShortId newsid = CopyShortId(tmpvs1->shortId(), copy_errno,
00583                                  len, &len);
00584     if (newsid == NullShortId) {
00585       st = xlate_errno(copy_errno);
00586       goto error;
00587     }
00588 
00589     // Log a debugging message
00590     if (Repos::isDebugLevel(DBG_COW)) {
00591       char msg[256];
00592       OBufStream ost(msg, sizeof(msg));
00593       ost << "copy on write: longid " << vs->longid
00594           << ", from sid 0x" << hex << tmpvs1->shortId()
00595           << ", to sid 0x" << newsid
00596           << ", length " << dec << len << endl;
00597       Repos::dprintf(DBG_ALWAYS, "%s", ost.str());
00598     }
00599 
00600     // Acquire the writeLock.  Must redo lookup because releasing
00601     // the read lock allows the directory structure to change.
00602     tmpvs2 = vs->longid.lookup(LongId::writeLock, &lock);
00603     if (tmpvs2 == NULL) {
00604       st = NFSERR_STALE;
00605       stalemsg("write", &vs->longid);
00606       goto error;
00607     }
00608 
00609     RWLOCK_LOCKED_REASON(lock, "do_cow:copying");
00610 
00611     // Update the directory structure to point to the new sid
00612     VestaSource::errorCode err = vs->makeMutable(newvs, newsid);  // NOTE(review): invoked on the caller's vs, not on the freshly looked-up tmpvs2 -- confirm this is intended
00613     if (err != VestaSource::ok) {
00614       st = xlate_vserr(err);
00615       goto error;
00616     }
00617 
00618   } else if (tmpvs1->type != VestaSource::mutableFile) {
00619     // Type changed.  Can this really happen?
00620     st = NFSERR_ISDIR;
00621   } else {
00622     // It's mutable, which means another thread must have done the
00623     // copy just before us.
00624     newvs = tmpvs1;
00625     tmpvs1 = NULL;  // handed to the caller via newvs, so it must not be deleted below
00626   }
00627 
00628  error:  // common exit for success and failure alike
00629   if (lock) lock->release();  // whichever of the read or write lock is still held
00630   cowLock.lock();
00631   cowInProgress[cowi].active = false;  // give up our slot ...
00632   cowLock.unlock();
00633   cowCond.broadcast();  // ... and wake every thread waiting in the loop above
00634   *status = st;
00635   if (tmpvs1) delete tmpvs1;
00636   if (tmpvs2) delete tmpvs2;
00637   if (st != NFS_OK && newvs) {
00638     delete newvs;
00639     newvs = NULL;
00640   }
00641   return newvs;
00642 }
00643 
00644 //
00645 // Get NFS attributes for a file or directory in the repository
00646 //
00647 nfsstat
00648 do_getattr(nfs_fh* fh, fattr* attr, AccessControl::Identity cred)
00649 {
00650     nfsstat status = NFS_OK;
00651     ReadersWritersLock* lock;
00652     RECORD_TIME_POINT;
00653     VestaSource* vs = ((LongId*) fh)->lookup(LongId::readLock, &lock);
00654     RECORD_TIME_POINT;
00655 
00656     RWLOCK_LOCKED_REASON(lock, "NFS:getattr");
00657 
00658     if (vs == NULL) {
00659       if(*((LongId*) fh) == NullLongId)
00660         {
00661           status = NFSERR_INVAL;
00662         }
00663       else
00664         {
00665           status = NFSERR_STALE;
00666           stalemsg("getattr", (LongId*) fh);
00667         }
00668       goto finish;
00669     }
00670     // If this is a VLeaf, it's safe to release the lock before
00671     // calling any_fattr.
00672     if((lock != NULL) &&
00673        ((vs->type == VestaSource::immutableFile) ||
00674         (vs->type == VestaSource::mutableFile)))
00675       {
00676         lock->releaseRead();
00677         lock = NULL;
00678       }
00679     status = any_fattr(attr, vs, -1);
00680     RECORD_TIME_POINT;
00681   finish:
00682     if (lock != NULL) lock->releaseRead();
00683     if (vs != NULL) delete vs;
00684     RECORD_TIME_POINT;
00685     return status;
00686 }
00687 
00688 // Support for removeOldFromRealm; see below
00689 struct RemoveOldFromRealmClosure {
00690   VestaSource* vs;  // object being examined; set by removeOldFromRealm, not read by the callback
00691   CharsSeq oldvals;  // out: attribute values collected by removeOldFromRealmCallback (those in the local realm)
00692 };
00693 
00694 static bool
00695 removeOldFromRealmCallback(void* closure, const char* value)
00696 {
00697   RemoveOldFromRealmClosure* cl = (RemoveOldFromRealmClosure*) closure;
00698   const char* at = strrchr(value, '@');
00699   if (at && strcasecmp(at + 1, AccessControl::realm) == 0) {
00700     cl->oldvals.addhi(value);
00701   }
00702   return true;
00703 }
00704 
00705 struct CopyInheritedClosure {  // state for copyInheritedCallback
00706   VestaSource* vs;  // object that receives the copied attribute history entries
00707   const char* name;  // only history entries for this attribute name are copied
00708 };
00709 
00710 static bool
00711 copyInheritedCallback(void* closure, VestaAttribs::attribOp op,
00712                       const char* name, const char* value,
00713                       time_t timestamp)
00714 {
00715   CopyInheritedClosure* cl = (CopyInheritedClosure*) closure;
00716   if (strcmp(name, cl->name) == 0) {
00717     cl->vs->writeAttrib(op, name, value, NULL, timestamp);
00718   }
00719   return true;
00720 }
00721 
00722 // 
00723 // Remove old owner (or group) attrib in local realm, if any.  Also
00724 // gives the object its own owner (or group) attribute if it had been
00725 // inheriting; this is needed since the caller is about to do an
00726 // addAttrib.
00727 //
00728 static VestaSource::errorCode
00729 removeOldFromRealm(VestaSource* vs, bool owner /* vs. group */,
00730                    const char* newval, AccessControl::Identity cred)
00731 {
00732   if (!vs->hasAttribs()) return VestaSource::invalidArgs;
00733   const char* name = owner ? "#owner" : "#group";
00734   const char* name2 = owner ? "#setuid" : "#setgid";
00735   if (vs->getAttribConst(name) == NULL) {
00736     // Copy inherited attribs before modifying own attribs
00737     CopyInheritedClosure cic;
00738     cic.vs = vs;
00739     cic.name = name;
00740     if (owner) {
00741       vs->ac.owner.getAttribHistory(copyInheritedCallback, &cic);
00742     } else {
00743       vs->ac.group.getAttribHistory(copyInheritedCallback, &cic);
00744     }
00745   }
00746   // Strategy: we can't modify vs's own attributes while listing them,
00747   // so instead we make a list of the needed changes, then do them.
00748   RemoveOldFromRealmClosure cl;
00749   cl.vs = vs;
00750   vs->getAttrib(name, removeOldFromRealmCallback, &cl);
00751   int i;
00752   for (i=0; i<cl.oldvals.size(); i++) {
00753     const char* value = cl.oldvals.get(i);
00754     VestaSource::errorCode err;
00755     if (strcmp(value, newval) == 0) continue; // don't remove if about to add
00756     // First remove from the corresponding #setuid/#setgid attrib if present
00757     if (vs->inAttribs(name2, value)) {
00758       err = vs->removeAttrib(name2, value, cred);
00759       if (err != VestaSource::ok) return err;
00760     }
00761     // Then remove from the #owner/#group attribute
00762     err = vs->removeAttrib(name, value, cred);
00763     if (err != VestaSource::ok) return err;
00764   }
00765   return VestaSource::ok;
00766 }
00767 
00768 //
00769 // Common code for setting NFS attributes
00770 // newattr: new attributes requested (in)
00771 // vs: VestaSource object (in/out)
00772 // fd: If vs is a file, fd may be either -1 or a file descriptor
00773 //       open for writing; otherwise fd is unused (in)
00774 // attr: resulting attributes after changes (out)
00775 //
00776 nfsstat
00777 apply_sattr(sattr* newattr, VestaSource* vs, int fd,
00778             AccessControl::Identity cred, fattr* attr)
00779 {
00780     bool isfile = (vs->type == VestaSource::immutableFile ||
00781                    vs->type == VestaSource::mutableFile);
00782     bool closefd = false;
00783     bool commit = false;
00784     struct stat st;
00785     nfsstat status = NFS_OK;
00786     bool setctime = false;
00787     timeval now;
00788     int ok = gettimeofday(&now, NULL);
00789     assert(ok != -1);
00790     FdCache::OFlag ofl;
00791 
00792     if (RootLongId.isAncestorOf(vs->longid) ||
00793         MutableRootLongId.isAncestorOf(vs->longid)) {
00794       // Make changes as failure-atomic as we can.  Some changes are
00795       // made immediately to the sid file, however.  Also, changes
00796       // aren't error-atomic; i.e., there can be a permission error
00797       // partway through, after some changes have already been made, and
00798       // those changes will be committed anyway.
00799       VRLog.start();
00800       commit = true;
00801     }
00802 
00803     // Get current attributes.  Also opens file if needed.
00804     if (isfile) {
00805         if (fd == -1) {
00806             fd = FdCache::open(vs->shortId(), FdCache::any, &ofl);
00807             if (fd < 0) {
00808                 // Most likely file isn't really there, though
00809                 // normally this shouldn't happen.
00810                 status = xlate_errno(errno);
00811                 goto finish;
00812             }
00813             closefd = true;
00814         }
00815         if (fstat(fd, &st) < 0) {
00816             status = xlate_errno(errno);
00817             goto finish;
00818         }
00819         RECORD_TIME_POINT;
00820         file_fattr(attr, &st, vs);
00821         RECORD_TIME_POINT;
00822     } else {
00823         status = any_fattr(attr, vs, -1);
00824         if (status != NFS_OK) goto finish;
00825     }
00826 
00827     // size changes
00828     if (isfile && newattr->size != (u_int) -1 &&
00829         newattr->size != attr->size) {
00830       RECORD_TIME_POINT;
00831         // Access check
00832         if (!vs->master) {
00833             status = NFSERR_ROFS;
00834             goto finish;
00835         }
00836         if (!vs->ac.check(cred, AccessControl::write)) {
00837             status = NFSERR_ACCES;
00838             goto finish;
00839         }
00840         if (closefd && ofl == FdCache::ro) {
00841             // Oops, needed it open for writing
00842             FdCache::close(vs->shortId(), fd, ofl);
00843             fd = FdCache::open(vs->shortId(), FdCache::rw, &ofl);
00844             if (fd < 0) {
00845                 closefd = false;
00846                 status = xlate_errno(errno);
00847                 goto finish;
00848             }
00849         }
00850         if (ftruncate(fd, newattr->size) < 0) {
00851             status = xlate_errno(errno);
00852             goto finish;
00853         }
00854         // Modify attr to reflect the change
00855         attr->size = newattr->size;
00856         setctime = true;
00857     }
00858 
00859     // uid changes
00860     if (newattr->uid != (u_int) -1 && newattr->uid != attr->uid) {
00861       RECORD_TIME_POINT;
00862       // Ignore error if setting uid on something with no attribs
00863       if (bsdChown) {
00864         if (!vs->ac.check(cred, AccessControl::administrative)) {
00865             status = NFSERR_PERM;
00866             goto finish;
00867         }
00868       }
00869       const char* newOwner = AccessControl::unixToGlobalUser(newattr->uid);
00870       VestaSource::errorCode err =
00871         removeOldFromRealm(vs, true, newOwner, cred);
00872       if (err == VestaSource::ok) {
00873         err = vs->addAttrib("#owner", newOwner, cred);
00874       }
00875       // Ignore error if setting uid on something with no attribs
00876       if (err != VestaSource::ok && err != VestaSource::invalidArgs) {
00877         status = xlate_vserr(err);
00878         goto finish;
00879       }
00880       vs->ac.owner = *vs;
00881       // Modify attr to reflect the change
00882       attr->uid = newattr->uid;
00883       attr->mode &= ~04000; // turn off setuid
00884       setctime = true;
00885     }
00886 
00887     // gid changes
00888     if (newattr->gid != (u_int) -1 && newattr->gid != attr->gid) {
00889       RECORD_TIME_POINT;
00890       const char* newGroup = AccessControl::unixToGlobalGroup(newattr->gid);
00891       VestaSource::errorCode err =
00892         removeOldFromRealm(vs, false, newGroup, cred);
00893       if (err == VestaSource::ok) {
00894         err = vs->addAttrib("#group", newGroup, cred);
00895       }
00896       // Ignore error if setting gid on something with no attribs
00897       if (err != VestaSource::ok && err != VestaSource::invalidArgs) {
00898         status = xlate_vserr(err);
00899         goto finish;
00900       }
00901       vs->ac.group = *vs;
00902       // Modify attr to reflect the change
00903       attr->gid = newattr->gid;
00904       attr->mode &= ~02000; // turn off setgid
00905       setctime = true;
00906     }
00907 
00908     // mode changes
00909     unsigned int newmode;
00910     if (newattr->mode != (unsigned) -1 &&
00911         (newmode = newattr->mode & 07777) != (attr->mode & 07777)) {
00912         RECORD_TIME_POINT;
00913         // Sticky bit is not supported
00914         if (newmode & 01000) {
00915             status = NFSERR_PERM;
00916             goto finish;
00917         }
00918         // Change in setuid bit?
00919         if ((newmode ^ attr->mode) & 04000) {
00920             VestaSource::errorCode err;
00921             if (newmode & 04000) {
00922                 err = vs->addAttrib("#setuid", 
00923                         AccessControl::unixToGlobalUser(attr->uid), cred);
00924             } else {
00925                 err = vs->removeAttrib("#setuid", 
00926                         AccessControl::unixToGlobalUser(attr->uid), cred);
00927             }
00928             // Make setting mode on something with no attribs a no-op
00929             if (err != VestaSource::ok && err != VestaSource::invalidArgs) {
00930                 status = xlate_vserr(err);
00931                 goto finish;
00932             }
00933             setctime = true;
00934         }
00935         // Change in setgid bit?
00936         if ((newmode ^ attr->mode) & 02000) {
00937             VestaSource::errorCode err;
00938             if (newmode & 02000) {
00939                 err = vs->addAttrib("#setgid", 
00940                         AccessControl::unixToGlobalGroup(attr->gid), cred);
00941             } else {
00942                 err = vs->removeAttrib("#setgid", 
00943                         AccessControl::unixToGlobalGroup(attr->gid), cred);
00944             }
00945             // Make setting mode on something with no attribs a no-op
00946             if (err != VestaSource::ok && err != VestaSource::invalidArgs) {
00947                 status = xlate_vserr(err);
00948                 goto finish;
00949             }
00950             setctime = true;
00951         }
00952         // Access check
00953         if (!vs->ac.check(cred, AccessControl::ownership)) {
00954             status = NFSERR_PERM;
00955             goto finish;
00956         }
00957         // Change in executability?
00958         bool old_exc = false, new_exc = false;
00959         if (isfile) {
00960             if (!vs->master) {
00961                 status = NFSERR_ROFS;
00962                 goto finish;
00963             }
00964             old_exc = (st.st_mode & 0111) != 0;
00965             new_exc = (newmode & 0111) != 0;
00966             if (old_exc != new_exc) {
00967                 if (vs->type == VestaSource::immutableFile) {
00968                     status = NFSERR_PERM;
00969                     goto finish;
00970                 }
00971                 int res =
00972                   fchmod(fd, (st.st_mode & ~0111) | (new_exc ? 0111 : 0));
00973                 if (res < 0) {
00974                     status = xlate_errno(errno);
00975                     goto finish;
00976                 }
00977                 setctime = true;
00978             }
00979             // Modify newmode to reflect the effective result
00980             newmode = (newmode & ~0111) |
00981               (new_exc ? (newmode & 0444) >> 2 : 0);
00982         }
00983         // Change in other mode bits?
00984         if ((newmode ^ attr->mode) & (isfile ? 0666 : 0777)) {
00985             const char* val =
00986               AccessControl::formatModeBits(newmode & (isfile ? 0666 : 0777));
00987             VestaSource::errorCode err =
00988               vs->setAttrib("#mode", val, cred);
00989             delete [] val;
00990             // Making setting mode on something with no attribs a no-op
00991             if (err != VestaSource::ok && err != VestaSource::invalidArgs) {
00992                 status = xlate_vserr(err);
00993                 goto finish;
00994             }
00995             vs->ac.mode = newmode;
00996             setctime = true;
00997         }
00998         // Modify attr to reflect the change
00999         attr->mode = (attr->mode & ~07777) | (newmode & 07777);
01000     }
01001 
01002     // time changes
01003     if ((newattr->atime.seconds != (unsigned) -1 &&
01004          newattr->atime.seconds != attr->atime.seconds) ||
01005         (newattr->mtime.seconds != (unsigned) -1 &&
01006          newattr->mtime.seconds != attr->mtime.seconds)) {
01007         RECORD_TIME_POINT;
01008 
01009         // Access check
01010         if (!vs->ac.check(cred, AccessControl::ownership)) {
01011             status = NFSERR_PERM;
01012             goto finish;
01013         }
01014         struct timeval tvp[2];
01015         if (newattr->atime.seconds != (unsigned) -1) {
01016             tvp[0].tv_sec = newattr->atime.seconds;
01017             tvp[0].tv_usec = newattr->atime.useconds;
01018         } else { 
01019             tvp[0].tv_sec = attr->atime.seconds;
01020             tvp[0].tv_usec = attr->atime.useconds;
01021         }
01022         if (newattr->mtime.seconds != (unsigned) -1) {
01023             tvp[1].tv_sec = newattr->mtime.seconds;
01024             tvp[1].tv_usec = newattr->mtime.useconds;
01025         } else { 
01026             tvp[1].tv_sec = attr->mtime.seconds;
01027             tvp[1].tv_usec = attr->mtime.useconds;
01028         }
01029         if (isfile) {
01030             char *path = ShortIdBlock::shortIdToName(vs->shortId());
01031             int ok = utimes(path, tvp);
01032             delete path;
01033             if (ok < 0) {
01034                 status = xlate_errno(errno);
01035                 goto finish;
01036             }
01037             // This is a cheat; "now" may not be exactly the new
01038             // ctime, but it will be very close.  Avoids call to stat.
01039             attr->ctime.seconds = now.tv_sec;
01040             attr->ctime.useconds = now.tv_usec;
01041         } else {
01042             if (!vs->master ||
01043                 vs->type == VestaSource::immutableDirectory) {
01044                 status = NFSERR_ROFS;
01045                 goto finish;
01046             }
01047             vs->setTimestamp(tvp[1].tv_sec);
01048             // Modify attr to reflect the actual change
01049             attr->atime.seconds = attr->mtime.seconds =
01050               attr->ctime.seconds = vs->timestamp();
01051             attr->atime.useconds = attr->mtime.useconds =
01052               attr->ctime.useconds = 0;
01053         }
01054         setctime = false; 
01055     }
01056 
01057     // ctime changes as a side-effect.  Note that case where atime
01058     // or mtime change was requested is handled above, not here.
01059     if (setctime) {
01060         RECORD_TIME_POINT;
01061         if (isfile) {
01062             // This is a cheat; "now" may not be exactly the new
01063             // ctime, but it will be very close.  Avoids call to stat.
01064             attr->ctime.seconds = now.tv_sec;
01065             attr->ctime.useconds = now.tv_usec;
01066         } else {            
01067             vs->setTimestamp(now.tv_sec);
01068             // Modify attr to reflect the actual change
01069             attr->atime.seconds = attr->mtime.seconds =
01070               attr->ctime.seconds = vs->timestamp();
01071             attr->atime.useconds = attr->mtime.useconds =
01072               attr->ctime.useconds = 0;
01073         }
01074     }
01075     status = NFS_OK;
01076 
01077   finish:
01078     RECORD_TIME_POINT;
01079     if (commit) VRLog.commit();
01080     RECORD_TIME_POINT;
01081     if (closefd) FdCache::close(vs->shortId(), fd, ofl);
01082     RECORD_TIME_POINT;
01083     return status;
01084 }
01085 
01086 //
01087 // Set NFS attributes.
01088 //
01089 nfsstat
01090 do_setattr(sattrargs* argp, fattr* attr, AccessControl::Identity cred)
01091 {
01092   ReadersWritersLock* lock;
01093   nfsstat status = NFS_OK;
01094   VestaSource* vs = NULL;
01095     
01096   // Repeat until the file is mutable and we have the directory write
01097   // lock, or we detect an error.
01098   for (;;) {
01099     // First get the write lock
01100     RECORD_TIME_POINT;
01101     vs = ((LongId*) &argp->file)->lookup(LongId::writeLock, &lock);
01102     RECORD_TIME_POINT;
01103     if (vs == NULL) {
01104       if(*((LongId*) &argp->file) == NullLongId)
01105         {
01106           status = NFSERR_INVAL;
01107         }
01108       else
01109         {
01110           status = NFSERR_STALE;
01111           stalemsg("setattr", (LongId*) &argp->file);
01112         }
01113       goto finish;
01114     }
01115 
01116     RWLOCK_LOCKED_REASON(lock, "NFS:setattr:checking for mutability");
01117     
01118     // Copy-on-write the file if needed
01119     if (vs->type == VestaSource::immutableFile &&
01120         (MutableRootLongId.isAncestorOf(vs->longid) ||
01121          VolatileRootLongId.isAncestorOf(vs->longid))) {
01122 
01123       // Copying is needed; release the lock to avoid deadlock
01124       lock->releaseWrite();
01125       lock = NULL;
01126 
01127       RECORD_TIME_POINT;
01128       VestaSource* newvs =
01129         do_cow(vs, &status, (Basics::uint64) argp->attributes.size);
01130       RECORD_TIME_POINT;
01131       if (newvs == NULL) {
01132         goto finish;
01133       }
01134 
01135       delete vs;
01136       delete newvs;
01137 
01138       // Loop back to reacquire the lock
01139       continue;
01140 
01141     } else {
01142       // Copying is not needed; done
01143       break;
01144     }
01145   }
01146 
01147   // If this is an object under the mutable root that doesn't have
01148   // attributes, it must still be in the immutable base directory
01149   // (i.e. an immutable directory unmodified since checkout).  To
01150   // change its attributes, we need to copy it to the mutable portion.
01151   if(!vs->hasAttribs() && MutableRootLongId.isAncestorOf(vs->longid))
01152     {
01153       RECORD_TIME_POINT;
01154       VestaSource* newvs = 0;
01155       VestaSource::errorCode err =
01156         vs->copyToMutable(newvs, cred);
01157       if (err != VestaSource::ok) {
01158         assert(newvs == 0);
01159         status = xlate_vserr(err);
01160         goto finish;
01161       }
01162       delete vs;
01163       vs = newvs;
01164       RECORD_TIME_POINT;
01165     }
01166 
01167   RECORD_TIME_POINT;
01168   status = apply_sattr(&argp->attributes, vs, -1, cred, attr);
01169   RECORD_TIME_POINT;
01170     
01171  finish:
01172   if (lock != NULL) lock->releaseWrite();
01173   if (vs != NULL) delete vs;
01174   return status;
01175 }
01176 
01177 // 
01178 // Look up a name in a directory
01179 //
01180 nfsstat
01181 do_lookup(diropargs *dopa, diropokres* dp, AccessControl::Identity cred)
01182 {
01183     ReadersWritersLock* lock = 0;
01184     nfsstat status = NFS_OK;
01185     VestaSource::errorCode err;
01186 
01187     RECORD_TIME_POINT;
01188     VestaSource* vs = ((LongId*) &dopa->dir)->lookup(LongId::readLock, &lock);
01189     RECORD_TIME_POINT;
01190 
01191     RWLOCK_LOCKED_REASON(lock, "NFS:lookup");
01192 
01193     VestaSource* vs2 = NULL;
01194     if (vs == NULL) {
01195       if(*((LongId*) &dopa->dir) == NullLongId)
01196         {
01197           status = NFSERR_INVAL;
01198         }
01199       else
01200         {
01201           status = NFSERR_STALE;
01202           stalemsg("lookup", (LongId*) &dopa->dir);
01203         }
01204       goto finish;
01205     }
01206     switch (vs->type) {
01207       case VestaSource::immutableDirectory:
01208       case VestaSource::appendableDirectory:
01209       case VestaSource::mutableDirectory:
01210       case VestaSource::volatileDirectory:
01211       case VestaSource::volatileROEDirectory:
01212       case VestaSource::evaluatorDirectory:
01213       case VestaSource::evaluatorROEDirectory:
01214         if (strcmp(dopa->name, "") == 0 || strcmp(dopa->name, ".") == 0) {
01215             // return self
01216             dp->file = dopa->dir;
01217             vs2 = vs;
01218         } else if (strcmp(dopa->name, "..") == 0) {
01219             // return parent
01220             LongId plongid = vs->longid.getParent();
01221             *(Byte32*)& dp->file = plongid.value;
01222             RECORD_TIME_POINT;
01223             vs2 = plongid.lookup(); // ugh, but needed to get attributes
01224             RECORD_TIME_POINT;
01225         } else {
01226           RECORD_TIME_POINT;
01227           TIMING_RECORD_LONGID(vs->longid);
01228             err = vs->lookup(dopa->name, vs2, cred);
01229             if (err != VestaSource::ok) {
01230               RECORD_TIME_POINT;
01231                 status = xlate_vserr(err);
01232                 goto finish;
01233             } else {
01234               RECORD_TIME_POINT;
01235                 *(Byte32*)& dp->file = vs2->longid.value;
01236             }
01237         }
01238         break;
01239         
01240       default:
01241         status = NFSERR_NOTDIR;
01242         goto finish;
01243     }
01244 
01245     // If this is a VLeaf, it's safe to release the lock before
01246     // calling any_fattr.
01247     if((lock != NULL) &&
01248        ((vs2->type == VestaSource::immutableFile) ||
01249         (vs2->type == VestaSource::mutableFile)))
01250       {
01251         lock->releaseRead();
01252         lock = NULL;
01253       }
01254     
01255     // Get fattr
01256     RECORD_TIME_POINT;
01257     status = any_fattr(&(dp->attributes), vs2, -1);
01258     RECORD_TIME_POINT;
01259 
01260   finish:
01261     if (lock != NULL) lock->releaseRead();
01262     if (vs2 != vs && vs2 != NULL) delete vs2;
01263     if (vs != NULL) delete vs;
01264     return status;
01265 }
01266 
01267 //
01268 // Given a file handle, return an open file descriptor and a
01269 // VestaSource*.
01270 // Also does access checking and handles copy-on-write.  Seek pointer
01271 // location is unspecified.
01272 //
01273 int
01274 fh_fd(nfs_fh* fh, nfsstat* status, int omode, VestaSource** vsout, int* oflout,
01275       AccessControl::Identity cred)
01276 {
01277   int fd = -1;
01278   bool writing = (bool) (omode == O_WRONLY || omode == O_RDWR);
01279   ReadersWritersLock* lock = NULL;
01280   FdCache::OFlag ofl = FdCache::any;
01281   nfsstat st = NFS_OK;
01282 
01283   // Optimistically get only a read lock.  If copy-on-write is
01284   // required, do_cow will go through some gyrations to do
01285   // it without holding the read or write lock for too long.
01286   RECORD_TIME_POINT;
01287   VestaSource* vs = ((LongId*) fh)->lookup(LongId::readLock, &lock);
01288   RECORD_TIME_POINT;
01289   if (vs == NULL) {
01290     st = NFSERR_STALE;
01291     stalemsg(writing ? "write" : "read", (LongId*) fh);
01292     goto error;
01293   }
01294 
01295   RWLOCK_LOCKED_REASON(lock, (writing ? "NFS:write" : "NFS:read"));
01296 
01297   // Check access.  FileShortIdRootLongIds will pass here but
01298   //  may fail when FdCache::open is called.
01299   if (!vs->ac.check(cred, (writing ? AccessControl::write :
01300                            AccessControl::read)) &&
01301       !vs->ac.check(cred, AccessControl::ownership)) {
01302     st = NFSERR_ACCES;
01303     goto error;
01304   }
01305   RECORD_TIME_POINT;
01306 
01307   // OK to release the readLock or writeLock here, as the vs
01308   // is of type VLeaf; it doesn't point into the directory structure.
01309   if (lock) lock->release();
01310   lock = NULL;
01311 
01312   // If copy-on-write is needed...
01313   if (writing && vs->type == VestaSource::immutableFile &&
01314       (MutableRootLongId.isAncestorOf(vs->longid) ||
01315        VolatileRootLongId.isAncestorOf(vs->longid))) {
01316 
01317     RECORD_TIME_POINT;
01318     VestaSource *newvs = do_cow(vs, &st);
01319     RECORD_TIME_POINT;
01320     if (newvs == NULL) {
01321       assert(st != NFS_OK);
01322       goto error;
01323     }
01324     delete vs;
01325     vs = newvs;
01326 
01327     // Could redo access check here, as directory structure could
01328     // have changed, but it seems harmless to let it through; we
01329     // would have allowed access if the copy had not been needed.
01330     // We don't want to move the first access check to after the
01331     // copy-on-write, because we don't want users who never had
01332     // write access to be able to force a copy to happen.
01333   }
01334 
01335   switch (vs->type) {
01336   case VestaSource::immutableFile:
01337     if (writing) {
01338       st = NFSERR_ACCES;
01339       goto error;
01340     } else {
01341       ofl = FdCache::any;
01342       RECORD_TIME_POINT;
01343       fd = FdCache::open(vs->shortId(), ofl, &ofl);
01344       RECORD_TIME_POINT;
01345       //Repos::dprintf(DBG_NFS, "imm sid:%08x\n", vs->shortId());
01346       if (fd < 0) {
01347         st = xlate_errno(errno);
01348         goto error;
01349       }
01350     }
01351     break;
01352         
01353   case VestaSource::mutableFile:
01354     if (writing) {
01355       ofl = FdCache::rw;
01356     } else {
01357       ofl = FdCache::any;
01358     }
01359     RECORD_TIME_POINT;
01360     fd = FdCache::open(vs->shortId(), ofl, &ofl);
01361     RECORD_TIME_POINT;
01362     //Repos::dprintf(DBG_NFS, "mut sid:%08x\n", vs->shortId());
01363     if (fd < 0) {
01364       st = xlate_errno(errno);
01365       goto error;
01366     }
01367     break;
01368         
01369   case VestaSource::device:
01370     fd = DEVICE_FAKE_FD;
01371     ofl = FdCache::rw;
01372     //Repos::dprintf(DBG_NFS, "device 0x%x\n", vs->shortId());
01373     break;
01374         
01375   default:
01376     st = NFSERR_ISDIR;
01377     goto error;
01378   }
01379     
01380   *status = st;
01381   *vsout = vs;
01382   *oflout = (int) ofl;
01383   return fd;
01384 
01385  error:
01386   if (lock) lock->release();
01387   if (vs) delete vs;
01388   *status = st;
01389   *vsout = NULL;
01390   *oflout = -1;
01391   return -1;
01392 }
01393 
01394 //
01395 // Return fd to cache
01396 //
01397 void
01398 fd_inactive(void* vsin, int fd, int ofl)
01399 {
01400     VestaSource* vs = (VestaSource*) vsin;
01401     if (fd != DEVICE_FAKE_FD) 
01402       FdCache::close(vs->shortId(), fd, (FdCache::OFlag) ofl);
01403     delete vs;
01404 }
01405 
01406 struct readdirClosure {
01407     VestaSource* vs;
01408     entry** e;
01409     int res_size;
01410     int count;
01411     bool first;
01412     bool full;
01413     unsigned int cookie_incr;
01414 };
01415 
01416 #define DP_SLOP 16
01417 
01418 static bool
01419 readdirCallback(void* closure, VestaSource::typeTag type, Arc arc,
01420                 unsigned int index, unsigned int pseudoInode,
01421                 ShortId filesid, bool master)
01422 {
01423     readdirClosure* cl = (readdirClosure*) closure;
01424     entry* e;
01425     int esize = sizeof(entry) + strlen(arc) + DP_SLOP;
01426     if (cl->res_size + esize < cl->count) {
01427         e = (entry*) malloc(sizeof(entry));
01428         assert(e != NULL);
01429         *(cl->e) = e;
01430         e->fileid = pseudoInode;
01431         e->name = strdup(arc);
01432         // Linux client treats this field as a big-endian integer.
01433         // Some kernel versions sign-extend it and some do not, which
01434         // causes problems with some glibc versions.  So we avoid setting
01435         // what looks like the sign bit under the big-endian interpretation.
01436         index += cl->cookie_incr;
01437         e->cookie[0] = index >> 24;
01438         e->cookie[1] = index >> 16;
01439         e->cookie[2] = index >> 8;
01440         e->cookie[3] = index >> 0;
01441         cl->e = &(e->nextentry);
01442         cl->res_size += esize;
01443         return true;
01444     } else {
01445         cl->full = true;
01446         return false;
01447     }
01448 }
01449 
01450 nfsstat
01451 do_readdir(readdirargs* argp, result_types* resp, AccessControl::Identity cred)
01452 {
01453     ReadersWritersLock* lock;
01454     nfsstat status = NFS_OK;
01455     unsigned int cookie;
01456 
01457     RECORD_TIME_POINT;
01458     VestaSource* vs = ((LongId*) &argp->dir)->lookup(LongId::readLock, &lock);
01459     RECORD_TIME_POINT;
01460 
01461     RWLOCK_LOCKED_REASON(lock, "NFS:readdir");
01462 
01463     int fd;
01464     if (vs == NULL) {
01465       if(*((LongId*) &argp->dir) == NullLongId)
01466         {
01467           status = NFSERR_INVAL;
01468         }
01469       else
01470         {
01471           status = NFSERR_STALE;
01472           stalemsg("readdir", (LongId*) &argp->dir);
01473         }
01474       goto finish;
01475     }
01476     switch (vs->type) {
01477       case VestaSource::immutableDirectory:
01478       case VestaSource::appendableDirectory:
01479       case VestaSource::mutableDirectory:
01480       case VestaSource::volatileDirectory:
01481       case VestaSource::volatileROEDirectory:
01482       case VestaSource::evaluatorDirectory:
01483       case VestaSource::evaluatorROEDirectory:
01484         break;
01485       default:
01486         status = NFSERR_NOTDIR;
01487         goto finish;
01488     }
01489     
01490     readdirClosure cl;
01491     cl.vs = vs;
01492     cl.res_size = 0;
01493     cl.e = &(resp->r_readdirres.readdirres_u.reply.entries);
01494     cl.count = argp->count;
01495     cl.first = true;
01496     cl.full = false;
01497     cl.cookie_incr = (vs->longid == VolatileRootLongId) ? 1 : 2;
01498     // Undo big-endian packing; see comment in callback above
01499     cookie = ((((unsigned char) argp->cookie[0]) << 24) +
01500               (((unsigned char) argp->cookie[1]) << 16) +
01501               (((unsigned char) argp->cookie[2]) << 8) +
01502               (((unsigned char) argp->cookie[3]) << 0));
01503     //
01504     // Kludge warning.  VestaSource assigns cookies in this order: 
01505     //   2, 4, 6, ..., 1, 3, 5, ...
01506     // Cookie 0 is unused on output and equivalent to 2 on input.
01507     // The problem is that we need *two* unused cookies to represent
01508     // starting at the "." or ".." entry of the directory, neither of
01509     // which exist at the VestaSource interface.  We kludge this by
01510     // assigning:
01511     //   0           = start at "."  (required by NFS interface)
01512     //   0x7fffffff  = start at ".." (!)
01513     //   2           = start just after ".."
01514     // I used to not have a cookie for starting at ".."; I just
01515     // filled in 2 as the next cookie for both of the first two
01516     // entries.  For some reason this causes the Linux NFS client
01517     // to hang when listing an empty directory!
01518     // Later I used 0xffffffff as the cookie to start at "..".
01519     // This caused problems with a later Linux + glibc combo
01520     // where glibc expected the kernel to sign-extend the cookie
01521     // but the kernel didn't do so.
01522     //
01523     // KCS 2004-03-10: The volatile root is a bit of a special case,
01524     // as it assigns indicies in this order:
01525     //   1, 2, 3, 4, 5, ...
01526     // We hadle this by using cl.cookie_incr.
01527     //
01528     if (cookie == 0) {
01529         // Put in "."
01530         // Set the index in this fake callback so the next cookie 
01531         //  is computed as 0x7fffffff.
01532         readdirCallback((void*) &cl, vs->type, ".", 0x7fffffff-cl.cookie_incr,
01533                         vs->pseudoInode, NullShortId, true);
01534     }
01535     if (!cl.full && (cookie == 0 || cookie == 0x7fffffff)) {
01536         // Put in ".."
01537         // Set the index so the next cookie is the first real index.
01538         VestaSource* vs2 = vs->longid.getParent().lookup();
01539         if (vs2 == NULL) {
01540             readdirCallback((void*) &cl, vs->type, "..", 0,
01541                             vs->pseudoInode, NullShortId, true);
01542         } else {
01543             readdirCallback((void*) &cl, vs2->type, "..", 0,
01544                             vs2->pseudoInode, NullShortId, true);
01545             delete vs2;
01546         }
01547         // Arrange to continue at the first real entry.
01548         cookie = 0;
01549     }
01550     if (!cl.full) {
01551       RECORD_TIME_POINT;
01552       TIMING_RECORD_LONGID(vs->longid);
01553         status = xlate_vserr(vs->list(cookie, readdirCallback, &cl, cred));
01554       RECORD_TIME_POINT;
01555     }
01556     *(cl.e) = NULL;
01557     resp->r_readdirres.readdirres_u.reply.eof = !cl.full;
01558   finish:
01559     resp->r_readdirres.status = status;
01560     if (lock != NULL) lock->releaseRead();
01561     if (vs != NULL) delete vs;
01562     return status;
01563 }
01564 
01565 
01566 // Create a mutable file
01567 nfsstat
01568 do_create(createargs* argp, diropokres* dp, AccessControl::Identity cred)
01569 {
01570     ReadersWritersLock* lock;
01571     nfsstat status = NFS_OK;
01572     int fd = -1;
01573     VestaSource* vs2 = NULL;
01574     VestaSource* newvs = NULL;
01575 
01576     // Find the directory
01577     RECORD_TIME_POINT;
01578     VestaSource* vs =
01579       ((LongId*) &argp->where.dir)->lookup(LongId::writeLock, &lock);
01580     RECORD_TIME_POINT;
01581 
01582     RWLOCK_LOCKED_REASON(lock, "NFS:create");
01583 
01584     if (vs == NULL) {
01585       if(*((LongId*) &argp->where.dir) == NullLongId)
01586         {
01587           status = NFSERR_INVAL;
01588         }
01589       else
01590         {
01591           status = NFSERR_STALE;
01592           stalemsg("create", (LongId*) &argp->where.dir);
01593         }
01594       goto finish;
01595     }
01596     
01597     switch (vs->type) {
01598       case VestaSource::immutableDirectory:
01599       case VestaSource::evaluatorDirectory:
01600       case VestaSource::evaluatorROEDirectory:
01601         {
01602             // Try to do copy-on-write
01603             VestaSource* newvs;
01604             RECORD_TIME_POINT;
01605             VestaSource::errorCode err = vs->makeMutable(newvs);
01606             RECORD_TIME_POINT;
01607             if (err != VestaSource::ok) {
01608                 status = xlate_vserr(err);
01609                 goto finish;
01610             }
01611             delete vs;
01612             vs = newvs;
01613         }
01614         break;
01615         
01616       case VestaSource::appendableDirectory:
01617         status = NFSERR_ACCES;
01618         goto finish;
01619         
01620       default:
01621         status = NFSERR_NOTDIR;
01622         goto finish;
01623         
01624       case VestaSource::mutableDirectory:
01625       case VestaSource::volatileDirectory:
01626       case VestaSource::volatileROEDirectory:
01627         break;
01628     }
01629     
01630     RECORD_TIME_POINT;
01631     // Check that name isn't already in use
01632     VestaSource::errorCode err;
01633     err = vs->lookup(argp->where.name, vs2, cred);
01634     if (err == VestaSource::ok ||
01635         strcmp(argp->where.name, "") == 0 ||
01636         strcmp(argp->where.name, ".") == 0 ||
01637         strcmp(argp->where.name, "..") == 0) {
01638         if (vs2 != NULL) delete vs2;
01639         status = NFSERR_EXIST;
01640         goto finish;
01641     } else if (err != VestaSource::notFound) {
01642         status = xlate_vserr(err);
01643         goto finish;
01644     }
01645     
01646     // (Next comment and code adapted from old nfsd.)
01647     // Compensate for a really bizarre bug in SunOS derived clients.
01648     if ((argp->attributes.mode & S_IFMT) == 0) {
01649         argp->attributes.mode |= S_IFREG;
01650     }
01651     
01652     // Can't create devices, etc.
01653     if (!S_ISREG(argp->attributes.mode)) {
01654         status = NFSERR_ACCES;
01655         goto finish;
01656     }
01657     
01658     // Make a new file named by ShortId
01659     ShortId sid;
01660     RECORD_TIME_POINT;
01661     fd = SourceOrDerived::fdcreate(sid);
01662     if (fd < 0) {
01663         status = xlate_errno(errno);
01664         goto finish;
01665     }
01666     
01667     // Insert file into the directory
01668     RECORD_TIME_POINT;
01669     err = vs->insertMutableFile(argp->where.name, sid, true, cred,
01670                                 VestaSource::dontReplace, &newvs);
01671     if (err != VestaSource::ok) {
01672         status = xlate_vserr(err);
01673         goto finish;
01674     }
01675 
01676     // Apply requested attributes and build return value
01677     *(LongId*) &dp->file = newvs->longid;
01678     RECORD_TIME_POINT;
01679     status = apply_sattr(&argp->attributes, newvs, fd, NULL,
01680                          &dp->attributes);
01681     RECORD_TIME_POINT;
01682 
01683   finish:
01684     if (lock != NULL) lock->releaseWrite();
01685     if (newvs != NULL) delete newvs;
01686     if (fd != -1) FdCache::close(sid, fd, FdCache::rw);
01687     if (vs != NULL) delete vs;
01688     return status;
01689 }
01690 
01691 
01692 // Stuff to help us remember the names in a deleted appendable directory
01693 struct NamelistClosure {
01694   ostream* val;
01695 };
01696 
01697 bool
01698 namelistCallback(void* closure, VestaSource::typeTag type, Arc arc,
01699                  unsigned int index, Bit32 pseudoInode, ShortId filesid,
01700                  bool master)
01701 {
01702   NamelistClosure *cl = (NamelistClosure *)closure;
01703   *cl->val << arc << "/";
01704   return true;
01705 }
01706 
01707 bool
01708 dirEmptyCallback(void* closure, VestaSource::typeTag type, Arc arc,
01709                  unsigned int index, Bit32 pseudoInode, ShortId filesid,
01710                  bool master)
01711 {
01712   bool *empty = (bool *) closure;
01713   *empty = false;
01714   return false;
01715 }
01716 
01717 // Delete a file or directory, replacing with a ghost if parent is
01718 // appendable.
01719 nfsstat
01720 do_remove(diropargs* argp, AccessControl::Identity cred)
01721 {
01722     ReadersWritersLock* lock;
01723     nfsstat status = NFS_OK;
01724     OBufStream val;
01725 
01726     // Find the parent directory
01727     VestaSource* vs = ((LongId*) &argp->dir)->lookup(LongId::writeLock, &lock);
01728     if (vs == NULL) {
01729       if(*((LongId*) &argp->dir) == NullLongId)
01730         {
01731           status = NFSERR_INVAL;
01732         }
01733       else
01734         {
01735           status = NFSERR_STALE;
01736           stalemsg("remove", (LongId*) &argp->dir);
01737         }
01738       goto finish;
01739     }
01740 
01741     RWLOCK_LOCKED_REASON(lock, "NFS:remove/rmdir");
01742     
01743     VestaSource::errorCode err;
01744     switch (vs->type) {
01745       case VestaSource::immutableDirectory:
01746       case VestaSource::evaluatorDirectory:
01747       case VestaSource::evaluatorROEDirectory:
01748         {
01749             // Try to do copy-on-write
01750             VestaSource* newvs;
01751             VestaSource::errorCode err = vs->makeMutable(newvs);
01752             if (err != VestaSource::ok) {
01753                 status = xlate_vserr(err);
01754                 goto finish;
01755             }
01756             delete vs;
01757             vs = newvs;
01758         }
01759         // fall through
01760 
01761       case VestaSource::mutableDirectory:
01762       case VestaSource::volatileDirectory:
01763       case VestaSource::volatileROEDirectory:
01764         {
01765           // Look up the object to be deleted
01766           VestaSource* child_vs;
01767           err = vs->lookup(argp->name, child_vs);
01768           if(err != VestaSource::ok) {
01769             status = xlate_vserr(err);
01770             goto finish;
01771           }
01772           // If the child is a non-empty directory, set this to false.
01773           bool empty = true;
01774           switch(child_vs->type)
01775             {
01776             case VestaSource::immutableDirectory:
01777               // We can optimize a little for immutableDirectory,
01778               // because the first child will always be index 2.
01779               // (That's actually also true for evaluator directories,
01780               // but it's not safe to call lookupIndex on them, as it
01781               // assumes lookupIndex can be serviced from cached
01782               // information, and there might not be any in this
01783               // case.)
01784               {
01785                 VestaSource* child_child_vs;
01786                 err = child_vs->lookupIndex(2, child_child_vs);
01787                 if(err == VestaSource::ok)
01788                   {
01789                     // This directory's no empty.
01790                     delete child_child_vs;
01791                     empty = false;
01792                   }
01793                 if(err != VestaSource::notFound)
01794                   {
01795                     delete child_vs;
01796                     status = xlate_vserr(err);
01797                     goto finish;
01798                   }
01799               }
01800               break;
01801             case VestaSource::mutableDirectory:
01802             case VestaSource::evaluatorDirectory:
01803             case VestaSource::evaluatorROEDirectory:
01804             case VestaSource::volatileDirectory:
01805             case VestaSource::volatileROEDirectory:
01806               // Check if this directory is empty by listing it.
01807               err = child_vs->list(0, dirEmptyCallback, (void *) &empty);
01808               if(err != VestaSource::ok)
01809                 {
01810                   delete child_vs;
01811                   status = xlate_vserr(err);
01812                   goto finish;
01813                 }
01814               break;
01815             }
01816 
01817           // We're done with the child now.
01818           delete child_vs;
01819 
01820           if(!empty)
01821             {
01822               // Can't delete a non-empty directory via NFS.
01823               status = NFSERR_NOTEMPTY;
01824               goto finish;
01825             }
01826           else
01827             // Do the deletion
01828             err = vs->reallyDelete(argp->name, cred, true);
01829         }
01830         break;
01831         
01832       case VestaSource::appendableDirectory:
01833         // Check if delete-like operations are restricted
01834         if (!vs->ac.check(cred, AccessControl::del)) {
01835             status = NFSERR_ACCES;
01836             goto finish;
01837         }
01838         // Check that this isn't already a ghost
01839         VestaSource* newvs;
01840         err = vs->lookup(argp->name, newvs);
01841         if (err != VestaSource::ok) {
01842             status = xlate_vserr(err);
01843             goto finish;
01844         }
01845         if (newvs->type == VestaSource::ghost) {
01846             delete newvs;
01847             status = NFSERR_ACCES;
01848             goto finish;
01849         }
01850         // If a master appendable directory, remember the name list; else
01851         // remember the fingerprint if any.
01852         val << newvs->typeTagChar(newvs->type) << ":";
01853         if (newvs->type == VestaSource::appendableDirectory) {
01854           if (newvs->master) {
01855             NamelistClosure cl;
01856             cl.val = &val;
01857             err = newvs->list(0, namelistCallback, &cl);
01858             if (err != VestaSource::ok) {
01859               status = xlate_vserr(err);
01860               delete newvs;
01861               goto finish;
01862             }
01863           }
01864         } else if (newvs->type != VestaSource::stub) {
01865           val << setw(2) << setfill('0') << hex;
01866           unsigned char fpbytes[FP::ByteCnt];
01867           newvs->fptag.ToBytes(fpbytes);
01868           int i;
01869           for (i=0; i<FP::ByteCnt; i++) {
01870             val << (int) (fpbytes[i] & 0xff);
01871           }
01872         }
01873         // Replace with a ghost
01874         VestaSource* ghostvs;
01875         err = vs->insertGhost(argp->name, newvs->master, cred,
01876                               VestaSource::replaceDiff, &ghostvs);
01877         if (err == VestaSource::ok) {
01878           const char *valstr = val.str();
01879           err = ghostvs->setAttrib("#formerly", valstr);
01880           delete ghostvs;
01881         }
01882         delete newvs;
01883         break;
01884         
01885       default:
01886         status = NFSERR_NOTDIR;
01887         goto finish;
01888     }
01889     status = xlate_vserr(err);
01890 
01891   finish:
01892     if (lock != NULL) lock->releaseWrite();
01893     if (vs != NULL) delete vs;
01894     return status;
01895 }
01896 
01897 
01898 // Rename something in a mutable or appendable directory
01899 nfsstat
01900 do_rename(renameargs* argp, AccessControl::Identity cred)
01901 {
01902     ReadersWritersLock* vlock = NULL;
01903     ReadersWritersLock* lock;
01904     nfsstat status;
01905     VestaSource* fromVS = NULL;
01906     VestaSource* toVS = NULL;
01907     
01908     // Find the old and new parent directories
01909     if (VolatileRootLongId.isAncestorOf(*(LongId*) &argp->from.dir)) {
01910       // Must retain VolatileRootLock.read across both longid lookups,
01911       // not acquire and release it within the first.
01912       vlock = &VolatileRootLock;
01913       vlock->acquireRead();
01914       RWLOCK_LOCKED_REASON(vlock, "NFS:rename in volatile");
01915       fromVS = ((LongId*) &argp->from.dir)->lookup(LongId::writeLockV, &lock);
01916     } else {
01917       fromVS = ((LongId*) &argp->from.dir)->lookup(LongId::writeLock, &lock);
01918     }
01919     if (fromVS == NULL) {
01920       if(*((LongId*) &argp->from.dir) == NullLongId)
01921         {
01922           status = NFSERR_INVAL;
01923         }
01924       else
01925         {
01926           status = NFSERR_STALE;
01927           stalemsg("rename fromdir", (LongId*) &argp->from.dir);
01928         }
01929       goto finish;
01930     }
01931 
01932     RWLOCK_LOCKED_REASON(lock, "NFS:rename");
01933 
01934     switch (fromVS->type) {
01935       case VestaSource::immutableDirectory:
01936       case VestaSource::evaluatorDirectory:
01937       case VestaSource::evaluatorROEDirectory:
01938         {
01939             // Try to do copy-on-write.
01940             // It's not great that we hold vlock through this call, but
01941             // it doesn't cause any actual harm.  makeMutable on a directory
01942             // should always be very fast.
01943             VestaSource* newFromVS;
01944             VestaSource::errorCode err = fromVS->makeMutable(newFromVS);
01945             if (err != VestaSource::ok) {
01946                 status = xlate_vserr(err);
01947                 goto finish;
01948             }
01949             delete fromVS;
01950             fromVS = newFromVS;
01951         }
01952         break;
01953         
01954       default:
01955         status = NFSERR_NOTDIR;
01956         goto finish;
01957         
01958       case VestaSource::mutableDirectory:
01959       case VestaSource::volatileDirectory:
01960       case VestaSource::volatileROEDirectory:
01961         break;
01962 
01963       case VestaSource::appendableDirectory:
01964 #if 0
01965         // Check if delete-like operations are restricted
01966         if (!fromVS->ac.check(cred, AccessControl::del)) {
01967             status = NFSERR_ACCES;
01968             goto finish;
01969         }
01970         // Would also need to check master statuses and do the right
01971         // thing here...
01972         break;
01973 #else
01974         // Not supported
01975         status = NFSERR_INVAL;
01976         goto finish;
01977 #endif
01978     }
01979     
01980     // Wait until here to look up toVS, because the copy-on-write
01981     // of fromVS could have also copied toVS.
01982     toVS = ((LongId*) &argp->to.dir)->lookup(LongId::checkLock, &lock);
01983     if (vlock != NULL) vlock->releaseRead();
01984     vlock = NULL;
01985     if (toVS == NULL) {
01986       if(*((LongId*) &argp->to.dir) == NullLongId)
01987         {
01988           status = NFSERR_INVAL;
01989         }
01990       else
01991         {
01992           status = NFSERR_STALE; // !!or could be cross-device link
01993           stalemsg("rename todir", (LongId*) &argp->to.dir);
01994         }
01995       goto finish;
01996     }
01997     if (toVS->type == VestaSource::immutableDirectory ||
01998         toVS->type == VestaSource::evaluatorDirectory ||
01999         toVS->type == VestaSource::evaluatorROEDirectory) {
02000         // Try to do copy-on-write
02001         VestaSource* newToVS;
02002         VestaSource::errorCode err = toVS->makeMutable(newToVS);
02003         if (err != VestaSource::ok) {
02004             status = xlate_vserr(err);
02005             goto finish;
02006         }
02007         delete toVS;
02008         toVS = newToVS;
02009     }   
02010     
02011     status =
02012       xlate_vserr(toVS->renameTo(argp->to.name, fromVS, argp->from.name,
02013                                  cred, VestaSource::replaceDiff));
02014 
02015   finish:    
02016     if (vlock != NULL) vlock->releaseRead();
02017     if (lock != NULL) lock->releaseWrite();
02018     if (fromVS != NULL) delete fromVS;
02019     if (toVS != NULL) delete toVS;
02020     return status;
02021 }
02022 
02023 // Hard link a file.  The link count is maintained by the directory
02024 // internally.  (See the ShortIdRefCount calss and its use in the
02025 // VDirChangeable class.)  Copy-on-write is performed if possible
02026 // (with both old and new links pointing to the new copy) in case
02027 // there are to be writes in the future.  This operation is allowed
02028 // only in descendents of the volatile root and mutable root, because
02029 // those directory types have the mutable shortid reference counting
02030 // machinery.
02031 nfsstat
02032 do_hardlink(linkargs* argp, AccessControl::Identity cred)
02033 {
02034     ReadersWritersLock* lock = NULL;
02035     nfsstat status;
02036     VestaSource* fromVS = NULL;
02037     VestaSource* toVS = NULL;
02038     LongId fromAncestor;
02039     
02040  retry:
02041     // Find the destination directory
02042     toVS = ((LongId*) &argp->to.dir)->lookup(LongId::writeLock, &lock);
02043     if (toVS == NULL) {
02044       if(*((LongId*) &argp->to.dir) == NullLongId)
02045         {
02046           status = NFSERR_INVAL;
02047         }
02048       else
02049         {
02050           status = NFSERR_STALE; // !! or cross-device link
02051           stalemsg("hardlink todir", (LongId*) &argp->to.dir);
02052         }
02053       goto finish;
02054     }
02055 
02056     RWLOCK_LOCKED_REASON(lock, "NFS:link");
02057 
02058     if(VolatileRootLongId.isAncestorOf(toVS->longid))
02059       {
02060         // The "from" file must be in the same volatile directory.
02061         // This disallows cross-linking between volatile directories.
02062         // It's neccessary to do this, because each volatile directory
02063         // keeps its own independent shortid reference count.
02064         fromAncestor = toVS->longid;
02065         LongId parent;
02066         while(!((parent = fromAncestor.getParent()) == VolatileRootLongId))
02067           {
02068             fromAncestor = parent;
02069           }
02070       }
02071     else if(MutableRootLongId.isAncestorOf(toVS->longid))
02072       {
02073         // The "from" file must also be in the mutable root.
02074         fromAncestor = MutableRootLongId;
02075       }
02076     else
02077       {
02078         // Can't create a hard-link outside the volatile and mutable
02079         // roots.
02080         status = NFSERR_INVAL;
02081         goto finish;
02082       }
02083 
02084     if (toVS->type == VestaSource::immutableDirectory ||
02085         toVS->type == VestaSource::evaluatorDirectory ||
02086         toVS->type == VestaSource::evaluatorROEDirectory) {
02087         // Try to do copy-on-write on destination directory
02088         VestaSource* newToVS;
02089         VestaSource::errorCode err = toVS->makeMutable(newToVS);
02090         if (err != VestaSource::ok) {
02091             status = xlate_vserr(err);
02092             goto finish;
02093         }
02094         delete toVS;
02095         toVS = newToVS;
02096     }   
02097     assert(toVS->type == VestaSource::mutableDirectory ||
02098            toVS->type == VestaSource::volatileDirectory ||
02099            toVS->type == VestaSource::volatileROEDirectory);
02100 
02101     // Find the existing file
02102     fromVS = ((LongId*) &argp->from)->lookup(LongId::checkLock, &lock);
02103     if (fromVS == NULL) {
02104       if(*((LongId*) &argp->from) == NullLongId)
02105         {
02106           status = NFSERR_INVAL;
02107         }
02108       else
02109         {
02110           status = NFSERR_STALE;
02111           stalemsg("hardlink from", (LongId*) &argp->from);
02112         }
02113       goto finish;
02114     }
02115     else if(fromAncestor.isAncestorOf(fromVS->longid)) {
02116         switch (fromVS->type) {
02117           case VestaSource::immutableFile:
02118             {
02119                 // Oops, need to do copy-on-write
02120                 lock->release();
02121                 lock = NULL;
02122                 VestaSource* newFromVS = do_cow(fromVS, &status);
02123                 if (newFromVS == NULL) goto finish;
02124                 delete toVS;      toVS = 0;
02125                 delete fromVS;    fromVS = 0;
02126                 delete newFromVS; newFromVS = 0;
02127                 goto retry;
02128             }
02129           case VestaSource::mutableFile:
02130             break;
02131           default:
02132             status = NFSERR_ISDIR;
02133             goto finish;
02134         }
02135         status =
02136           xlate_vserr(toVS->insertMutableFile(argp->to.name, fromVS->shortId(),
02137                                               true, cred,
02138                                               VestaSource::dontReplace, NULL));
02139     } else if (FileShortIdRootLongId.isAncestorOf(fromVS->longid) &&
02140                (toVS->type == VestaSource::volatileROEDirectory)) {
02141       // This case allows a hard-link to be created to an immutable
02142       // file in a read-only-existing volatile directory.
02143         status =
02144           xlate_vserr(toVS->insertFile(argp->to.name, fromVS->shortId(),
02145                                        true, cred,
02146                                        VestaSource::dontReplace, NULL, 0,
02147                                        &fromVS->fptag));
02148     } else {
02149         status = NFSERR_INVAL;
02150     }
02151   finish:    
02152     if (lock != NULL) lock->releaseWrite();
02153     if (fromVS != NULL) delete fromVS;
02154     if (toVS != NULL) delete toVS;
02155     return status;
02156 }
02157 
02158 // Symlinks are permitted only in appendable directories.  The
02159 // evaluator cannot see them, so they are useful only for 
02160 // browsing purposes.  Currently the "latest" link is the only
02161 // use of this feature.
02162 //
02163 // (Why forbid them elsewhere?  It seems useless to allow them
02164 // in immutable directories, because the evaluator can't see
02165 // them.  With that forbidden, it seems useless to allow them
02166 // in mutable directories, because they can't be checked in.
02167 // It seems needless to allow them in volatile directories,
02168 // because _run_tool doesn't know how to represent one in a
02169 // result binding, and no tools we've needed to encapsulate
02170 // so far need to create them.) 
02171 
02172 nfsstat
02173 do_symlink(symlinkargs* argp, AccessControl::Identity cred)
02174 {
02175     ReadersWritersLock* lock = NULL;
02176     nfsstat status = NFS_OK;
02177     VestaSource* vs = NULL, *newvs = NULL;
02178 
02179     // Site-configurable option to forbid making symlinks through
02180     // the NFS interface with the symlink() system call.  They can
02181     // still be made by Vesta tools, by putting the symlink-to
02182     // attribute on a stub.
02183     if (!allowSymlink) {
02184         status = NFSERR_ACCES;
02185         goto finish;
02186     }
02187 
02188     // Find the parent directory
02189     RECORD_TIME_POINT;
02190     vs = ((LongId*) &argp->from.dir)->lookup(LongId::writeLock, &lock);
02191     RECORD_TIME_POINT;
02192 
02193     RWLOCK_LOCKED_REASON(lock, "NFS:symlink");
02194 
02195     if (vs == NULL) {
02196       if(*((LongId*) &argp->from.dir) == NullLongId)
02197         {
02198           status = NFSERR_INVAL;
02199         }
02200       else
02201         {
02202           status = NFSERR_STALE;
02203           stalemsg("symlink", (LongId*) &argp->from.dir);
02204         }
02205       goto finish;
02206     }
02207     
02208     if (vs->type != VestaSource::appendableDirectory) {
02209         status = NFSERR_INVAL;
02210         goto finish;
02211     }
02212     
02213     // If name is already in use, make sure it's a ghost or stub
02214     //  with the symlink-to attribute.  (Unfortunately, "ln -s"
02215     //  thinks that you can never symlink from a name that's in use,
02216     //  and refuses to even try.)
02217     VestaSource::errorCode err;
02218     err = vs->lookup(argp->from.name, newvs, cred);
02219     if (err == VestaSource::ok) {
02220         // Stub or ghost is present; make sure it's already a symlink.
02221         if ((newvs->type != VestaSource::stub && 
02222              newvs->type != VestaSource::ghost) ||
02223             newvs->getAttribConst("symlink-to") == NULL) {
02224             status = NFSERR_EXIST;
02225             goto finish;
02226         }
02227     } else if (strcmp(argp->from.name, "") == 0 ||
02228                strcmp(argp->from.name, ".") == 0 ||
02229                strcmp(argp->from.name, "..") == 0) {
02230         status = NFSERR_EXIST;
02231         goto finish;
02232     } else if (err == VestaSource::notFound) {
02233         if (!vs->master) {
02234             status = NFSERR_ROFS;
02235             goto finish;
02236         }
02237         err = vs->insertStub(argp->from.name, true, 
02238                              cred, VestaSource::dontReplace, &newvs);
02239         if (err != VestaSource::ok) {
02240             status = xlate_vserr(err);
02241             goto finish;
02242         }
02243     } else {
02244         status = xlate_vserr(err);
02245         goto finish;
02246     }
02247 
02248     err = newvs->setAttrib("symlink-to", argp->to, cred);
02249 
02250     // Ignore requested attributes (ok per RFC 1094).
02251 
02252   finish:
02253     if (lock != NULL) lock->releaseWrite();
02254     if (newvs != NULL) delete newvs;
02255     if (vs != NULL) delete vs;
02256     return status;
02257 }
02258 
02259 // Create a mutable or appendable directory
02260 nfsstat
02261 do_mkdir(createargs* argp, diropokres* dp, AccessControl::Identity cred)
02262 {
02263     ReadersWritersLock* lock;
02264     nfsstat status = NFS_OK;
02265     bool needMasterHint = false;
02266 
02267     // Find the parent directory
02268     VestaSource* vs =
02269       ((LongId*) &argp->where.dir)->lookup(LongId::writeLock, &lock);
02270     VestaSource* newvs = NULL;
02271     if (vs == NULL) {
02272       if(*((LongId*) &argp->where.dir) == NullLongId)
02273         {
02274           status = NFSERR_INVAL;
02275         }
02276       else
02277         {
02278           status = NFSERR_STALE;
02279           stalemsg("mkdir", (LongId*) &argp->where.dir);
02280         }
02281       goto finish;
02282     }
02283     RWLOCK_LOCKED_REASON(lock, "NFS:mkdir");
02284     switch (vs->type) {
02285       case VestaSource::immutableDirectory:
02286       case VestaSource::evaluatorDirectory:
02287       case VestaSource::evaluatorROEDirectory:
02288         {
02289             // Try to do copy-on-write
02290             VestaSource* newvs;
02291             VestaSource::errorCode err = vs->makeMutable(newvs);
02292             if (err != VestaSource::ok) {
02293                 status = xlate_vserr(err);
02294                 goto finish;
02295             }
02296             delete vs;
02297             vs = newvs;
02298         }
02299         break;
02300         
02301       default:
02302         status = NFSERR_NOTDIR;
02303         goto finish;
02304         
02305       case VestaSource::appendableDirectory:
02306         if (!vs->master) {
02307           // Creating a (master) directory in a nonmaster parent,
02308           // typically /vesta.  Note that only vwizard can do this.
02309           needMasterHint = true;
02310         }
02311         break;
02312 
02313       case VestaSource::mutableDirectory:
02314       case VestaSource::volatileDirectory:
02315       case VestaSource::volatileROEDirectory:
02316         break;
02317     }
02318     
02319     // Check that name isn't already in use
02320     VestaSource* vs2;
02321     VestaSource::errorCode err;
02322     err = vs->lookup(argp->where.name, vs2, cred);
02323     if (err == VestaSource::ok ||
02324         strcmp(argp->where.name, "") == 0 ||
02325         strcmp(argp->where.name, ".") == 0 ||
02326         strcmp(argp->where.name, "..") == 0) {
02327         if (vs2 != NULL) delete vs2;
02328         status = NFSERR_EXIST;
02329         goto finish;
02330     } else if (err != VestaSource::notFound) {
02331         status = xlate_vserr(err);
02332         goto finish;
02333     }
02334     
02335     // Insert new directory into the parent
02336     if (vs->type == VestaSource::appendableDirectory) {
02337       RECORD_TIME_POINT;
02338         VRLog.start();
02339         err = vs->insertAppendableDirectory(argp->where.name, true, 
02340                                             cred, VestaSource::dontReplace,
02341                                             &newvs);
02342         if (needMasterHint && err == VestaSource::ok) {
02343           // Don't set master-repository hint if it's the empty
02344           // string.
02345           if(!myMasterHint.Empty())
02346             newvs->setAttrib("master-repository", myMasterHint.cchars(), NULL);
02347         }
02348         VRLog.commit();
02349     } else {
02350       RECORD_TIME_POINT;
02351         err = vs->insertMutableDirectory(argp->where.name, NULL, true, 
02352                                          cred, VestaSource::dontReplace,
02353                                          &newvs);
02354     }
02355     RECORD_TIME_POINT;
02356     if (err != VestaSource::ok) {
02357         status = xlate_vserr(err);
02358         goto finish;
02359     }
02360 
02361     // Apply requested attributes and build return value
02362     *(LongId*) &dp->file = newvs->longid;
02363     status = apply_sattr(&argp->attributes, newvs, -1, NULL, &dp->attributes);
02364 
02365   finish:
02366     if (lock != NULL) lock->releaseWrite();
02367     if (newvs != NULL) delete newvs;
02368     if (vs != NULL) delete vs;
02369     return status;
02370 }
02371 
02372 
02373 struct FindLastClosure {
02374     long last;
02375 };
02376 
02377 // Helper for expanding the special $LAST token in a symlink; see below
02378 static bool
02379 findLastCallback(void* closure, VestaSource::typeTag type, Arc arc,
02380                  unsigned int index, unsigned int pseudoInode,
02381                  ShortId filesid, bool master)
02382 {
02383     FindLastClosure* cl = (FindLastClosure*) closure;
02384 
02385     const char *p = arc;
02386     if (type == VestaSource::stub || type == VestaSource::ghost) return true;
02387     if (*p == '0' && p[1] != '\0') return true;
02388     while (*p) {
02389         if (!isdigit(*p)) return true;
02390         p++;
02391     }
02392     long val = strtoul(arc, NULL, 10);
02393     if (val > cl->last) cl->last = val;
02394     return true;
02395 }
02396 
02397 
02398 // Read symbolic link.  
02399 nfsstat
02400 do_readlink(nfs_fh *fh, nfspath np, AccessControl::Identity cred)
02401 {
02402     nfsstat status = NFS_OK;
02403     ReadersWritersLock* lock;
02404     VestaSource* vs = ((LongId*) fh)->lookup(LongId::readLock, &lock);
02405     if (vs == NULL) {
02406       if(*((LongId*) fh) == NullLongId)
02407         {
02408           status = NFSERR_INVAL;
02409         }
02410       else
02411         {
02412           status = NFSERR_STALE;
02413           stalemsg("readlink", (LongId*) fh);
02414         }
02415       goto finish;
02416     }
02417 
02418     RWLOCK_LOCKED_REASON(lock, "NFS:readlink");
02419 
02420     if (shortIdSymlink 
02421         && FileShortIdRootLongId.isAncestorOf(vs->longid)) {
02422         //
02423         // Optionally manifest an immutable file in a
02424         // volatileROEDirectory (or evaluatorROEDirectory) as a
02425         // symbolic link to its underlying shortid file.  Feature
02426         // currently unused.
02427         //
02428         char *name = SourceOrDerived::shortIdToName(vs->shortId());
02429         strcpy(np, shortIdSymlinkPrefix.cchars());
02430         strcat(np, name);
02431         if (shortIdSymlinkLength == 0) shortIdSymlinkLength = strlen(np);
02432         delete[] name;
02433     } else if (vs->type == VestaSource::stub ||
02434                vs->type == VestaSource::ghost) {
02435         //
02436         // A stub or ghost is manifested as a symbolic link
02437         // when viewed through the NFS interface if it has the mutable
02438         // attribute "symlink-to"; the attribute's value is either
02439         // the link's value, or the special token $LAST.
02440         //
02441         const char *value = vs->getAttribConst("symlink-to");
02442         if (value == NULL) {
02443             status = NFSERR_INVAL;
02444         } else if (strcmp(value, "$LAST") == 0) {
02445             //
02446             // The symlink's value is the arc in the current directory
02447             // that consists entirely of decimal digits, has no leading
02448             // zeroes, is not bound to a ghost or stub, and has the
02449             // largest numeric value of all such arcs.  If there are no
02450             // such arcs, the value is -1.
02451             //
02452             FindLastClosure cl;
02453             cl.last = -1;
02454             VestaSource* parent = vs->longid.getParent().lookup();
02455             parent->list(0, findLastCallback, &cl, NULL);
02456             sprintf(np, "%d", cl.last);
02457             delete parent;
02458         } else {
02459             strcpy(np, value);
02460         }
02461     } else {
02462         status = NFSERR_INVAL;
02463     }
02464 
02465     delete vs;
02466   finish:
02467     if (lock != NULL) lock->releaseRead();
02468     return status;
02469 }
02470 
02471 static Text statfs_filename;
02472 
02473 // Translate statfs into statfs of the metadata root. This will not
02474 // be too meaningful unless all the metadata and the sid files are
02475 // on the same filesystem.
02476 //
02477 nfsstat
02478 do_statfs(nfs_fh* argp, result_types* resp, AccessControl::Identity cred)
02479 {
02480     nfsstat status = NFS_OK;
02481     struct statfs stfs;
02482     int ok = statfs(statfs_filename.cchars(), &stfs);
02483     if (ok == -1) {
02484         status = xlate_errno(errno);
02485         goto finish;
02486     }
02487     
02488     resp->r_statfsres.statfsres_u.reply.tsize = stfs.f_bsize;
02489 #if __linux__
02490     /* !! Not sure this is right: */
02491     resp->r_statfsres.statfsres_u.reply.bsize = stfs.f_bsize;
02492 #else
02493     resp->r_statfsres.statfsres_u.reply.bsize = stfs.f_fsize;
02494 #endif
02495     resp->r_statfsres.statfsres_u.reply.blocks = stfs.f_blocks;
02496     resp->r_statfsres.statfsres_u.reply.bfree = stfs.f_bfree;
02497     resp->r_statfsres.statfsres_u.reply.bavail = stfs.f_bavail;
02498 
02499   finish:
02500     resp->r_statfsres.status = status;
02501     return status;
02502 }
02503 
02504 void
02505 GlueInit()
02506 {
02507     statfs_filename = VestaConfig::get_Text("Repository", "metadata_root")
02508       + VestaConfig::get_Text("Repository", "sid_dir") + "statfs_target";
02509     int fd = creat(statfs_filename.cchars(), 0666);
02510     if (fd < 0) {
02511         Repos::dprintf(DBG_ALWAYS,
02512                        "error creating statfs_target, errno = %d\n", errno);
02513     } else {
02514         close(fd);
02515     }
02516     Text t;
02517     if (VestaConfig::get("Repository", "ShortId_symlink", t)) {
02518         shortIdSymlinkPrefix = t;
02519         shortIdSymlink = true;
02520     }
02521     allowSymlink =
02522       (bool) VestaConfig::get_int("Repository", "allow_symlink");
02523 
02524     if (VestaConfig::get("Repository", "cow_max", t)) {
02525         cowMax = atoi(t.cchars());
02526     }
02527     cowInProgress = NEW_PTRFREE_ARRAY(CowInProgress, cowMax);
02528     int i;
02529     for (i=0; i<cowMax; i++) {
02530       cowInProgress[i].active = false;
02531     }
02532 
02533     bsdChown =
02534       (VestaConfig::get_Text("Repository", "chown_semantics")=="BSD");
02535 }

Generated on Mon May 8 00:48:44 2006 for Vesta by  doxygen 1.4.2