nfs-ganesha 1.4

nfs4_recovery.c

Go to the documentation of this file.
00001 /*
00002  * vim:expandtab:shiftwidth=8:tabstop=8:
00003  *
00004  * This program is free software; you can redistribute it and/or
00005  * modify it under the terms of the GNU Lesser General Public
00006  * License as published by the Free Software Foundation; either
00007  * version 3 of the License, or (at your option) any later version.
00008  *
00009  * This program is distributed in the hope that it will be useful,
00010  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012  * Lesser General Public License for more details.
00013  *
00014  * You should have received a copy of the GNU Lesser General Public
00015  * License along with this library; if not, write to the Free Software
00016  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
00017  *
00018  * ---------------------------------------
00019  */
00020 
00021 /*
00022  *
00023  * nfs4_recovery.c : Some functions to manage NFSv4 recovery
00024  *
00025  */
00026 
00027 #ifdef HAVE_CONFIG_H
00028 #include "config.h"
00029 #endif
00030 
00031 #ifdef _SOLARIS
00032 #include "solaris_port.h"
00033 #endif
00034 
00035 #include "log.h"
00036 #include "nfs_core.h"
00037 #include "nfs4.h"
00038 #include "sal_functions.h"
00039 #include <sys/stat.h>
00040 #include <sys/types.h>
00041 
/* Root directory of the on-disk NFSv4 recovery data. */
#define NFS_V4_RECOV_ROOT       "/var/lib/nfs/ganesha"
/* Name of the sub-directory in which nfs4_add_clid() creates client entries. */
#define NFS_V4_RECOV_DIR        "v4recov"
/* Name of the sub-directory that entries are moved into while the
 * reclaim list is being built. */
#define NFS_V4_OLD_DIR          "v4old"

/* Full paths of the two directories; filled in by nfs4_create_recov_dir(). */
char v4_recov_dir[PATH_MAX];
char v4_old_dir[PATH_MAX];
00048 
00049 /*
00050  * construct to enable grace period, this could be expanded to implement
00051  * grace instances, where a new grace period is started for every
00052  * failover.  for now keep it simple, just a global used by all clients.
00053  */
00054 typedef struct grace
00055 {
00056         pthread_mutex_t g_mutex;
00057         time_t g_start;
00058         time_t g_duration;
00059         struct glist_head g_clid_list;
00060 } grace_t;
00061 
00062 static grace_t grace;
00063 
00064 typedef struct clid_entry
00065 {
00066         struct glist_head cl_list;
00067         char cl_name[256];
00068 } clid_entry_t;
00069 
00070 static void nfs4_load_recov_clids_nolock(ushort);
00071 
00072 void
00073 nfs4_init_grace()
00074 {
00075         init_glist(&grace.g_clid_list);
00076         (void)pthread_mutex_init(&grace.g_mutex, NULL);
00077 }
00078 
00079 /*
00080  * Routine to start grace period.  Can be called due to server start/restart
00081  * or from failover code.  If this node is taking over for a node, that nodeid
00082  * will be passed to this routine inside of the grace start structure.
00083  */
00084 
00085 void
00086 nfs4_start_grace(nfs_grace_start_t *gsp)
00087 {
00088         int duration;
00089 
00090         /* limit the grace period to a maximum of 45 seconds */
00091         duration = MIN(60, nfs_param.nfsv4_param.lease_lifetime);
00092 
00093         P(grace.g_mutex);
00094 
00095         /*
00096          * if called from failover code and given a nodeid, then this node
00097          * is doing a take over.  read in the client ids from the failing node
00098          */
00099         if (gsp && gsp->nodeid != 0)
00100                 nfs4_load_recov_clids_nolock(gsp->nodeid);
00101 
00102         LogDebug(COMPONENT_STATE, "grace period started, duration(%d)",
00103             duration);
00104 
00105         grace.g_start = time(NULL);
00106         grace.g_duration = duration;
00107 
00108         V(grace.g_mutex);
00109 }
00110 
00111 int
00112 nfs_in_grace()
00113 {
00114         int gp;
00115 
00116         P(grace.g_mutex);
00117 
00118         gp = ((grace.g_start + grace.g_duration) > time(NULL));
00119 
00120         V(grace.g_mutex);
00121 
00122         LogDebug(COMPONENT_STATE, "in grace period  == %d", gp);
00123 
00124         return gp;
00125 }
00126 
00127 /*
00128  * generate a name that identifies this client.  this name will be used to
00129  * know that a client was talking to the server before a restart so that it
00130  * will be allowed to do reclaims during grace period.
00131  */
00132 void
00133 nfs4_create_clid_name(nfs_client_record_t *cl_recp, nfs_client_id_t *pclientid,
00134     struct svc_req *svcp)
00135 {
00136         int i;
00137         sockaddr_t sa;
00138         char buf[SOCK_NAME_MAX];
00139         longlong_t cl_val = 0;
00140 
00141         pclientid->cid_recov_dir = gsh_malloc(256);
00142         if (pclientid->cid_recov_dir == NULL) {
00143                 LogEvent(COMPONENT_CLIENTID, "Mem_Alloc FAILED");
00144                 return;
00145         }
00146         /* get the caller's IP addr */
00147         if (copy_xprt_addr(&sa, svcp->rq_xprt))
00148                 sprint_sockip(&sa, buf, SOCK_NAME_MAX);
00149         else
00150                 strncpy(buf, "Unknown", SOCK_NAME_MAX);
00151 
00152         for (i = 0; i < cl_recp->cr_client_val_len; i++)
00153                 cl_val += cl_recp->cr_client_val[i];
00154 
00155         (void) snprintf(pclientid->cid_recov_dir, 256, "%s-%llx", buf, cl_val);
00156 
00157         LogDebug(COMPONENT_CLIENTID, "Created client name [%s]",
00158             pclientid->cid_recov_dir);
00159 }
00160 
00161 /*
00162  * create an entry in the recovery directory for this client so that it
00163  * will be able to reclaim state after a server reboot/restart.
00164  */
00165 void
00166 nfs4_add_clid(nfs_client_id_t *pclientid)
00167 {
00168         int err;
00169         char path[PATH_MAX];
00170 
00171         if (pclientid->cid_recov_dir == NULL) {
00172                 LogDebug(COMPONENT_CLIENTID,
00173                     "Failed to create client in recovery dir, no name");
00174                 return;
00175         }
00176 
00177         snprintf(path, PATH_MAX, "%s/%s", v4_recov_dir,
00178             pclientid->cid_recov_dir);
00179 
00180         err = mkdir(path, 0700);
00181         if (err == -1 && errno != EEXIST) {
00182                 LogEvent(COMPONENT_CLIENTID,
00183                     "Failed to create client in recovery dir (%s), errno=%d",
00184                     path, errno);
00185         } else {
00186                 LogDebug(COMPONENT_CLIENTID, "Created client dir [%s]", path);
00187         }
00188 }
00189 
00190 /*
00191  * remove a client entry from the recovery directory.  this would be called
00192  * when a client expires.
00193  */
00194 void
00195 nfs4_rm_clid(char *recov_dir)
00196 {
00197         int err;
00198         char path[PATH_MAX];
00199 
00200         if (recov_dir == NULL)
00201                 return;
00202 
00203         snprintf(path, PATH_MAX, "%s/%s", v4_recov_dir, recov_dir);
00204 
00205         err = rmdir(path);
00206         if (err == -1) {
00207                 LogEvent(COMPONENT_CLIENTID,
00208                     "Failed to remove client in recovery dir (%s), errno=%d",
00209                     path, errno);
00210         }
00211 }
00212 
00213 /*
00214  * determine whether or not this client is allowed to do reclaim operations.
00215  * if the server is not in grace period, then no reclaim can happen.
00216  */
00217 void
00218 nfs4_chk_clid(nfs_client_id_t *pclientid)
00219 {
00220         struct glist_head *node;
00221         clid_entry_t *clid_ent;
00222 
00223         /* If we aren't in grace period, then reclaim is not possible */
00224         if (!nfs_in_grace())
00225                 return;
00226 
00227         P(grace.g_mutex);
00228 
00229         /* If there were no clients at time of restart, we're done */
00230         if (glist_empty(&grace.g_clid_list)) {
00231                 V(grace.g_mutex);
00232                 return;
00233         }
00234 
00235         /*
00236          * loop through the list and try to find this client.  if we
00237          * find it, mark it to allow reclaims.  perhaps the client should
00238          * be removed from the list at this point to make the list shorter?
00239          */
00240         glist_for_each(node, &grace.g_clid_list) {
00241                 clid_ent = glist_entry(node, clid_entry_t, cl_list);
00242                 LogDebug(COMPONENT_CLIENTID, "compare %s to %s",
00243                     clid_ent->cl_name, pclientid->cid_recov_dir);
00244                 if (!strncmp(clid_ent->cl_name, pclientid->cid_recov_dir,
00245                     256)) {
00246                         if (isDebug(COMPONENT_CLIENTID)) {
00247                             char str[HASHTABLE_DISPLAY_STRLEN];
00248 
00249                             display_client_id_rec(pclientid, str);
00250 
00251                             LogFullDebug(COMPONENT_CLIENTID,
00252                                          "Allowed to reclaim ClientId %s",
00253                                          str);
00254                         }
00255                         pclientid->cid_allow_reclaim = 1;
00256                         V(grace.g_mutex);
00257                         return;
00258                 }
00259         }
00260         V(grace.g_mutex);
00261 }
00262 
00263 /*
00264  * create the client reclaim list.
00265  * when not doing a take over, first open the old state dir and read in
00266  * those entries.  the reason for the two directories is in case of a 
00267  * reboot/restart during grace period.  next, read in entries from the
00268  * recovery directory and then move them into the old state directory.
00269  * if called due to a take over, nodeid will be nonzero.  in this case,
00270  * add that node's clientids to the existing list.  then move those
00271  * entries into the old state directory.
00272  */
00273 static int
00274 nfs4_read_recov_clids(DIR *dp, char *srcdir, int takeover)
00275 {
00276         struct dirent *dentp;
00277         clid_entry_t *new_ent;
00278         char src[PATH_MAX], dest[PATH_MAX];
00279         int rc;
00280 
00281         dentp = readdir(dp);
00282         while (dentp != NULL) {
00283                 /* don't add '.' and '..', or any '.*' entry */
00284                 if (dentp->d_name[0] != '.') {
00285                         new_ent = gsh_malloc(sizeof(clid_entry_t));
00286                         if (new_ent == NULL) {
00287                                 LogEvent(COMPONENT_CLIENTID,
00288                                          "Unable to allocate memory.");
00289                                 return -1;
00290                         }
00291                         strncpy(new_ent->cl_name, dentp->d_name, 256);
00292                         glist_add(&grace.g_clid_list, &new_ent->cl_list);
00293                         LogDebug(COMPONENT_CLIENTID, "added %s to clid list",
00294                             new_ent->cl_name);
00295                         if (srcdir != NULL) {
00296                                 (void) snprintf(src, PATH_MAX, "%s/%s",
00297                                     srcdir, dentp->d_name);
00298                                 (void) snprintf(dest, PATH_MAX, "%s/%s",
00299                                     v4_old_dir, dentp->d_name);
00300                                 if (takeover)
00301                                         rc = mkdir(dest, 0700);
00302                                 else
00303                                         rc = rename(src, dest);
00304                                 if (rc == -1) {
00305                                         LogEvent(COMPONENT_CLIENTID,
00306                                           "Failed to make dir (%s), errno=%d",
00307                                           dest, errno);
00308                                 }
00309                         }
00310                 }
00311                 dentp = readdir(dp);
00312         }
00313 
00314         return 0;
00315 }
00316 
00317 static void
00318 nfs4_load_recov_clids_nolock(ushort nodeid)
00319 {
00320         DIR *dp;
00321         struct glist_head *node;
00322         clid_entry_t *clid_entry;
00323         int rc;
00324         char path[PATH_MAX];
00325 
00326         if (nodeid == 0) {
00327                 /* when not doing a takeover, start with an empty list */
00328                 if (!glist_empty(&grace.g_clid_list)) {
00329                         glist_for_each(node, &grace.g_clid_list) {
00330                                 glist_del(node);
00331                                 clid_entry = glist_entry(node,
00332                                     clid_entry_t, cl_list);
00333                                 gsh_free(clid_entry);
00334                         }
00335                 }
00336 
00337                 dp = opendir(v4_old_dir);
00338                 if (dp == NULL) {
00339                         LogEvent(COMPONENT_CLIENTID,
00340                             "Failed to open v4 recovery dir (%s), errno=%d",
00341                             v4_old_dir, errno);
00342                         return;
00343                 }
00344                 rc = nfs4_read_recov_clids(dp, NULL, 0);
00345                 if (rc == -1) {
00346                         (void) closedir(dp);
00347                         LogEvent(COMPONENT_CLIENTID,
00348                             "Failed to read v4 recovery dir (%s)", v4_old_dir);
00349                         return;
00350                 }
00351                 (void) closedir(dp);
00352 
00353                 dp = opendir(v4_recov_dir);
00354                 if (dp == NULL) {
00355                         LogEvent(COMPONENT_CLIENTID,
00356                             "Failed to open v4 recovery dir (%s), errno=%d",
00357                             v4_recov_dir, errno);
00358                         return;
00359                 }
00360 
00361                 rc = nfs4_read_recov_clids(dp, v4_recov_dir, 0);
00362                 if (rc == -1) {
00363                         (void) closedir(dp);
00364                         LogEvent(COMPONENT_CLIENTID,
00365                             "Failed to read v4 recovery dir (%s)",
00366                             v4_recov_dir);
00367                         return;
00368                 }
00369                 rc = closedir(dp);
00370                 if (rc == -1) {
00371                         LogEvent(COMPONENT_CLIENTID,
00372                             "Failed to close v4 recovery dir (%s), errno=%d",
00373                             v4_recov_dir, errno);
00374                 }
00375 
00376         } else {
00377                 snprintf(path, PATH_MAX, "%s/%s/node%d",
00378                     NFS_V4_RECOV_ROOT, NFS_V4_RECOV_DIR, nodeid);
00379 
00380                 dp = opendir(path);
00381                 if (dp == NULL) {
00382                         LogEvent(COMPONENT_CLIENTID,
00383                             "Failed to open v4 recovery dir (%s), errno=%d",
00384                             path, errno);
00385                         return;
00386                 }
00387 
00388                 rc = nfs4_read_recov_clids(dp, path, 1);
00389                 if (rc == -1) {
00390                         (void) closedir(dp);
00391                         LogEvent(COMPONENT_CLIENTID,
00392                             "Failed to read v4 recovery dir (%s)",
00393                             path);
00394                         return;
00395                 }
00396                 rc = closedir(dp);
00397                 if (rc == -1) {
00398                         LogEvent(COMPONENT_CLIENTID,
00399                             "Failed to close v4 recovery dir (%s), errno=%d",
00400                             path, errno);
00401                 }
00402         }
00403 
00404 }
00405 
00406 void
00407 nfs4_load_recov_clids(ushort nodeid)
00408 {
00409         P(grace.g_mutex);
00410 
00411         nfs4_load_recov_clids_nolock(nodeid);
00412 
00413         V(grace.g_mutex);
00414 }
00415 
00416 void
00417 nfs4_clean_old_recov_dir()
00418 {
00419         DIR *dp;
00420         struct dirent *dentp;
00421         char path[PATH_MAX];
00422         int rc;
00423 
00424         dp = opendir(v4_old_dir);
00425         if (dp == NULL) {
00426                 LogEvent(COMPONENT_CLIENTID,
00427                     "Failed to open old v4 recovery dir (%s), errno=%d",
00428                     v4_old_dir, errno);
00429                 return;
00430         }
00431 
00432         for (dentp = readdir(dp); dentp != NULL; dentp = readdir(dp)) {
00433                 /* don't remove '.' and '..', or any '.*' entry */
00434                 if (dentp->d_name[0] == '.')
00435                         continue;
00436 
00437                 (void) snprintf(path, PATH_MAX, "%s/%s",
00438                     v4_old_dir, dentp->d_name);
00439 
00440                 rc = rmdir(path);
00441                 if (rc == -1) {
00442                         LogEvent(COMPONENT_CLIENTID,
00443                             "Failed to remove %s, errno=%d",
00444                             path, errno);
00445                 }
00446         }
00447 }
00448 
00449 /*
00450  * the recovery directory may not exist yet, so create it.  this should
00451  * only need to be done once (if at all).  also, the location of the
00452  * directory could be configurable.
00453  */
00454 void
00455 nfs4_create_recov_dir()
00456 {
00457         int err;
00458 
00459         err = mkdir(NFS_V4_RECOV_ROOT, 0755);
00460         if (err == -1 && errno != EEXIST) {
00461                 LogEvent(COMPONENT_CLIENTID,
00462                     "Failed to create v4 recovery dir (%s), errno=%d",
00463                     NFS_V4_RECOV_ROOT, errno);
00464         }
00465 
00466         snprintf(v4_recov_dir, PATH_MAX, "%s/%s",
00467             NFS_V4_RECOV_ROOT, NFS_V4_RECOV_DIR);
00468         err = mkdir(v4_recov_dir, 0755);
00469         if (err == -1 && errno != EEXIST) {
00470                 LogEvent(COMPONENT_CLIENTID,
00471                     "Failed to create v4 recovery dir(%s), errno=%d",
00472                     v4_recov_dir, errno);
00473         }
00474 
00475         snprintf(v4_old_dir, PATH_MAX, "%s/%s",
00476             NFS_V4_RECOV_ROOT, NFS_V4_OLD_DIR);
00477         err = mkdir(v4_old_dir, 0755);
00478         if (err == -1 && errno != EEXIST) {
00479                 LogEvent(COMPONENT_CLIENTID,
00480                     "Failed to create v4 recovery dir(%s), errno=%d",
00481                     v4_old_dir, errno);
00482         }
00483         if (nfs_param.core_param.clustered) {
00484                 snprintf(v4_recov_dir, PATH_MAX, "%s/%s/node%d",
00485                     NFS_V4_RECOV_ROOT, NFS_V4_RECOV_DIR, g_nodeid);
00486 
00487                 err = mkdir(v4_recov_dir, 0755);
00488                 if (err == -1 && errno != EEXIST) {
00489                         LogEvent(COMPONENT_CLIENTID,
00490                             "Failed to create v4 recovery dir(%s), errno=%d",
00491                             v4_recov_dir, errno);
00492                 }
00493 
00494                 snprintf(v4_old_dir, PATH_MAX, "%s/%s/node%d",
00495                     NFS_V4_RECOV_ROOT, NFS_V4_OLD_DIR, g_nodeid);
00496 
00497                 err = mkdir(v4_old_dir, 0755);
00498                 if (err == -1 && errno != EEXIST) {
00499                         LogEvent(COMPONENT_CLIENTID,
00500                             "Failed to create v4 recovery dir(%s), errno=%d",
00501                             v4_old_dir, errno);
00502                 }
00503         }
00504 }