nfs-ganesha 1.4
|
00001 /* 00002 * vim:expandtab:shiftwidth=8:tabstop=8: 00003 * 00004 * This program is free software; you can redistribute it and/or 00005 * modify it under the terms of the GNU Lesser General Public 00006 * License as published by the Free Software Foundation; either 00007 * version 3 of the License, or (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00012 * Lesser General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU Lesser General Public 00015 * License along with this library; if not, write to the Free Software 00016 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00017 * 00018 * --------------------------------------- 00019 */ 00020 00021 /* 00022 * 00023 * nfs4_recovery.c : Some functions to manage NFSv4 recovery 00024 * 00025 */ 00026 00027 #ifdef HAVE_CONFIG_H 00028 #include "config.h" 00029 #endif 00030 00031 #ifdef _SOLARIS 00032 #include "solaris_port.h" 00033 #endif 00034 00035 #include "log.h" 00036 #include "nfs_core.h" 00037 #include "nfs4.h" 00038 #include "sal_functions.h" 00039 #include <sys/stat.h> 00040 #include <sys/types.h> 00041 00042 #define NFS_V4_RECOV_ROOT "/var/lib/nfs/ganesha" 00043 #define NFS_V4_RECOV_DIR "v4recov" 00044 #define NFS_V4_OLD_DIR "v4old" 00045 00046 char v4_recov_dir[PATH_MAX]; 00047 char v4_old_dir[PATH_MAX]; 00048 00049 /* 00050 * construct to enable grace period, this could be expanded to implement 00051 * grace instances, where a new grace period is started for every 00052 * failover. for now keep it simple, just a global used by all clients. 00053 */ 00054 typedef struct grace 00055 { 00056 pthread_mutex_t g_mutex; 00057 time_t g_start; 00058 time_t g_duration; 00059 struct glist_head g_clid_list; 00060 } grace_t; 00061 00062 static grace_t grace; 00063 00064 typedef struct clid_entry 00065 { 00066 struct glist_head cl_list; 00067 char cl_name[256]; 00068 } clid_entry_t; 00069 00070 static void nfs4_load_recov_clids_nolock(ushort); 00071 00072 void 00073 nfs4_init_grace() 00074 { 00075 init_glist(&grace.g_clid_list); 00076 (void)pthread_mutex_init(&grace.g_mutex, NULL); 00077 } 00078 00079 /* 00080 * Routine to start grace period. Can be called due to server start/restart 00081 * or from failover code. If this node is taking over for a node, that nodeid 00082 * will be passed to this routine inside of the grace start structure. 00083 */ 00084 00085 void 00086 nfs4_start_grace(nfs_grace_start_t *gsp) 00087 { 00088 int duration; 00089 00090 /* limit the grace period to a maximum of 45 seconds */ 00091 duration = MIN(60, nfs_param.nfsv4_param.lease_lifetime); 00092 00093 P(grace.g_mutex); 00094 00095 /* 00096 * if called from failover code and given a nodeid, then this node 00097 * is doing a take over. read in the client ids from the failing node 00098 */ 00099 if (gsp && gsp->nodeid != 0) 00100 nfs4_load_recov_clids_nolock(gsp->nodeid); 00101 00102 LogDebug(COMPONENT_STATE, "grace period started, duration(%d)", 00103 duration); 00104 00105 grace.g_start = time(NULL); 00106 grace.g_duration = duration; 00107 00108 V(grace.g_mutex); 00109 } 00110 00111 int 00112 nfs_in_grace() 00113 { 00114 int gp; 00115 00116 P(grace.g_mutex); 00117 00118 gp = ((grace.g_start + grace.g_duration) > time(NULL)); 00119 00120 V(grace.g_mutex); 00121 00122 LogDebug(COMPONENT_STATE, "in grace period == %d", gp); 00123 00124 return gp; 00125 } 00126 00127 /* 00128 * generate a name that identifies this client. this name will be used to 00129 * know that a client was talking to the server before a restart so that it 00130 * will be allowed to do reclaims during grace period. 00131 */ 00132 void 00133 nfs4_create_clid_name(nfs_client_record_t *cl_recp, nfs_client_id_t *pclientid, 00134 struct svc_req *svcp) 00135 { 00136 int i; 00137 sockaddr_t sa; 00138 char buf[SOCK_NAME_MAX]; 00139 longlong_t cl_val = 0; 00140 00141 pclientid->cid_recov_dir = gsh_malloc(256); 00142 if (pclientid->cid_recov_dir == NULL) { 00143 LogEvent(COMPONENT_CLIENTID, "Mem_Alloc FAILED"); 00144 return; 00145 } 00146 /* get the caller's IP addr */ 00147 if (copy_xprt_addr(&sa, svcp->rq_xprt)) 00148 sprint_sockip(&sa, buf, SOCK_NAME_MAX); 00149 else 00150 strncpy(buf, "Unknown", SOCK_NAME_MAX); 00151 00152 for (i = 0; i < cl_recp->cr_client_val_len; i++) 00153 cl_val += cl_recp->cr_client_val[i]; 00154 00155 (void) snprintf(pclientid->cid_recov_dir, 256, "%s-%llx", buf, cl_val); 00156 00157 LogDebug(COMPONENT_CLIENTID, "Created client name [%s]", 00158 pclientid->cid_recov_dir); 00159 } 00160 00161 /* 00162 * create an entry in the recovery directory for this client so that it 00163 * will be able to reclaim state after a server reboot/restart. 00164 */ 00165 void 00166 nfs4_add_clid(nfs_client_id_t *pclientid) 00167 { 00168 int err; 00169 char path[PATH_MAX]; 00170 00171 if (pclientid->cid_recov_dir == NULL) { 00172 LogDebug(COMPONENT_CLIENTID, 00173 "Failed to create client in recovery dir, no name"); 00174 return; 00175 } 00176 00177 snprintf(path, PATH_MAX, "%s/%s", v4_recov_dir, 00178 pclientid->cid_recov_dir); 00179 00180 err = mkdir(path, 0700); 00181 if (err == -1 && errno != EEXIST) { 00182 LogEvent(COMPONENT_CLIENTID, 00183 "Failed to create client in recovery dir (%s), errno=%d", 00184 path, errno); 00185 } else { 00186 LogDebug(COMPONENT_CLIENTID, "Created client dir [%s]", path); 00187 } 00188 } 00189 00190 /* 00191 * remove a client entry from the recovery directory. this would be called 00192 * when a client expires. 00193 */ 00194 void 00195 nfs4_rm_clid(char *recov_dir) 00196 { 00197 int err; 00198 char path[PATH_MAX]; 00199 00200 if (recov_dir == NULL) 00201 return; 00202 00203 snprintf(path, PATH_MAX, "%s/%s", v4_recov_dir, recov_dir); 00204 00205 err = rmdir(path); 00206 if (err == -1) { 00207 LogEvent(COMPONENT_CLIENTID, 00208 "Failed to remove client in recovery dir (%s), errno=%d", 00209 path, errno); 00210 } 00211 } 00212 00213 /* 00214 * determine whether or not this client is allowed to do reclaim operations. 00215 * if the server is not in grace period, then no reclaim can happen. 00216 */ 00217 void 00218 nfs4_chk_clid(nfs_client_id_t *pclientid) 00219 { 00220 struct glist_head *node; 00221 clid_entry_t *clid_ent; 00222 00223 /* If we aren't in grace period, then reclaim is not possible */ 00224 if (!nfs_in_grace()) 00225 return; 00226 00227 P(grace.g_mutex); 00228 00229 /* If there were no clients at time of restart, we're done */ 00230 if (glist_empty(&grace.g_clid_list)) { 00231 V(grace.g_mutex); 00232 return; 00233 } 00234 00235 /* 00236 * loop through the list and try to find this client. if we 00237 * find it, mark it to allow reclaims. perhaps the client should 00238 * be removed from the list at this point to make the list shorter? 00239 */ 00240 glist_for_each(node, &grace.g_clid_list) { 00241 clid_ent = glist_entry(node, clid_entry_t, cl_list); 00242 LogDebug(COMPONENT_CLIENTID, "compare %s to %s", 00243 clid_ent->cl_name, pclientid->cid_recov_dir); 00244 if (!strncmp(clid_ent->cl_name, pclientid->cid_recov_dir, 00245 256)) { 00246 if (isDebug(COMPONENT_CLIENTID)) { 00247 char str[HASHTABLE_DISPLAY_STRLEN]; 00248 00249 display_client_id_rec(pclientid, str); 00250 00251 LogFullDebug(COMPONENT_CLIENTID, 00252 "Allowed to reclaim ClientId %s", 00253 str); 00254 } 00255 pclientid->cid_allow_reclaim = 1; 00256 V(grace.g_mutex); 00257 return; 00258 } 00259 } 00260 V(grace.g_mutex); 00261 } 00262 00263 /* 00264 * create the client reclaim list. 00265 * when not doing a take over, first open the old state dir and read in 00266 * those entries. the reason for the two directories is in case of a 00267 * reboot/restart during grace period. next, read in entries from the 00268 * recovery directory and then move them into the old state directory. 00269 * if called due to a take over, nodeid will be nonzero. in this case, 00270 * add that node's clientids to the existing list. then move those 00271 * entries into the old state directory. 00272 */ 00273 static int 00274 nfs4_read_recov_clids(DIR *dp, char *srcdir, int takeover) 00275 { 00276 struct dirent *dentp; 00277 clid_entry_t *new_ent; 00278 char src[PATH_MAX], dest[PATH_MAX]; 00279 int rc; 00280 00281 dentp = readdir(dp); 00282 while (dentp != NULL) { 00283 /* don't add '.' and '..', or any '.*' entry */ 00284 if (dentp->d_name[0] != '.') { 00285 new_ent = gsh_malloc(sizeof(clid_entry_t)); 00286 if (new_ent == NULL) { 00287 LogEvent(COMPONENT_CLIENTID, 00288 "Unable to allocate memory."); 00289 return -1; 00290 } 00291 strncpy(new_ent->cl_name, dentp->d_name, 256); 00292 glist_add(&grace.g_clid_list, &new_ent->cl_list); 00293 LogDebug(COMPONENT_CLIENTID, "added %s to clid list", 00294 new_ent->cl_name); 00295 if (srcdir != NULL) { 00296 (void) snprintf(src, PATH_MAX, "%s/%s", 00297 srcdir, dentp->d_name); 00298 (void) snprintf(dest, PATH_MAX, "%s/%s", 00299 v4_old_dir, dentp->d_name); 00300 if (takeover) 00301 rc = mkdir(dest, 0700); 00302 else 00303 rc = rename(src, dest); 00304 if (rc == -1) { 00305 LogEvent(COMPONENT_CLIENTID, 00306 "Failed to make dir (%s), errno=%d", 00307 dest, errno); 00308 } 00309 } 00310 } 00311 dentp = readdir(dp); 00312 } 00313 00314 return 0; 00315 } 00316 00317 static void 00318 nfs4_load_recov_clids_nolock(ushort nodeid) 00319 { 00320 DIR *dp; 00321 struct glist_head *node; 00322 clid_entry_t *clid_entry; 00323 int rc; 00324 char path[PATH_MAX]; 00325 00326 if (nodeid == 0) { 00327 /* when not doing a takeover, start with an empty list */ 00328 if (!glist_empty(&grace.g_clid_list)) { 00329 glist_for_each(node, &grace.g_clid_list) { 00330 glist_del(node); 00331 clid_entry = glist_entry(node, 00332 clid_entry_t, cl_list); 00333 gsh_free(clid_entry); 00334 } 00335 } 00336 00337 dp = opendir(v4_old_dir); 00338 if (dp == NULL) { 00339 LogEvent(COMPONENT_CLIENTID, 00340 "Failed to open v4 recovery dir (%s), errno=%d", 00341 v4_old_dir, errno); 00342 return; 00343 } 00344 rc = nfs4_read_recov_clids(dp, NULL, 0); 00345 if (rc == -1) { 00346 (void) closedir(dp); 00347 LogEvent(COMPONENT_CLIENTID, 00348 "Failed to read v4 recovery dir (%s)", v4_old_dir); 00349 return; 00350 } 00351 (void) closedir(dp); 00352 00353 dp = opendir(v4_recov_dir); 00354 if (dp == NULL) { 00355 LogEvent(COMPONENT_CLIENTID, 00356 "Failed to open v4 recovery dir (%s), errno=%d", 00357 v4_recov_dir, errno); 00358 return; 00359 } 00360 00361 rc = nfs4_read_recov_clids(dp, v4_recov_dir, 0); 00362 if (rc == -1) { 00363 (void) closedir(dp); 00364 LogEvent(COMPONENT_CLIENTID, 00365 "Failed to read v4 recovery dir (%s)", 00366 v4_recov_dir); 00367 return; 00368 } 00369 rc = closedir(dp); 00370 if (rc == -1) { 00371 LogEvent(COMPONENT_CLIENTID, 00372 "Failed to close v4 recovery dir (%s), errno=%d", 00373 v4_recov_dir, errno); 00374 } 00375 00376 } else { 00377 snprintf(path, PATH_MAX, "%s/%s/node%d", 00378 NFS_V4_RECOV_ROOT, NFS_V4_RECOV_DIR, nodeid); 00379 00380 dp = opendir(path); 00381 if (dp == NULL) { 00382 LogEvent(COMPONENT_CLIENTID, 00383 "Failed to open v4 recovery dir (%s), errno=%d", 00384 path, errno); 00385 return; 00386 } 00387 00388 rc = nfs4_read_recov_clids(dp, path, 1); 00389 if (rc == -1) { 00390 (void) closedir(dp); 00391 LogEvent(COMPONENT_CLIENTID, 00392 "Failed to read v4 recovery dir (%s)", 00393 path); 00394 return; 00395 } 00396 rc = closedir(dp); 00397 if (rc == -1) { 00398 LogEvent(COMPONENT_CLIENTID, 00399 "Failed to close v4 recovery dir (%s), errno=%d", 00400 path, errno); 00401 } 00402 } 00403 00404 } 00405 00406 void 00407 nfs4_load_recov_clids(ushort nodeid) 00408 { 00409 P(grace.g_mutex); 00410 00411 nfs4_load_recov_clids_nolock(nodeid); 00412 00413 V(grace.g_mutex); 00414 } 00415 00416 void 00417 nfs4_clean_old_recov_dir() 00418 { 00419 DIR *dp; 00420 struct dirent *dentp; 00421 char path[PATH_MAX]; 00422 int rc; 00423 00424 dp = opendir(v4_old_dir); 00425 if (dp == NULL) { 00426 LogEvent(COMPONENT_CLIENTID, 00427 "Failed to open old v4 recovery dir (%s), errno=%d", 00428 v4_old_dir, errno); 00429 return; 00430 } 00431 00432 for (dentp = readdir(dp); dentp != NULL; dentp = readdir(dp)) { 00433 /* don't remove '.' and '..', or any '.*' entry */ 00434 if (dentp->d_name[0] == '.') 00435 continue; 00436 00437 (void) snprintf(path, PATH_MAX, "%s/%s", 00438 v4_old_dir, dentp->d_name); 00439 00440 rc = rmdir(path); 00441 if (rc == -1) { 00442 LogEvent(COMPONENT_CLIENTID, 00443 "Failed to remove %s, errno=%d", 00444 path, errno); 00445 } 00446 } 00447 } 00448 00449 /* 00450 * the recovery directory may not exist yet, so create it. this should 00451 * only need to be done once (if at all). also, the location of the 00452 * directory could be configurable. 00453 */ 00454 void 00455 nfs4_create_recov_dir() 00456 { 00457 int err; 00458 00459 err = mkdir(NFS_V4_RECOV_ROOT, 0755); 00460 if (err == -1 && errno != EEXIST) { 00461 LogEvent(COMPONENT_CLIENTID, 00462 "Failed to create v4 recovery dir (%s), errno=%d", 00463 NFS_V4_RECOV_ROOT, errno); 00464 } 00465 00466 snprintf(v4_recov_dir, PATH_MAX, "%s/%s", 00467 NFS_V4_RECOV_ROOT, NFS_V4_RECOV_DIR); 00468 err = mkdir(v4_recov_dir, 0755); 00469 if (err == -1 && errno != EEXIST) { 00470 LogEvent(COMPONENT_CLIENTID, 00471 "Failed to create v4 recovery dir(%s), errno=%d", 00472 v4_recov_dir, errno); 00473 } 00474 00475 snprintf(v4_old_dir, PATH_MAX, "%s/%s", 00476 NFS_V4_RECOV_ROOT, NFS_V4_OLD_DIR); 00477 err = mkdir(v4_old_dir, 0755); 00478 if (err == -1 && errno != EEXIST) { 00479 LogEvent(COMPONENT_CLIENTID, 00480 "Failed to create v4 recovery dir(%s), errno=%d", 00481 v4_old_dir, errno); 00482 } 00483 if (nfs_param.core_param.clustered) { 00484 snprintf(v4_recov_dir, PATH_MAX, "%s/%s/node%d", 00485 NFS_V4_RECOV_ROOT, NFS_V4_RECOV_DIR, g_nodeid); 00486 00487 err = mkdir(v4_recov_dir, 0755); 00488 if (err == -1 && errno != EEXIST) { 00489 LogEvent(COMPONENT_CLIENTID, 00490 "Failed to create v4 recovery dir(%s), errno=%d", 00491 v4_recov_dir, errno); 00492 } 00493 00494 snprintf(v4_old_dir, PATH_MAX, "%s/%s/node%d", 00495 NFS_V4_RECOV_ROOT, NFS_V4_OLD_DIR, g_nodeid); 00496 00497 err = mkdir(v4_old_dir, 0755); 00498 if (err == -1 && errno != EEXIST) { 00499 LogEvent(COMPONENT_CLIENTID, 00500 "Failed to create v4 recovery dir(%s), errno=%d", 00501 v4_old_dir, errno); 00502 } 00503 } 00504 }