nfs-ganesha 1.4
|
00001 /* 00002 * vim:expandtab:shiftwidth=8:tabstop=8: 00003 * 00004 * Copyright (C) 2010 The Linux Box Corporation 00005 * All Rights Reserved 00006 * Contributor: Adam C. Emerson 00007 * 00008 * This program is free software; you can redistribute it and/or 00009 * modify it under the terms of the GNU Lesser General Public 00010 * License as published by the Free Software Foundation; either 00011 * version 3 of the License, or (at your option) any later version. 00012 * 00013 * This program is distributed in the hope that it will be useful, 00014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00016 * Lesser General Public License for more details. 00017 * 00018 * You should have received a copy of the GNU Lesser General Public 00019 * License along with this library; if not, write to the Free Software 00020 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00021 * 00022 * --------------------------------------- 00023 */ 00024 00035 #ifdef HAVE_CONFIG_H 00036 #include "config.h" 00037 #endif 00038 00039 #include "fsal.h" 00040 #include "fsal_internal.h" 00041 #include "fsal_convert.h" 00042 #include "nfsv41.h" 00043 #include <cephfs/libcephfs.h> 00044 #include <fcntl.h> 00045 #include "HashTable.h" 00046 #include <pthread.h> 00047 #include <stdint.h> 00048 #include "fsal_types.h" 00049 #include "fsal_pnfs.h" 00050 #include "pnfs_common.h" 00051 #include "fsal_pnfs_files.h" 00052 00053 const size_t BIGGEST_PATTERN = 1024; /* Linux supports a stripe 00054 pattern with no more than 4096 00055 stripes, but for now we stick 00056 to 1024 to keep them da_addrs 00057 from being too gigantic. */ 00058 00059 00060 00061 nfsstat4 00062 CEPHFSAL_layoutget(fsal_handle_t *exthandle, 00063 fsal_op_context_t *extcontext, 00064 XDR *loc_body, 00065 const struct fsal_layoutget_arg *arg, 00066 struct fsal_layoutget_res *res) 00067 { 00068 /* The FSAL handle as defined for the CEPH FSAL */ 00069 cephfsal_handle_t* handle = (cephfsal_handle_t*) exthandle; 00070 /* The FSAL operation context as defined for the CEPH FSAL */ 00071 cephfsal_op_context_t* context = (cephfsal_op_context_t*) extcontext; 00072 /* The mount passed to all*/ 00073 struct ceph_mount_info *cmount = context->export_context->cmount; 00074 /* Structure containing the storage parameters of the file within 00075 the Ceph cluster. */ 00076 struct ceph_file_layout file_layout; 00077 /* Width of each stripe on the file */ 00078 uint32_t stripe_width = 0; 00079 /* Utility parameter */ 00080 nfl_util4 util = 0; 00081 /* The last byte that can be accessed through pNFS */ 00082 uint64_t last_possible_byte = 0; 00083 /* The deviceid for this layout */ 00084 struct pnfs_deviceid deviceid = {0, 0}; 00085 /* Data server handle */ 00086 cephfsal_handle_t ds_handle; 00087 /* NFS Status */ 00088 nfsstat4 nfs_status = 0; 00089 00090 /* We support only LAYOUT4_NFSV4_1_FILES layouts */ 00091 00092 if (arg->type != LAYOUT4_NFSV4_1_FILES) { 00093 LogCrit(COMPONENT_PNFS, 00094 "Unsupported layout type: %x", 00095 arg->type); 00096 return NFS4ERR_UNKNOWN_LAYOUTTYPE; 00097 } 00098 00099 /* Get basic information on the file and calculate the dimensions 00100 of the layout we can support. */ 00101 00102 memset(&file_layout, 0, sizeof(struct ceph_file_layout)); 00103 00104 ceph_ll_file_layout(cmount, VINODE(handle), &file_layout); 00105 stripe_width = file_layout.fl_stripe_unit; 00106 last_possible_byte = (BIGGEST_PATTERN * stripe_width) - 1; 00107 00108 /* Since the Linux kernel refuses to work with any layout that 00109 doesn't cover the whole file, if a whole file layout is 00110 requested, lie. 00111 00112 Otherwise, make sure the required layout doesn't go beyond 00113 what can be accessed through pNFS. */ 00114 if (!((res->segment.offset == 0) && 00115 (res->segment.length == NFS4_UINT64_MAX))) { 00116 struct pnfs_segment smallest_acceptable = { 00117 .io_mode = res->segment.io_mode, 00118 .offset = res->segment.offset, 00119 .length = arg->minlength 00120 }; 00121 struct pnfs_segment forbidden_area = { 00122 .io_mode = res->segment.io_mode, 00123 .offset = last_possible_byte + 1, 00124 .length = NFS4_UINT64_MAX 00125 }; 00126 if (pnfs_segments_overlap(smallest_acceptable, 00127 forbidden_area)) { 00128 LogCrit(COMPONENT_PNFS, 00129 "Required layout extends beyond allowed region." 00130 "offset: %"PRIu64", minlength: %" PRIu64".", 00131 res->segment.offset, 00132 arg->minlength); 00133 return NFS4ERR_BADLAYOUT; 00134 } 00135 res->segment.offset = 0; 00136 res->segment.length = stripe_width * BIGGEST_PATTERN; 00137 res->segment.io_mode = LAYOUTIOMODE4_RW; 00138 } 00139 00140 /* For now, just make the low quad of the deviceid be the inode 00141 number. With the span of the layouts constrained above, this 00142 lets us generate the device address on the fly from the 00143 deviceid rather than storing it. */ 00144 00145 deviceid.export_id = arg->export_id; 00146 deviceid.devid = VINODE(handle).ino.val; 00147 00148 /* We return exactly one filehandle, filling in the necessary 00149 information for the DS server to speak to the Ceph OSD 00150 directly. */ 00151 00152 ds_handle = *handle; 00153 ds_handle.data.layout = file_layout; 00154 ds_handle.data.snapseq = ceph_ll_snap_seq(cmount, VINODE(handle)); 00155 00156 /* We are using sparse layouts with commit-through-DS, so our 00157 utility word contains only the stripe width, our first stripe 00158 is always at the beginning of the layout, and there is no 00159 pattern offset. */ 00160 00161 if ((stripe_width & ~NFL4_UFLG_STRIPE_UNIT_SIZE_MASK) != 0) { 00162 LogCrit(COMPONENT_PNFS, 00163 "Ceph returned stripe width that is disallowed by NFS: " 00164 "%"PRIu32".", stripe_width); 00165 return NFS4ERR_SERVERFAULT; 00166 } 00167 util = stripe_width; 00168 00169 if ((nfs_status 00170 = FSAL_encode_file_layout(loc_body, 00171 extcontext, 00172 &deviceid, 00173 util, 00174 0, 00175 0, 00176 1, 00177 (fsal_handle_t *)&ds_handle))) { 00178 LogCrit(COMPONENT_PNFS, "Failed to encode nfsv4_1_file_layout."); 00179 return nfs_status; 00180 } 00181 00182 /* We grant only one segment, and we want it back when the file 00183 is closed. */ 00184 00185 res->return_on_close = TRUE; 00186 res->last_segment = TRUE; 00187 00188 return NFS4_OK; 00189 } 00190 00191 nfsstat4 00192 CEPHFSAL_layoutreturn(fsal_handle_t* handle, 00193 fsal_op_context_t* context, 00194 XDR *lrf_body, 00195 const struct fsal_layoutreturn_arg *arg) 00196 00197 { 00198 /* Sanity check on type */ 00199 if (arg->lo_type != LAYOUT4_NFSV4_1_FILES) { 00200 LogCrit(COMPONENT_PNFS, 00201 "Unsupported layout type: %x", 00202 arg->lo_type); 00203 return NFS4ERR_UNKNOWN_LAYOUTTYPE; 00204 } 00205 00206 /* Since we no longer store DS addresses, we no longer have 00207 anything to free. Later on we should unravel the Ceph client 00208 a bit more and coordinate with the Ceph MDS's notion of read 00209 and write pins, but that isn't germane until we have 00210 LAYOUTRECALL. */ 00211 00212 return NFS4_OK; 00213 } 00214 00215 nfsstat4 00216 CEPHFSAL_layoutcommit(fsal_handle_t *exthandle, 00217 fsal_op_context_t *extcontext, 00218 XDR *lou_body, 00219 const struct fsal_layoutcommit_arg *arg, 00220 struct fsal_layoutcommit_res *res) 00221 { 00222 /* Filehandle for Ceph calls */ 00223 cephfsal_handle_t* handle = (cephfsal_handle_t*) exthandle; 00224 /* Operation context */ 00225 cephfsal_op_context_t* context = (cephfsal_op_context_t*) extcontext; 00226 /* Mount structure that must be supplied with each call to Ceph */ 00227 struct ceph_mount_info *cmount = context->export_context->cmount; 00228 /* User ID and group ID for permissions */ 00229 int uid = FSAL_OP_CONTEXT_TO_UID(context); 00230 int gid = FSAL_OP_CONTEXT_TO_GID(context); 00231 /* Old stat, so we don't truncate file or reverse time */ 00232 struct stat stold; 00233 /* new stat to set time and size */ 00234 struct stat stnew; 00235 /* Mask to determine exactly what gets set */ 00236 int attrmask = 0; 00237 /* Error returns from Ceph */ 00238 int ceph_status = 0; 00239 00240 /* Sanity check on type */ 00241 if (arg->type != LAYOUT4_NFSV4_1_FILES) { 00242 LogCrit(COMPONENT_PNFS, 00243 "Unsupported layout type: %x", 00244 arg->type); 00245 return NFS4ERR_UNKNOWN_LAYOUTTYPE; 00246 } 00247 00248 /* A more proper and robust implementation of this would use Ceph 00249 caps, but we need to hack at the client to expose those before 00250 it can work. */ 00251 00252 memset(&stold, 0, sizeof(struct stat)); 00253 if ((ceph_status = ceph_ll_getattr(cmount, VINODE(handle), 00254 &stold, uid, gid)) < 0) { 00255 if (ceph_status == -EPERM) { 00256 LogCrit(COMPONENT_PNFS, 00257 "User %u, Group %u not permitted to get attributes " 00258 "of file %" PRIu64 ".", 00259 uid, gid, VINODE(handle).ino.val); 00260 return NFS4ERR_ACCESS; 00261 } else { 00262 LogCrit(COMPONENT_PNFS, 00263 "Error %d in attempt to get attributes of " 00264 "file %" PRIu64 ".", 00265 -ceph_status, VINODE(handle).ino.val); 00266 return posix2nfs4_error(-ceph_status); 00267 } 00268 } 00269 00270 memset(&stnew, 0, sizeof(struct stat)); 00271 if (arg->new_offset) { 00272 if (stold.st_size < arg->last_write + 1) { 00273 attrmask |= CEPH_SETATTR_SIZE; 00274 stnew.st_size = arg->last_write + 1; 00275 res->size_supplied = TRUE; 00276 res->new_size = arg->last_write + 1; 00277 } 00278 } 00279 00280 if ((arg->time_changed) && 00281 (arg->new_time.seconds > stold.st_mtime)) { 00282 stnew.st_mtime = arg->new_time.seconds; 00283 } else { 00284 stnew.st_mtime = time(NULL); 00285 } 00286 00287 attrmask |= CEPH_SETATTR_MTIME; 00288 00289 if ((ceph_status = ceph_ll_setattr(cmount, VINODE(handle), &stnew, 00290 attrmask, uid, gid)) < 0) { 00291 if (ceph_status == -EPERM) { 00292 LogCrit(COMPONENT_PNFS, 00293 "User %u, Group %u not permitted to get attributes " 00294 "of file %" PRIu64 ".", 00295 uid, gid, VINODE(handle).ino.val); 00296 return NFS4ERR_ACCESS; 00297 } else { 00298 LogCrit(COMPONENT_PNFS, 00299 "Error %d in attempt to get attributes of " 00300 "file %" PRIu64 ".", 00301 -ceph_status, VINODE(handle).ino.val); 00302 return posix2nfs4_error(-ceph_status); 00303 } 00304 } 00305 00306 /* This is likely universal for files. */ 00307 00308 res->commit_done = TRUE; 00309 00310 return NFS4_OK; 00311 } 00312 00313 nfsstat4 00314 CEPHFSAL_getdeviceinfo(fsal_op_context_t *extcontext, 00315 XDR* da_addr_body, 00316 layouttype4 type, 00317 const struct pnfs_deviceid *deviceid) 00318 { 00319 /* Operation context */ 00320 cephfsal_op_context_t* context = (cephfsal_op_context_t*) extcontext; 00321 /* Mount structure that must be supplied with each call to Ceph */ 00322 struct ceph_mount_info *cmount = context->export_context->cmount; 00323 /* The number of Ceph OSDs in the cluster */ 00324 unsigned num_osds = ceph_ll_num_osds(cmount); 00325 /* Minimal information needed to get layout info */ 00326 vinodeno_t vinode; 00327 /* Structure containing the storage parameters of the file within 00328 the Ceph cluster. */ 00329 struct ceph_file_layout file_layout; 00330 /* Currently, all layouts have the same number of stripes */ 00331 uint32_t stripes = BIGGEST_PATTERN; 00332 /* Index for iterating over stripes */ 00333 size_t stripe = 0; 00334 /* Index for iterating over OSDs */ 00335 size_t osd = 0; 00336 /* NFSv4 status code */ 00337 nfsstat4 nfs_status = 0; 00338 00339 vinode.ino.val = deviceid->devid; 00340 vinode.snapid.val = CEPH_NOSNAP; 00341 00342 /* Sanity check on type */ 00343 if (type != LAYOUT4_NFSV4_1_FILES) { 00344 LogCrit(COMPONENT_PNFS, 00345 "Unsupported layout type: %x", 00346 type); 00347 return NFS4ERR_UNKNOWN_LAYOUTTYPE; 00348 } 00349 00350 /* Retrieve and calculate storage parameters of layout */ 00351 00352 memset(&file_layout, 0, sizeof(struct ceph_file_layout)); 00353 ceph_ll_file_layout(cmount, vinode, &file_layout); 00354 00355 /* As this is large, we encode as we go rather than building a 00356 structure and encoding it all at once. */ 00357 00358 /* The first entry in the nfsv4_1_file_ds_addr4 is the array of 00359 stripe indices. */ 00360 00361 /* First we encode the count of stripes. Since our pattern 00362 doesn't repeat, we have as many indices as we do stripes. */ 00363 00364 if (!xdr_uint32_t(da_addr_body, &stripes)) { 00365 LogCrit(COMPONENT_PNFS, "Failed to encode length of " 00366 "stripe_indices array: %" PRIu32 ".", stripes); 00367 return NFS4ERR_SERVERFAULT; 00368 } 00369 00370 for (stripe = 0; stripe < stripes; stripe++) { 00371 uint32_t stripe_osd 00372 = stripe_osd = ceph_ll_get_stripe_osd(cmount, 00373 vinode, 00374 stripe, 00375 &file_layout); 00376 if (stripe_osd < 0) { 00377 LogCrit(COMPONENT_PNFS, "Failed to retrieve OSD for " 00378 "stripe %lu of file %" PRIu64 ". Error: %u", 00379 stripe, deviceid->devid, -stripe_osd); 00380 return NFS4ERR_SERVERFAULT; 00381 } 00382 if (!xdr_uint32_t(da_addr_body, &stripe_osd)) { 00383 LogCrit(COMPONENT_PNFS, "Failed to encode OSD for stripe %lu.", 00384 stripe); 00385 return NFS4ERR_SERVERFAULT; 00386 } 00387 } 00388 00389 /* The number of OSDs in our cluster is the length of our array 00390 of multipath_lists */ 00391 00392 if (!xdr_uint32_t(da_addr_body, &num_osds)) { 00393 LogCrit(COMPONENT_PNFS, "Failed to encode length of " 00394 "multipath_ds_list array: %u", num_osds); 00395 return NFS4ERR_SERVERFAULT; 00396 } 00397 00398 /* Since our index is the OSD number itself, we have only one 00399 host per multipath_list. */ 00400 00401 for(osd = 0; osd < num_osds; osd++) { 00402 fsal_multipath_member_t host; 00403 memset(&host, 0, sizeof(fsal_multipath_member_t)); 00404 host.proto = 6; 00405 if (ceph_ll_osdaddr(cmount, osd, &host.addr) < 0) { 00406 LogCrit(COMPONENT_PNFS, 00407 "Unable to get IP address for OSD %lu.", 00408 osd); 00409 return NFS4ERR_SERVERFAULT; 00410 } 00411 host.port = 2049; 00412 if ((nfs_status 00413 = FSAL_encode_v4_multipath(da_addr_body, 00414 1, 00415 &host)) 00416 != NFS4_OK) { 00417 return nfs_status; 00418 } 00419 } 00420 00421 return NFS4_OK; 00422 } 00423 00424 nfsstat4 00425 CEPHFSAL_getdevicelist(fsal_handle_t *handle, 00426 fsal_op_context_t *context, 00427 const struct fsal_getdevicelist_arg *arg, 00428 struct fsal_getdevicelist_res *res) 00429 { 00430 /* Sanity check on type */ 00431 if (arg->type != LAYOUT4_NFSV4_1_FILES) { 00432 LogCrit(COMPONENT_PNFS, 00433 "Unsupported layout type: %x", 00434 arg->type); 00435 return NFS4ERR_UNKNOWN_LAYOUTTYPE; 00436 } 00437 00438 /* We have neither the ability nor the desire to return all valid 00439 deviceids, so we do nothing successfully. */ 00440 00441 res->count = 0; 00442 res->eof = TRUE; 00443 00444 return NFS4_OK; 00445 }