Network Block Device  @PACKAGE_VERSION@
nbd-server.c
Go to the documentation of this file.
1 /*
2  * Network Block Device - server
3  *
4  * Copyright 1996-1998 Pavel Machek, distribute under GPL
5  * <pavel@atrey.karlin.mff.cuni.cz>
6  * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
7  * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
8  *
9  * Version 1.0 - hopefully 64-bit-clean
10  * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
11  * Version 1.2 - autodetect size of block devices, thanx to Peter T. Breuer" <ptb@it.uc3m.es>
12  * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
13  * type, or don't have 64 bit file offsets by defining FS_32BIT
14  * in compile options for nbd-server *only*. This can be done
15  * with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
16  * original autoconf input file, or I would make it a configure
17  * option.) Ken Yap <ken@nlc.net.au>.
18  * Version 1.6 - fix autodetection of block device size and really make 64 bit
19  * clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
20  * Version 2.0 - Version synchronised with client
21  * Version 2.1 - Reap zombie client processes when they exit. Removed
22  * (uncommented) the _IO magic, it's no longer necessary. Wouter
23  * Verhelst <wouter@debian.org>
24  * Version 2.2 - Auto switch to read-only mode (useful for floppies).
25  * Version 2.3 - Fixed code so that Large File Support works. This
26  * removes the FS_32BIT compile-time directive; define
27  * _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
28  * using FS_32BIT. This will allow you to use files >2GB instead of
29  * having to use the -m option. Wouter Verhelst <wouter@debian.org>
30  * Version 2.4 - Added code to keep track of children, so that we can
31  * properly kill them from initscripts. Add a call to daemon(),
32  * so that processes don't think they have to wait for us, which is
33  * interesting for initscripts as well. Wouter Verhelst
34  * <wouter@debian.org>
35  * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
36  * zero after fork()ing, resulting in nbd-server going berserk
37  * when it receives a signal with at least one child open. Wouter
38  * Verhelst <wouter@debian.org>
39  * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
40  * rectified type of mainloop::size_host (sf.net bugs 814435 and
41  * 817385); close the PID file after writing to it, so that the
42  * daemon can actually be found. Wouter Verhelst
43  * <wouter@debian.org>
44  * 10/10/2003 - Size of the data "size_host" was wrong and so was not
45  * correctly put in network endianness. Many types were corrected
46  * (size_t and off_t instead of int). <vspaceg@sourceforge.net>
47  * Version 2.6 - Some code cleanup.
48  * Version 2.7 - Better build system.
49  * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a
50  * lot more work, but this is a start. Wouter Verhelst
51  * <wouter@debian.org>
52  * 16/03/2010 - Add IPv6 support.
53  * Kitt Tientanopajai <kitt@kitty.in.th>
54  * Neutron Soutmun <neo.neutron@gmail.com>
55  * Suriya Soutmun <darksolar@gmail.com>
56  */
57 
58 /* Includes LFS defines, which defines behaviours of some of the following
59  * headers, so must come before those */
60 #include "lfs.h"
61 
62 #include <assert.h>
63 #include <sys/types.h>
64 #include <sys/socket.h>
65 #include <sys/stat.h>
66 #include <sys/select.h>
67 #include <sys/wait.h>
68 #include <sys/un.h>
69 #ifdef HAVE_SYS_IOCTL_H
70 #include <sys/ioctl.h>
71 #endif
72 #include <sys/param.h>
73 #ifdef HAVE_SYS_MOUNT_H
74 #include <sys/mount.h>
75 #endif
76 #include <signal.h>
77 #include <errno.h>
78 #include <libgen.h>
79 #include <netinet/tcp.h>
80 #include <netinet/in.h>
81 #include <netdb.h>
82 #include <syslog.h>
83 #include <unistd.h>
84 #include <stdbool.h>
85 #include <stdio.h>
86 #include <stdlib.h>
87 #include <string.h>
88 #include <fcntl.h>
89 #if HAVE_FALLOC_PH
90 #include <linux/falloc.h>
91 #endif
92 #include <arpa/inet.h>
93 #include <strings.h>
94 #include <dirent.h>
95 #include <unistd.h>
96 #include <getopt.h>
97 #include <pwd.h>
98 #include <grp.h>
99 #include <dirent.h>
100 #include <ctype.h>
101 
102 #include <glib.h>
103 
104 /* used in cliserv.h, so must come first */
105 #define MY_NAME "nbd_server"
106 #include "cliserv.h"
107 #include "nbd-debug.h"
108 #include "netdb-compat.h"
109 
110 #ifdef WITH_SDP
111 #include <sdp_inet.h>
112 #endif
113 
114 /** Default position of the config file */
115 #ifndef SYSCONFDIR
116 #define SYSCONFDIR "/etc"
117 #endif
118 #define CFILE SYSCONFDIR "/nbd-server/config"
119 
120 /** Where our config file actually is */
122 
123 /** global flags */
125 
126 /* Whether we should avoid forking */
127 int dontfork = 0;
128 
/**
 * The highest value a variable of type off_t can reach. off_t is a signed
 * integer, so this is every bit set except for the leftmost (sign) bit.
 *
 * The value is built in unsigned arithmetic and then converted, because
 * shifting a signed 1 into the sign bit is undefined behaviour. The whole
 * expansion is parenthesized so the macro is safe inside larger expressions.
 **/
#define OFFT_MAX ((off_t)((1ULL<<(sizeof(off_t)*8-1))-1))
134 #define BUFSIZE ((1024*1024)+sizeof(struct nbd_reply)) /**< Size of buffer that can hold requests */
135 #define DIFFPAGESIZE 4096 /**< diff file uses those chunks */
136 #define TREEPAGESIZE 4096 /**< tree (block) files uses those chunks */
137 #define TREEDIRSIZE 1024 /**< number of files per subdirectory (or subdirs per subdirectory) */
138 
139 /** Per-export flags: */
140 #define F_READONLY 1 /**< flag to tell us a file is readonly */
141 #define F_MULTIFILE 2 /**< flag to tell us a file is exported using -m */
142 #define F_COPYONWRITE 4 /**< flag to tell us a file is exported using
143  copyonwrite */
144 #define F_AUTOREADONLY 8 /**< flag to tell us a file is set to autoreadonly */
145 #define F_SPARSE 16 /**< flag to tell us copyonwrite should use a sparse file */
146 #define F_SDP 32 /**< flag to tell us the export should be done using the Socket Direct Protocol for RDMA */
147 #define F_SYNC 64 /**< Whether to fsync() after a write */
148 #define F_FLUSH 128 /**< Whether server wants FLUSH to be sent by the client */
149 #define F_FUA 256 /**< Whether server wants FUA to be sent by the client */
150 #define F_ROTATIONAL 512 /**< Whether server wants the client to implement the elevator algorithm */
151 #define F_TEMPORARY 1024 /**< Whether the backing file is temporary and should be created then unlinked */
152 #define F_TRIM 2048 /**< Whether server wants TRIM (discard) to be sent by the client */
153 #define F_FIXED 4096 /**< Client supports fixed new-style protocol (and can thus send us extra options) */
154 #define F_TREEFILES 8192 /**< flag to tell us a file is exported using -t */
156 /** Global flags: */
157 #define F_OLDSTYLE 1 /**< Allow oldstyle (port-based) exports */
158 #define F_LIST 2 /**< Allow clients to list the exports on a server */
159 #define F_NO_ZEROES 4 /**< Do not send zeros to client */
160 GHashTable *children;
161 char pidfname[256]; /**< name of our PID file */
162 char pidftemplate[256]; /**< template to be used for the filename of the PID file */
163 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; /**< default name of allow file */
165 #define NEG_INIT (1 << 0)
166 #define NEG_OLD (1 << 1)
167 #define NEG_MODERN (1 << 2)
169 #include <nbdsrv.h>
170 
171 static volatile sig_atomic_t is_sigchld_caught; /**< Flag set by
172  SIGCHLD handler
173  to mark a child
174  exit */
175 
176 static volatile sig_atomic_t is_sigterm_caught; /**< Flag set by
177  SIGTERM handler
178  to mark a exit
179  request */
180 
181 static volatile sig_atomic_t is_sighup_caught; /**< Flag set by SIGHUP
182  handler to mark a
183  reconfiguration
184  request */
185 
186 GArray* modernsocks; /**< Sockets for the modern handler. Not used
187  if a client was only specified on the
188  command line; only port used if
189  oldstyle is set to false (and then the
190  command-line client isn't used, gna gna).
191  This may be more than one socket on
192  systems that don't support serving IPv4
193  and IPv6 from the same socket (like,
194  e.g., FreeBSD) */
195 
196 bool logged_oversized=false; /**< whether we logged oversized requests already */
197 
/**
 * Bookkeeping for one open file: its descriptor and its starting offset.
 **/
typedef struct {
	/** file descriptor */
	int fhandle;
	/** starting offset of this file */
	off_t startoff;
} FILE_INFO;
205 
/**
 * The kinds of value a configuration file parameter can hold.
 **/
typedef enum {
	/** This parameter is an integer */
	PARAM_INT = 0,
	/** This parameter is a 64-bit integer */
	PARAM_INT64 = 1,
	/** This parameter is a string */
	PARAM_STRING = 2,
	/** This parameter is a boolean */
	PARAM_BOOL = 3,
} PARAM_TYPE;
215 
216 /**
217  * Configuration file values
218  **/
219 typedef struct {
220  gchar *paramname; /**< Name of the parameter, as it appears in
221  the config file */
222  gboolean required; /**< Whether this is a required (as opposed to
223  optional) parameter */
224  PARAM_TYPE ptype; /**< Type of the parameter. */
225  gpointer target; /**< Pointer to where the data of this
226  parameter should be written. If ptype is
227  PARAM_BOOL, the data is or'ed rather than
228  overwritten. */
229  gint flagval; /**< Flag mask for this parameter in case ptype
230  is PARAM_BOOL. */
231 } PARAM;
232 
233 /**
234  * Configuration file values of the "generic" section
235  **/
236 struct generic_conf {
237  gchar *user; /**< user we run the server as */
238  gchar *group; /**< group we run running as */
239  gchar *modernaddr; /**< address of the modern socket */
240  gchar *modernport; /**< port of the modern socket */
241  gchar *unixsock; /**< file name of the unix domain socket */
242  gint flags; /**< global flags */
243 };
244 
245 /**
246  * Translate a command name into human readable form
247  *
248  * @param command The command number (after applying NBD_CMD_MASK_COMMAND)
249  * @return pointer to the command name
250  **/
251 static inline const char * getcommandname(uint64_t command) {
252  switch (command) {
253  case NBD_CMD_READ:
254  return "NBD_CMD_READ";
255  case NBD_CMD_WRITE:
256  return "NBD_CMD_WRITE";
257  case NBD_CMD_DISC:
258  return "NBD_CMD_DISC";
259  case NBD_CMD_FLUSH:
260  return "NBD_CMD_FLUSH";
261  case NBD_CMD_TRIM:
262  return "NBD_CMD_TRIM";
263  default:
264  return "UNKNOWN";
265  }
266 }
267 
/**
 * Read exactly len bytes from a file descriptor into a buffer.
 *
 * read() is called repeatedly until the whole request has been satisfied.
 * EAGAIN is retried; any other failure, as well as an unexpected end of
 * file, is reported through err().
 *
 * @param f a file descriptor
 * @param buf a buffer with room for at least len bytes
 * @param len the number of bytes to be read
 **/
static inline void readit(int f, void *buf, size_t len) {
	/* Walk the buffer through a char pointer: arithmetic on void* is a
	 * compiler extension, not standard C. */
	char *dst = buf;
	ssize_t res;

	while (len > 0) {
		DEBUG("*");
		res = read(f, dst, len);
		if (res > 0) {
			len -= res;
			dst += res;
		} else if (res == 0) {
			/* End of file. read() does not set errno in this
			 * case, so errno must not be consulted: a stale
			 * EAGAIN would make this loop spin forever. */
			err("Read failed: End of file");
			return;
		} else if (errno != EAGAIN) {
			err("Read failed: %m");
		}
	}
}
289 
/**
 * Read and throw away data from a file descriptor.
 *
 * The data is pulled in through readit() in pieces of at most bufsiz
 * bytes, each piece overwriting the start of buf.
 *
 * @param f a file descriptor
 * @param buf a scratch buffer
 * @param len the number of bytes to consume
 * @param bufsiz the size of the buffer
 **/
static inline void consume(int f, void * buf, size_t len, size_t bufsiz) {
	for (size_t chunk; len > 0; len -= chunk) {
		chunk = (len < bufsiz) ? len : bufsiz;
		readit(f, buf, chunk);
	}
}
306 
/**
 * Write exactly len bytes from a buffer to a file descriptor.
 *
 * write() is called repeatedly until everything has been sent; a short
 * write simply continues with the remainder. A return value of zero or
 * less is reported through err().
 *
 * @param f a file descriptor
 * @param buf a buffer containing data
 * @param len the number of bytes to be written
 **/
static inline void writeit(int f, void *buf, size_t len) {
	/* Walk the buffer through a char pointer: arithmetic on void* is a
	 * compiler extension, not standard C. */
	const char *src = buf;
	ssize_t res;

	while (len > 0) {
		DEBUG("+");
		/* NOTE(review): the bookkeeping below relies on err() not
		 * returning -- confirm against its declaration. */
		if ((res = write(f, src, len)) <= 0)
			err("Send failed: %m");
		len -= res;
		src += res;
	}
}
324 
325 void myseek(int handle,off_t a);
326 
327 /**
328  * Tree structure helper functions
329  */
330 
331 static void construct_path(char* name,int lenmax,off_t size, off_t pos, off_t * ppos) {
332  if (lenmax<10)
333  err("Char buffer overflow. This is likely a bug.");
334 
335  if (size<TREEDIRSIZE*TREEPAGESIZE) {
336  // we are done, add filename
337  snprintf(name,lenmax,"/FILE%04X",(pos/TREEPAGESIZE) % TREEDIRSIZE);
338  *ppos = pos / (TREEPAGESIZE*TREEDIRSIZE);
339  } else {
340  construct_path(name+9,lenmax-9,size/TREEDIRSIZE,pos,ppos);
341  char buffer[10];
342  snprintf(buffer,sizeof(buffer),"/TREE%04X",*ppos % TREEDIRSIZE);
343  memcpy(name,buffer,9); // copy into string without trailing zero
344  *ppos/=TREEDIRSIZE;
345  }
346 }
347 
/**
 * Create every directory leading up to the last component of a path.
 *
 * Each '/' after the first character is temporarily replaced by a
 * terminating null character so that the prefix can be handed to mkdir(),
 * and is put back afterwards. Directories that already exist are not an
 * error; any other mkdir() failure is reported through err().
 *
 * @param path the path; modified in place while running, restored on return
 **/
static void mkdir_path(char * path) {
	char *sep;

	for (sep = strchr(path+1, '/'); sep != NULL; sep = strchr(sep+1, '/')) {
		*sep = '\0';
		if (mkdir(path,0700)==-1 && errno!=EEXIST)
			err("Path access error! %m");
		*sep = '/';
	}
}
360 
361 static int open_treefile(char* name,mode_t mode,off_t size,off_t pos) {
362  char filename[256+strlen(name)];
363  strcpy(filename,name);
364  off_t ppos;
365  construct_path(filename+strlen(name),256,size,pos,&ppos);
366 
367  DEBUG("Accessing treefile %s ( offset %llu of %llu)",filename,(unsigned long long)pos,(unsigned long long)size);
368 
369  int handle=open(filename, mode, 0600);
370  if (handle<0 && errno==ENOENT) {
371  if (mode & O_RDWR) {
372 
373  DEBUG("Creating new treepath");
374 
375  mkdir_path(filename);
376  handle=open(filename, O_RDWR|O_CREAT, 0600);
377  if (handle<0) {
378  err("Error opening tree block file %m");
379  }
380  } else {
381 
382  DEBUG("Creating a dummy tempfile for reading");
383  gchar * tmpname;
384  tmpname = g_strdup_printf("dummy-XXXXXX");
385  handle = mkstemp(tmpname);
386  if (handle>0) {
387  unlink(tmpname); /* File will stick around whilst FD open */
388  } else {
389  err("Error opening tree block file %m");
390  }
391  g_free(tmpname);
392  }
393  char *n = "\0";
394  myseek(handle,TREEPAGESIZE-1);
395  ssize_t c = write(handle,n,1);
396  if (c<1) {
397  err("Error setting tree block file size %m");
398  }
399  }
400  return handle;
401 }
402 
/**
 * Remove the tree block file that holds a given offset.
 *
 * A failing unlink() is only mentioned in the debug output.
 *
 * @param name base name of the export; the tree path is appended to it
 * @param size size handed to construct_path()
 * @param pos offset whose block file is to be removed
 **/
static void delete_treefile(char* name,off_t size,off_t pos) {
	char filename[256+strlen(name)];
	off_t ppos;

	strcpy(filename,name);
	construct_path(filename+strlen(name),256,size,pos,&ppos);

	DEBUG("Deleting treefile: %s",filename);

	if (unlink(filename)==-1)
		DEBUG("Deleting failed : %s",strerror(errno));
}
415 
416 
417 /**
418  * Print out a message about how to use nbd-server. Split out to a separate
419  * function so that we can call it from multiple places
420  */
421 void usage() {
422  printf("This is nbd-server version " VERSION "\n");
423  printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name] [-M max connections] [-V]\n"
424  "\t-r|--read-only\t\tread only\n"
425  "\t-m|--multi-file\t\tmultiple file\n"
426  "\t-c|--copy-on-write\tcopy on write\n"
427  "\t-C|--config-file\tspecify an alternate configuration file\n"
428  "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
429  "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
430  "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n"
431  "\t-M|--max-connections\tspecify the maximum number of opened connections\n"
432  "\t-V|--version\toutput the version and exit\n\n"
433  "\tif port is set to 0, stdin is used (for running from inetd).\n"
434  "\tif file_to_export contains '%%s', it is substituted with the IP\n"
435  "\t\taddress of the machine trying to connect\n"
436  "\tif ip is set, it contains the local IP address on which we're listening.\n\tif not, the server will listen on all local IP addresses\n");
437  printf("Using configuration file %s\n", CFILE);
438 }
439 
440 /* Dumps a config file section of the given SERVER*, and exits. */
441 void dump_section(SERVER* serve, gchar* section_header) {
442  printf("[%s]\n", section_header);
443  printf("\texportname = %s\n", serve->exportname);
444  printf("\tlistenaddr = %s\n", serve->listenaddr);
445  printf("\tport = %d\n", serve->port);
446  if(serve->flags & F_READONLY) {
447  printf("\treadonly = true\n");
448  }
449  if(serve->flags & F_MULTIFILE) {
450  printf("\tmultifile = true\n");
451  }
452  if(serve->flags & F_TREEFILES) {
453  printf("\ttreefiles = true\n");
454  }
455  if(serve->flags & F_COPYONWRITE) {
456  printf("\tcopyonwrite = true\n");
457  }
458  if(serve->expected_size) {
459  printf("\tfilesize = %lld\n", (long long int)serve->expected_size);
460  }
461  if(serve->authname) {
462  printf("\tauthfile = %s\n", serve->authname);
463  }
464  exit(EXIT_SUCCESS);
465 }
466 
467 /**
468  * Parse the command line.
469  *
470  * @param argc the argc argument to main()
471  * @param argv the argv argument to main()
472  **/
473 SERVER* cmdline(int argc, char *argv[]) {
474  int i=0;
475  int nonspecial=0;
476  int c;
477  struct option long_options[] = {
478  {"read-only", no_argument, NULL, 'r'},
479  {"multi-file", no_argument, NULL, 'm'},
480  {"copy-on-write", no_argument, NULL, 'c'},
481  {"dont-fork", no_argument, NULL, 'd'},
482  {"authorize-file", required_argument, NULL, 'l'},
483  {"config-file", required_argument, NULL, 'C'},
484  {"pid-file", required_argument, NULL, 'p'},
485  {"output-config", required_argument, NULL, 'o'},
486  {"max-connection", required_argument, NULL, 'M'},
487  {"version", no_argument, NULL, 'V'},
488  {0,0,0,0}
489  };
490  SERVER *serve;
491  off_t es;
492  size_t last;
493  char suffix;
494  gboolean do_output=FALSE;
495  gchar* section_header="";
496  gchar** addr_port;
497 
498  if(argc==1) {
499  return NULL;
500  }
501  serve=g_new0(SERVER, 1);
502  serve->authname = g_strdup(default_authname);
503  serve->virtstyle=VIRT_IPLIT;
504  while((c=getopt_long(argc, argv, "-C:cdl:mo:rp:M:V", long_options, &i))>=0) {
505  switch (c) {
506  case 1:
507  /* non-option argument */
508  switch(nonspecial++) {
509  case 0:
510  if(strchr(optarg, ':') == strrchr(optarg, ':')) {
511  addr_port=g_strsplit(optarg, ":", 2);
512 
513  /* Check for "@" - maybe user using this separator
514  for IPv4 address */
515  if(!addr_port[1]) {
516  g_strfreev(addr_port);
517  addr_port=g_strsplit(optarg, "@", 2);
518  }
519  } else {
520  addr_port=g_strsplit(optarg, "@", 2);
521  }
522 
523  if(addr_port[1]) {
524  serve->port=strtol(addr_port[1], NULL, 0);
525  serve->listenaddr=g_strdup(addr_port[0]);
526  } else {
527  serve->listenaddr=NULL;
528  serve->port=strtol(addr_port[0], NULL, 0);
529  }
530  g_strfreev(addr_port);
531  break;
532  case 1:
533  serve->exportname = g_strdup(optarg);
534  if(serve->exportname[0] != '/') {
535  fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
536  exit(EXIT_FAILURE);
537  }
538  break;
539  case 2:
540  last=strlen(optarg)-1;
541  suffix=optarg[last];
542  if (suffix == 'k' || suffix == 'K' ||
543  suffix == 'm' || suffix == 'M')
544  optarg[last] = '\0';
545  es = (off_t)atoll(optarg);
546  switch (suffix) {
547  case 'm':
548  case 'M': es <<= 10;
549  case 'k':
550  case 'K': es <<= 10;
551  default : break;
552  }
553  serve->expected_size = es;
554  break;
555  }
556  break;
557  case 'r':
558  serve->flags |= F_READONLY;
559  break;
560  case 'm':
561  serve->flags |= F_MULTIFILE;
562  break;
563  case 'o':
564  do_output = TRUE;
565  section_header = g_strdup(optarg);
566  break;
567  case 'p':
568  strncpy(pidftemplate, optarg, 256);
569  pidftemplate[255]='\0';
570  break;
571  case 'c':
572  serve->flags |=F_COPYONWRITE;
573  break;
574  case 'd':
575  dontfork = 1;
576  break;
577  case 'C':
578  g_free(config_file_pos);
579  config_file_pos=g_strdup(optarg);
580  break;
581  case 'l':
582  g_free(serve->authname);
583  serve->authname=g_strdup(optarg);
584  break;
585  case 'M':
586  serve->max_connections = strtol(optarg, NULL, 0);
587  break;
588  case 'V':
589  printf("This is nbd-server version " VERSION "\n");
590  exit(EXIT_SUCCESS);
591  break;
592  default:
593  usage();
594  exit(EXIT_FAILURE);
595  break;
596  }
597  }
598  /* What's left: the port to export, the name of the to be exported
599  * file, and, optionally, the size of the file, in that order. */
600  if(nonspecial<2) {
601  g_free(serve);
602  serve=NULL;
603  } else {
605  }
606  if(do_output) {
607  if(!serve) {
608  g_critical("Need a complete configuration on the command line to output a config file section!");
609  exit(EXIT_FAILURE);
610  }
611  dump_section(serve, section_header);
612  }
613  return serve;
614 }
615 
616 /* forward definition of parse_cfile */
617 GArray* parse_cfile(gchar* f, struct generic_conf *genconf, bool expect_generic, GError** e);
618 
619 #ifdef HAVE_STRUCT_DIRENT_D_TYPE
620 #define NBD_D_TYPE de->d_type
621 #else
622 #define NBD_D_TYPE 0
623 #define DT_UNKNOWN 0
624 #define DT_REG 1
625 #endif
626 
627 /**
628  * Parse config file snippets in a directory. Uses readdir() and friends
629  * to find files and open them, then passes them on to parse_cfile
630  * with have_global set false
631  **/
632 GArray* do_cfile_dir(gchar* dir, struct generic_conf *const genconf, GError** e) {
633  DIR* dirh = opendir(dir);
634  struct dirent* de;
635  gchar* fname;
636  GArray* retval = NULL;
637  GArray* tmp;
638  struct stat stbuf;
639 
640  if(!dirh) {
641  g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_DIR_UNKNOWN, "Invalid directory specified: %s", strerror(errno));
642  return NULL;
643  }
644  errno=0;
645  while((de = readdir(dirh))) {
646  int saved_errno=errno;
647  fname = g_build_filename(dir, de->d_name, NULL);
648  switch(NBD_D_TYPE) {
649  case DT_UNKNOWN:
650  /* Filesystem doesn't return type of
651  * file through readdir. Run stat() on
652  * the file instead */
653  if(stat(fname, &stbuf)) {
654  perror("stat");
655  goto err_out;
656  }
657  if (!S_ISREG(stbuf.st_mode)) {
658  goto next;
659  }
660  case DT_REG:
661  /* Skip unless the name ends with '.conf' */
662  if(strcmp((de->d_name + strlen(de->d_name) - 5), ".conf")) {
663  goto next;
664  }
665  tmp = parse_cfile(fname, genconf, false, e);
666  errno=saved_errno;
667  if(*e) {
668  goto err_out;
669  }
670  if(!retval)
671  retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
672  retval = g_array_append_vals(retval, tmp->data, tmp->len);
673  g_array_free(tmp, TRUE);
674  default:
675  break;
676  }
677  next:
678  g_free(fname);
679  }
680  if(errno) {
681  g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_READDIR_ERR, "Error trying to read directory: %s", strerror(errno));
682  err_out:
683  if(retval)
684  g_array_free(retval, TRUE);
685  retval = NULL;
686  }
687  if(dirh)
688  closedir(dirh);
689  return retval;
690 }
691 
692 /**
693  * Parse the config file.
694  *
695  * @param f the name of the config file
696  *
697  * @param genconf a pointer to generic configuration which will get
698  * updated with parsed values. If NULL, then parsed generic
699  * configuration values are safely and silently discarded.
700  *
701  * @param e a GError. Error code can be any of the following:
702  * NBDS_ERR_CFILE_NOTFOUND, NBDS_ERR_CFILE_MISSING_GENERIC,
703  * NBDS_ERR_CFILE_VALUE_INVALID, NBDS_ERR_CFILE_VALUE_UNSUPPORTED
704  * or NBDS_ERR_CFILE_NO_EXPORTS. @see NBDS_ERRS.
705  *
706  * @param expect_generic if true, we expect a configuration file that
707  * contains a [generic] section. If false, we don't.
708  *
709  * @return a GArray of SERVER* pointers. If the config file is empty or does not
710  * exist, returns an empty GArray; if the config file contains an
711  * error, returns NULL, and e is set appropriately
712  **/
713 GArray* parse_cfile(gchar* f, struct generic_conf *const genconf, bool expect_generic, GError** e) {
714  const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s";
715  const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s";
716  gchar* cfdir = NULL;
717  SERVER s;
718  gchar *virtstyle=NULL;
719  PARAM lp[] = {
720  { "exportname", TRUE, PARAM_STRING, &(s.exportname), 0 },
721  { "authfile", FALSE, PARAM_STRING, &(s.authname), 0 },
722  { "filesize", FALSE, PARAM_OFFT, &(s.expected_size), 0 },
723  { "virtstyle", FALSE, PARAM_STRING, &(virtstyle), 0 },
724  { "prerun", FALSE, PARAM_STRING, &(s.prerun), 0 },
725  { "postrun", FALSE, PARAM_STRING, &(s.postrun), 0 },
726  { "transactionlog", FALSE, PARAM_STRING, &(s.transactionlog), 0 },
727  { "cowdir", FALSE, PARAM_STRING, &(s.cowdir), 0 },
728  { "readonly", FALSE, PARAM_BOOL, &(s.flags), F_READONLY },
729  { "multifile", FALSE, PARAM_BOOL, &(s.flags), F_MULTIFILE },
730  { "treefiles", FALSE, PARAM_BOOL, &(s.flags), F_TREEFILES },
731  { "copyonwrite", FALSE, PARAM_BOOL, &(s.flags), F_COPYONWRITE },
732  { "sparse_cow", FALSE, PARAM_BOOL, &(s.flags), F_SPARSE },
733  { "sdp", FALSE, PARAM_BOOL, &(s.flags), F_SDP },
734  { "sync", FALSE, PARAM_BOOL, &(s.flags), F_SYNC },
735  { "flush", FALSE, PARAM_BOOL, &(s.flags), F_FLUSH },
736  { "fua", FALSE, PARAM_BOOL, &(s.flags), F_FUA },
737  { "rotational", FALSE, PARAM_BOOL, &(s.flags), F_ROTATIONAL },
738  { "temporary", FALSE, PARAM_BOOL, &(s.flags), F_TEMPORARY },
739  { "trim", FALSE, PARAM_BOOL, &(s.flags), F_TRIM },
740  { "listenaddr", FALSE, PARAM_STRING, &(s.listenaddr), 0 },
741  { "maxconnections", FALSE, PARAM_INT, &(s.max_connections), 0 },
742  };
743  const int lp_size=sizeof(lp)/sizeof(PARAM);
744  struct generic_conf genconftmp;
745  PARAM gp[] = {
746  { "user", FALSE, PARAM_STRING, &(genconftmp.user), 0 },
747  { "group", FALSE, PARAM_STRING, &(genconftmp.group), 0 },
748  { "oldstyle", FALSE, PARAM_BOOL, &(genconftmp.flags), F_OLDSTYLE }, // only left here so we can issue an appropriate error message when the option is used
749  { "listenaddr", FALSE, PARAM_STRING, &(genconftmp.modernaddr), 0 },
750  { "port", FALSE, PARAM_STRING, &(genconftmp.modernport), 0 },
751  { "includedir", FALSE, PARAM_STRING, &cfdir, 0 },
752  { "allowlist", FALSE, PARAM_BOOL, &(genconftmp.flags), F_LIST },
753  { "unixsock", FALSE, PARAM_STRING, &(genconftmp.unixsock), 0 },
754  };
755  PARAM* p=gp;
756  int p_size=sizeof(gp)/sizeof(PARAM);
757  GKeyFile *cfile;
758  GError *err = NULL;
759  const char *err_msg=NULL;
760  GArray *retval=NULL;
761  gchar **groups;
762  gboolean bval;
763  gint ival;
764  gint64 i64val;
765  gchar* sval;
766  gchar* startgroup;
767  gint i;
768  gint j;
769 
770  memset(&genconftmp, 0, sizeof(struct generic_conf));
771 
772  if (genconf) {
773  /* Use the passed configuration values as defaults. The
774  * parsing algorithm below updates all parameter targets
775  * found from configuration files. */
776  memcpy(&genconftmp, genconf, sizeof(struct generic_conf));
777  }
778 
779  cfile = g_key_file_new();
780  retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
781  if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
782  G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
783  g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_NOTFOUND, "Could not open config file %s: %s",
784  f, err->message);
785  g_key_file_free(cfile);
786  return retval;
787  }
788  startgroup = g_key_file_get_start_group(cfile);
789  if((!startgroup || strcmp(startgroup, "generic")) && expect_generic) {
790  g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
791  g_key_file_free(cfile);
792  return NULL;
793  }
794  groups = g_key_file_get_groups(cfile, NULL);
795  for(i=0;groups[i];i++) {
796  memset(&s, '\0', sizeof(SERVER));
797 
798  /* After the [generic] group or when we're parsing an include
799  * directory, start parsing exports */
800  if(i==1 || !expect_generic) {
801  p=lp;
802  p_size=lp_size;
803  }
804  for(j=0;j<p_size;j++) {
805  assert(p[j].target != NULL);
806  assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL||p[j].ptype==PARAM_INT64);
807  switch(p[j].ptype) {
808  case PARAM_INT:
809  ival = g_key_file_get_integer(cfile,
810  groups[i],
811  p[j].paramname,
812  &err);
813  if(!err) {
814  *((gint*)p[j].target) = ival;
815  }
816  break;
817  case PARAM_INT64:
818  i64val = g_key_file_get_int64(cfile,
819  groups[i],
820  p[j].paramname,
821  &err);
822  if(!err) {
823  *((gint64*)p[j].target) = i64val;
824  }
825  break;
826  case PARAM_STRING:
827  sval = g_key_file_get_string(cfile,
828  groups[i],
829  p[j].paramname,
830  &err);
831  if(!err) {
832  *((gchar**)p[j].target) = sval;
833  }
834  break;
835  case PARAM_BOOL:
836  bval = g_key_file_get_boolean(cfile,
837  groups[i],
838  p[j].paramname, &err);
839  if(!err) {
840  if(bval) {
841  *((gint*)p[j].target) |= p[j].flagval;
842  } else {
843  *((gint*)p[j].target) &= ~(p[j].flagval);
844  }
845  }
846  break;
847  }
848  if(err) {
849  if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
850  if(!p[j].required) {
851  /* Ignore not-found error for optional values */
852  g_clear_error(&err);
853  continue;
854  } else {
855  err_msg = MISSING_REQUIRED_ERROR;
856  }
857  } else {
858  err_msg = DEFAULT_ERROR;
859  }
860  g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message);
861  g_array_free(retval, TRUE);
862  g_error_free(err);
863  g_key_file_free(cfile);
864  return NULL;
865  }
866  }
867  if(virtstyle) {
868  if(!strncmp(virtstyle, "none", 4)) {
870  } else if(!strncmp(virtstyle, "ipliteral", 9)) {
872  } else if(!strncmp(virtstyle, "iphash", 6)) {
874  } else if(!strncmp(virtstyle, "cidrhash", 8)) {
876  if(strlen(virtstyle)<10) {
877  g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]);
878  g_array_free(retval, TRUE);
879  g_key_file_free(cfile);
880  return NULL;
881  }
882  s.cidrlen=strtol(virtstyle+8, NULL, 0);
883  } else {
884  g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]);
885  g_array_free(retval, TRUE);
886  g_key_file_free(cfile);
887  return NULL;
888  }
889  } else {
891  }
892  if(genconftmp.flags & F_OLDSTYLE) {
893  g_message("Since 3.10, the oldstyle protocol is no longer supported. Please migrate to the newstyle protocol.");
894  g_message("Exiting.");
895  return NULL;
896  }
897  /* Don't need to free this, it's not our string */
898  virtstyle=NULL;
899  /* Don't append values for the [generic] group */
900  if(i>0 || !expect_generic) {
901  s.socket_family = AF_UNSPEC;
902  s.servename = groups[i];
903 
904  append_serve(&s, retval);
905  }
906 #ifndef WITH_SDP
907  if(s.flags & F_SDP) {
908  g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_VALUE_UNSUPPORTED, "This nbd-server was built without support for SDP, yet group %s uses it", groups[i]);
909  g_array_free(retval, TRUE);
910  g_key_file_free(cfile);
911  return NULL;
912  }
913 #endif
914  }
915  g_key_file_free(cfile);
916  if(cfdir) {
917  GArray* extra = do_cfile_dir(cfdir, &genconftmp, e);
918  if(extra) {
919  retval = g_array_append_vals(retval, extra->data, extra->len);
920  i+=extra->len;
921  g_array_free(extra, TRUE);
922  } else {
923  if(*e) {
924  g_array_free(retval, TRUE);
925  return NULL;
926  }
927  }
928  }
929  if(i==1 && expect_generic) {
930  g_set_error(e, NBDS_ERR, NBDS_ERR_CFILE_NO_EXPORTS, "The config file does not specify any exports");
931  }
932 
933  if (genconf) {
934  /* Return the updated generic configuration through the
935  * pointer parameter. */
936  memcpy(genconf, &genconftmp, sizeof(struct generic_conf));
937  }
938 
939  return retval;
940 }
941 
942 /**
943  * Handle SIGCHLD by setting atomically a flag which will be evaluated in the
944  * main loop of the root server process. This allows us to separate the signal
945  * catching from th actual task triggered by SIGCHLD and hence processing in the
946  * interrupt context is kept as minimial as possible.
947  *
948  * @param s the signal we're handling (must be SIGCHLD, or something
949  * is severely wrong)
950  **/
951 static void sigchld_handler(const int s G_GNUC_UNUSED) {
952  is_sigchld_caught = 1;
953 }
954 
955 /**
956  * Kill a child. Called from sigterm_handler::g_hash_table_foreach.
957  *
958  * @param key the key
959  * @param value the value corresponding to the above key
960  * @param user_data a pointer which we always set to 1, so that we know what
961  * will happen next.
962  **/
963 void killchild(gpointer key, gpointer value, gpointer user_data) {
964  pid_t *pid=value;
965 
966  kill(*pid, SIGTERM);
967 }
968 
969 /**
970  * Handle SIGTERM by setting atomically a flag which will be evaluated in the
971  * main loop of the root server process. This allows us to separate the signal
972  * catching from th actual task triggered by SIGTERM and hence processing in the
973  * interrupt context is kept as minimial as possible.
974  *
975  * @param s the signal we're handling (must be SIGTERM, or something
976  * is severely wrong).
977  **/
978 static void sigterm_handler(const int s G_GNUC_UNUSED) {
979  is_sigterm_caught = 1;
980 }
981 
982 /**
983  * Handle SIGHUP by setting atomically a flag which will be evaluated in
984  * the main loop of the root server process. This allows us to separate
985  * the signal catching from th actual task triggered by SIGHUP and hence
986  * processing in the interrupt context is kept as minimial as possible.
987  *
988  * @param s the signal we're handling (must be SIGHUP, or something
989  * is severely wrong).
990  **/
991 static void sighup_handler(const int s G_GNUC_UNUSED) {
992  is_sighup_caught = 1;
993 }
994 
995 /**
996  * Get the file handle and offset, given an export offset.
997  *
998  * @param client The client we're serving for
999  * @param a The offset to get corresponding file/offset for
1000  * @param fhandle [out] File descriptor
1001  * @param foffset [out] Offset into fhandle
1002  * @param maxbytes [out] Tells how many bytes can be read/written
1003  * from fhandle starting at foffset (0 if there is no limit)
1004  * @return 0 on success, -1 on failure
1005  **/
1006 int get_filepos(CLIENT *client, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) {
1007 
1008  GArray * const export = client->export;
1009 
1010  /* Negative offset not allowed */
1011  if(a < 0)
1012  return -1;
1013 
1014  /* Open separate file for treefiles */
1015  if (client->server->flags & F_TREEFILES) {
1016  *foffset = a % TREEPAGESIZE;
1017  *maxbytes = (( 1 + (a/TREEPAGESIZE) ) * TREEPAGESIZE) - a; // start position of next block
1018  *fhandle = open_treefile(client->exportname, ((client->server->flags & F_READONLY) ? O_RDONLY : O_RDWR), client->exportsize,a);
1019  return 0;
1020  }
1021 
1022  /* Binary search for last file with starting offset <= a */
1023  FILE_INFO fi;
1024  int start = 0;
1025  int end = export->len - 1;
1026  while( start <= end ) {
1027  int mid = (start + end) / 2;
1028  fi = g_array_index(export, FILE_INFO, mid);
1029  if( fi.startoff < a ) {
1030  start = mid + 1;
1031  } else if( fi.startoff > a ) {
1032  end = mid - 1;
1033  } else {
1034  start = end = mid;
1035  break;
1036  }
1037  }
1038 
1039  /* end should never go negative, since first startoff is 0 and a >= 0 */
1040  assert(end >= 0);
1041 
1042  fi = g_array_index(export, FILE_INFO, end);
1043  *fhandle = fi.fhandle;
1044  *foffset = a - fi.startoff;
1045  *maxbytes = 0;
1046  if( end+1 < export->len ) {
1047  FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1);
1048  *maxbytes = fi_next.startoff - a;
1049  }
1050 
1051  return 0;
1052 }
1053 
/**
 * Position a file descriptor at an absolute offset, reporting a failure
 * through err().
 *
 * @param handle a filedescriptor
 * @param a absolute position to seek to
 * @todo get rid of this.
 **/
void myseek(int handle,off_t a) {
	const off_t where = lseek(handle, a, SEEK_SET);

	if (where < 0) {
		err("Can not seek locally!\n");
	}
}
1065 
1066 /**
1067  * Write an amount of bytes at a given offset to the right file. This
1068  * abstracts the write-side of the multiple file option.
1069  *
1070  * @param a The offset where the write should start
1071  * @param buf The buffer to write from
1072  * @param len The length of buf
1073  * @param client The client we're serving for
1074  * @param fua Flag to indicate 'Force Unit Access'
1075  * @return The number of bytes actually written, or -1 in case of an error
1076  **/
1077 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1078  int fhandle;
1079  off_t foffset;
1080  size_t maxbytes;
1081  ssize_t retval;
1082 
1083  if(get_filepos(client, a, &fhandle, &foffset, &maxbytes))
1084  return -1;
1085  if(maxbytes && len > maxbytes)
1086  len = maxbytes;
1087 
1088  DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, (long long unsigned)foffset, (unsigned int)len, fua);
1089 
1090  myseek(fhandle, foffset);
1091  retval = write(fhandle, buf, len);
1092  if(client->server->flags & F_SYNC) {
1093  fsync(fhandle);
1094  } else if (fua) {
1095 
1096  /* This is where we would do the following
1097  * #ifdef USE_SYNC_FILE_RANGE
1098  * However, we don't, for the reasons set out below
1099  * by Christoph Hellwig <hch@infradead.org>
1100  *
1101  * [BEGINS]
1102  * fdatasync is equivalent to fsync except that it does not flush
1103  * non-essential metadata (basically just timestamps in practice), but it
1104  * does flush metadata requried to find the data again, e.g. allocation
1105  * information and extent maps. sync_file_range does nothing but flush
1106  * out pagecache content - it means you basically won't get your data
1107  * back in case of a crash if you either:
1108  *
1109  * a) have a volatile write cache in your disk (e.g. any normal SATA disk)
1110  * b) are using a sparse file on a filesystem
1111  * c) are using a fallocate-preallocated file on a filesystem
1112  * d) use any file on a COW filesystem like btrfs
1113  *
1114  * e.g. it only does anything useful for you if you do not have a volatile
1115  * write cache, and either use a raw block device node, or just overwrite
1116  * an already fully allocated (and not preallocated) file on a non-COW
1117  * filesystem.
1118  * [ENDS]
1119  *
1120  * What we should do is open a second FD with O_DSYNC set, then write to
1121  * that when appropriate. However, with a Linux client, every REQ_FUA
1122  * immediately follows a REQ_FLUSH, so fdatasync does not cause performance
1123  * problems.
1124  *
1125  */
1126 #if 0
1127  sync_file_range(fhandle, foffset, len,
1128  SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
1129  SYNC_FILE_RANGE_WAIT_AFTER);
1130 #else
1131  fdatasync(fhandle);
1132 #endif
1133  }
1134  /* close file pointer in case of treefiles */
1135  if (client->server->flags & F_TREEFILES) {
1136  close(fhandle);
1137  }
1138  return retval;
1139 }
1140 
1141 /**
1142  * Call rawexpwrite repeatedly until all data has been written.
1143  *
1144  * @param a The offset where the write should start
1145  * @param buf The buffer to write from
1146  * @param len The length of buf
1147  * @param client The client we're serving for
1148  * @param fua Flag to indicate 'Force Unit Access'
1149  * @return 0 on success, nonzero on failure
1150  **/
1151 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1152  ssize_t ret=0;
1153 
1154  while(len > 0 && (ret=rawexpwrite(a, buf, len, client, fua)) > 0 ) {
1155  a += ret;
1156  buf += ret;
1157  len -= ret;
1158  }
1159  return (ret < 0 || len != 0);
1160 }
1161 
1162 /**
1163  * Read an amount of bytes at a given offset from the right file. This
1164  * abstracts the read-side of the multiple files option.
1165  *
1166  * @param a The offset where the read should start
1167  * @param buf A buffer to read into
1168  * @param len The size of buf
1169  * @param client The client we're serving for
1170  * @return The number of bytes actually read, or -1 in case of an
1171  * error.
1172  **/
1173 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
1174  int fhandle;
1175  off_t foffset;
1176  size_t maxbytes;
1177  ssize_t retval;
1178 
1179  if(get_filepos(client, a, &fhandle, &foffset, &maxbytes))
1180  return -1;
1181  if(maxbytes && len > maxbytes)
1182  len = maxbytes;
1183 
1184  DEBUG("(READ from fd %d offset %llu len %u), ", fhandle, (long long unsigned int)foffset, (unsigned int)len);
1185 
1186  myseek(fhandle, foffset);
1187  retval = read(fhandle, buf, len);
1188  if (client->server->flags & F_TREEFILES) {
1189  close(fhandle);
1190  }
1191  return retval;
1192 }
1193 
1194 /**
1195  * Call rawexpread repeatedly until all data has been read.
1196  * @return 0 on success, nonzero on failure
1197  **/
1198 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) {
1199  ssize_t ret=0;
1200 
1201  while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) {
1202  a += ret;
1203  buf += ret;
1204  len -= ret;
1205  }
1206  return (ret < 0 || len != 0);
1207 }
1208 
1209 /**
1210  * Read an amount of bytes at a given offset from the right file. This
1211  * abstracts the read-side of the copyonwrite stuff, and calls
1212  * rawexpread() with the right parameters to do the actual work.
1213  * @param a The offset where the read should start
1214  * @param buf A buffer to read into
1215  * @param len The size of buf
1216  * @param client The client we're going to read for
1217  * @return 0 on success, nonzero on failure
1218  **/
1219 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
1220  off_t rdlen, offset;
1221  off_t mapcnt, mapl, maph, pagestart;
1222 
1223  if (!(client->server->flags & F_COPYONWRITE))
1224  return(rawexpread_fully(a, buf, len, client));
1225  DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
1226 
1227  mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
1228 
1229  for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1230  pagestart=mapcnt*DIFFPAGESIZE;
1231  offset=a-pagestart;
1232  rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1233  len : (size_t)DIFFPAGESIZE-offset;
1234  if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1235  DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1236  (unsigned long)(client->difmap[mapcnt]));
1237  myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset);
1238  if (read(client->difffile, buf, rdlen) != rdlen) return -1;
1239  } else { /* the block is not there */
1240  DEBUG("Page %llu is not here, we read the original one\n",
1241  (unsigned long long)mapcnt);
1242  if(rawexpread_fully(a, buf, rdlen, client)) return -1;
1243  }
1244  len-=rdlen; a+=rdlen; buf+=rdlen;
1245  }
1246  return 0;
1247 }
1248 
1249 /**
1250  * Write an amount of bytes at a given offset to the right file. This
1251  * abstracts the write-side of the copyonwrite option, and calls
1252  * rawexpwrite() with the right parameters to do the actual work.
1253  *
1254  * @param a The offset where the write should start
1255  * @param buf The buffer to write from
1256  * @param len The length of buf
1257  * @param client The client we're going to write for.
1258  * @param fua Flag to indicate 'Force Unit Access'
1259  * @return 0 on success, nonzero on failure
1260  **/
1261 int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
1262  char pagebuf[DIFFPAGESIZE];
1263  off_t mapcnt,mapl,maph;
1264  off_t wrlen,rdlen;
1265  off_t pagestart;
1266  off_t offset;
1267 
1268  if (!(client->server->flags & F_COPYONWRITE))
1269  return(rawexpwrite_fully(a, buf, len, client, fua));
1270  DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
1271 
1272  mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
1273 
1274  for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
1275  pagestart=mapcnt*DIFFPAGESIZE ;
1276  offset=a-pagestart ;
1277  wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
1278  len : (size_t)DIFFPAGESIZE-offset;
1279 
1280  if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
1281  DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
1282  (unsigned long)(client->difmap[mapcnt])) ;
1283  myseek(client->difffile,
1284  client->difmap[mapcnt]*DIFFPAGESIZE+offset);
1285  if (write(client->difffile, buf, wrlen) != wrlen) return -1 ;
1286  } else { /* the block is not there */
1287  myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ;
1288  client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
1289  DEBUG("Page %llu is not here, we put it at %lu\n",
1290  (unsigned long long)mapcnt,
1291  (unsigned long)(client->difmap[mapcnt]));
1292  rdlen=DIFFPAGESIZE ;
1293  if (rawexpread_fully(pagestart, pagebuf, rdlen, client))
1294  return -1;
1295  memcpy(pagebuf+offset,buf,wrlen) ;
1296  if (write(client->difffile, pagebuf, DIFFPAGESIZE) !=
1297  DIFFPAGESIZE)
1298  return -1;
1299  }
1300  len-=wrlen ; a+=wrlen ; buf+=wrlen ;
1301  }
1302  if (client->server->flags & F_SYNC) {
1303  fsync(client->difffile);
1304  } else if (fua) {
1305  /* open question: would it be cheaper to do multiple sync_file_ranges?
1306  as we iterate through the above?
1307  */
1308  fdatasync(client->difffile);
1309  }
1310  return 0;
1311 }
1312 
1313 /**
1314  * Flush data to a client
1315  *
1316  * @param client The client we're going to write for.
1317  * @return 0 on success, nonzero on failure
1318  **/
1319 int expflush(CLIENT *client) {
1320  gint i;
1321 
1322  if (client->server->flags & F_COPYONWRITE) {
1323  return fsync(client->difffile);
1324  }
1325 
1326  if (client->server->flags & F_TREEFILES ) {
1327  // all we can do is force sync the entire filesystem containing the tree
1328  if (client->server->flags & F_READONLY)
1329  return 0;
1330  sync();
1331  return 0;
1332  }
1333 
1334  for (i = 0; i < client->export->len; i++) {
1335  FILE_INFO fi = g_array_index(client->export, FILE_INFO, i);
1336  if (fsync(fi.fhandle) < 0)
1337  return -1;
1338  }
1339 
1340  return 0;
1341 }
1342 
1343 /*
1344  * If the current system supports it, call fallocate() on the backend
1345  * file to resparsify stuff that isn't needed anymore (see NBD_CMD_TRIM)
1346  */
1347 int exptrim(struct nbd_request* req, CLIENT* client) {
1348  if (client->server->flags & F_TREEFILES) {
1349  if (client->server->flags & F_READONLY)
1350  return 0;
1351 
1352  off_t min = ( ( req->from + TREEPAGESIZE - 1 ) / TREEPAGESIZE) * TREEPAGESIZE; // start address of first to be trimmed block
1353  off_t max = ( ( req->from + req->len ) / TREEPAGESIZE) * TREEPAGESIZE; // start address of first not to be trimmed block
1354  while (min<max) {
1355  delete_treefile(client->exportname,client->exportsize,min);
1356  min+=TREEPAGESIZE;
1357  }
1358  DEBUG("Performed TRIM request on TREE structure from %llu to %llu", (unsigned long long) req->from, (unsigned long long) req->len);
1359  return 0;
1360  }
1361 #if HAVE_FALLOC_PH
1362  FILE_INFO prev = g_array_index(client->export, FILE_INFO, 0);
1363  FILE_INFO cur = prev;
1364  int i = 1;
1365  /* We're running on a system that supports the
1366  * FALLOC_FL_PUNCH_HOLE option to re-sparsify a file */
1367  do {
1368  if(i<client->export->len) {
1369  cur = g_array_index(client->export, FILE_INFO, i);
1370  }
1371  if(prev.startoff <= req->from) {
1372  off_t curoff = req->from - prev.startoff;
1373  off_t curlen = cur.startoff - prev.startoff - curoff;
1374  fallocate(prev.fhandle, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, curoff, curlen);
1375  }
1376  prev = cur;
1377  } while(i < client->export->len && cur.startoff < (req->from + req->len));
1378  DEBUG("Performed TRIM request from %llu to %llu", (unsigned long long) req->from, (unsigned long long) req->len);
1379 #else
1380  DEBUG("Ignoring TRIM request (not supported on current platform");
1381 #endif
1382  return 0;
1383 }
1384 
/**
 * Send one option reply to the client.
 *
 * The reply is written with a single writev() and consists of a 64-bit
 * magic number, the option being answered, the reply type, the length of
 * the payload and the payload itself; all integer fields are converted to
 * network byte order. A write of the wrong size is only reported through
 * perror().
 *
 * @param opt the option this reply belongs to
 * @param net socket to write to
 * @param reply_type the kind of reply
 * @param datasize number of payload bytes in @a data
 * @param data payload to append to the reply
 **/
static void send_reply(uint32_t opt, int net, uint32_t reply_type, size_t datasize, void* data) {
	uint64_t wire_magic = htonll(0x3e889045565a9LL);
	uint32_t wire_opt = htonl(opt);
	uint32_t wire_type = htonl(reply_type);
	uint32_t wire_len = htonl(datasize);
	struct iovec parts[5];
	size_t expected;
	ssize_t written;

	parts[0].iov_base = &wire_magic;
	parts[0].iov_len = sizeof(wire_magic);
	parts[1].iov_base = &wire_opt;
	parts[1].iov_len = sizeof(wire_opt);
	parts[2].iov_base = &wire_type;
	parts[2].iov_len = sizeof(wire_type);
	parts[3].iov_base = &wire_len;
	parts[3].iov_len = sizeof(wire_len);
	parts[4].iov_base = data;
	parts[4].iov_len = datasize;

	expected = sizeof(wire_magic) + sizeof(wire_opt) + sizeof(wire_type)
		+ sizeof(wire_len) + datasize;
	written = writev(net, parts, 5);
	if ((size_t)written != expected) {
		perror("E: couldn't write enough data:");
	}
}
1403 
1404 static CLIENT* handle_export_name(uint32_t opt, int net, GArray* servers, uint32_t cflags) {
1405  uint32_t namelen;
1406  char* name;
1407  int i;
1408 
1409  if (read(net, &namelen, sizeof(namelen)) < 0) {
1410  err("Negotiation failed/7: %m");
1411  return NULL;
1412  }
1413  namelen = ntohl(namelen);
1414  name = malloc(namelen+1);
1415  name[namelen]=0;
1416  if (read(net, name, namelen) < 0) {
1417  err("Negotiation failed/8: %m");
1418  free(name);
1419  return NULL;
1420  }
1421  for(i=0; i<servers->len; i++) {
1422  SERVER* serve = &(g_array_index(servers, SERVER, i));
1423  if(!strcmp(serve->servename, name)) {
1424  CLIENT* client = g_new0(CLIENT, 1);
1425  client->server = serve;
1426  client->exportsize = OFFT_MAX;
1427  client->net = net;
1428  client->modern = TRUE;
1429  client->transactionlogfd = -1;
1430  client->clientfeats = cflags;
1431  free(name);
1432  return client;
1433  }
1434  }
1435  err("Negotiation failed/8a: Requested export not found");
1436  free(name);
1437  return NULL;
1438 }
1439 
1440 static void handle_list(uint32_t opt, int net, GArray* servers, uint32_t cflags) {
1441  uint32_t len;
1442  int i;
1443  char buf[1024];
1444  char *ptr = buf + sizeof(len);
1445 
1446  if (read(net, &len, sizeof(len)) < 0)
1447  err("Negotiation failed/8: %m");
1448  len = ntohl(len);
1449  if(len) {
1450  send_reply(opt, net, NBD_REP_ERR_INVALID, 0, NULL);
1451  }
1452  if(!(glob_flags & F_LIST)) {
1453  send_reply(opt, net, NBD_REP_ERR_POLICY, 0, NULL);
1454  err_nonfatal("Client tried disallowed list option");
1455  return;
1456  }
1457  for(i=0; i<servers->len; i++) {
1458  SERVER* serve = &(g_array_index(servers, SERVER, i));
1459  len = htonl(strlen(serve->servename));
1460  memcpy(buf, &len, sizeof(len));
1461  strcpy(ptr, serve->servename);
1462  send_reply(opt, net, NBD_REP_SERVER, strlen(serve->servename)+sizeof(len), buf);
1463  }
1464  send_reply(opt, net, NBD_REP_ACK, 0, NULL);
1465 }
1466 
1467 /**
1468  * Do the initial negotiation.
1469  *
1470  * @param client The client we're negotiating with.
1471  **/
1472 CLIENT* negotiate(int net, GArray* servers) {
1473  uint32_t flags = NBD_FLAG_HAS_FLAGS;
1474  uint16_t smallflags = NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES;
1475  uint64_t magic;
1476  uint32_t cflags = 0;
1477  uint32_t opt;
1478 
1479  assert(servers != NULL);
1480  if (write(net, INIT_PASSWD, 8) < 0)
1481  err_nonfatal("Negotiation failed/1: %m");
1482  magic = htonll(opts_magic);
1483  if (write(net, &magic, sizeof(magic)) < 0)
1484  err_nonfatal("Negotiation failed/2: %m");
1485 
1486  smallflags = htons(smallflags);
1487  if (write(net, &smallflags, sizeof(uint16_t)) < 0)
1488  err_nonfatal("Negotiation failed/3: %m");
1489  if (read(net, &cflags, sizeof(cflags)) < 0)
1490  err_nonfatal("Negotiation failed/4: %m");
1491  cflags = htonl(cflags);
1492  if (cflags & NBD_FLAG_C_NO_ZEROES) {
1494  }
1495  do {
1496  if (read(net, &magic, sizeof(magic)) < 0)
1497  err_nonfatal("Negotiation failed/5: %m");
1498  magic = ntohll(magic);
1499  if(magic != opts_magic) {
1500  err_nonfatal("Negotiation failed/5a: magic mismatch");
1501  return NULL;
1502  }
1503  if (read(net, &opt, sizeof(opt)) < 0)
1504  err_nonfatal("Negotiation failed/6: %m");
1505  opt = ntohl(opt);
1506  switch(opt) {
1507  case NBD_OPT_EXPORT_NAME:
1508  // NBD_OPT_EXPORT_NAME must be the last
1509  // selected option, so return from here
1510  // if that is chosen.
1511  return handle_export_name(opt, net, servers, cflags);
1512  break;
1513  case NBD_OPT_LIST:
1514  handle_list(opt, net, servers, cflags);
1515  break;
1516  case NBD_OPT_ABORT:
1517  // handled below
1518  break;
1519  default:
1520  send_reply(opt, net, NBD_REP_ERR_UNSUP, 0, NULL);
1521  break;
1522  }
1523  } while((opt != NBD_OPT_EXPORT_NAME) && (opt != NBD_OPT_ABORT));
1524  if(opt == NBD_OPT_ABORT) {
1525  err_nonfatal("Session terminated by client");
1526  return NULL;
1527  }
1528  return NULL;
1529 }
1530 
1531 void send_export_info(CLIENT* client) {
1532  uint64_t size_host = htonll((u64)(client->exportsize));
1533  uint16_t flags = 0;
1534 
1535  if (write(client->net, &size_host, 8) < 0)
1536  err("Negotiation failed/9: %m");
1537  if (client->server->flags & F_READONLY)
1538  flags |= NBD_FLAG_READ_ONLY;
1539  if (client->server->flags & F_FLUSH)
1540  flags |= NBD_FLAG_SEND_FLUSH;
1541  if (client->server->flags & F_FUA)
1542  flags |= NBD_FLAG_SEND_FUA;
1543  if (client->server->flags & F_ROTATIONAL)
1544  flags |= NBD_FLAG_ROTATIONAL;
1545  if (client->server->flags & F_TRIM)
1546  flags |= NBD_FLAG_SEND_TRIM;
1547  flags = htons(flags);
1548  if (write(client->net, &flags, sizeof(flags)) < 0)
1549  err("Negotiation failed/11: %m");
1550  if (!(glob_flags & F_NO_ZEROES)) {
1551  char zeros[128];
1552  memset(zeros, '\0', sizeof(zeros));
1553  if (write(client->net, zeros, 124) < 0)
1554  err("Negotiation failed/12: %m");
1555  }
1556 }
1557 
/**
 * Map a local errno value onto the error number sent to the client.
 *
 * EPERM, EIO, ENOMEM and EINVAL map to 1, 5, 12 and 22; EFBIG, ENOSPC and
 * (where defined) EDQUOT all map to 28; everything else maps to 22.
 *
 * @param errcode a local errno value
 * @return the mapped error number, already converted with htonl()
 **/
static int nbd_errno(int errcode) {
	uint32_t wire_code = 22; /* EINVAL, also used for anything unknown */

	if (errcode == EPERM) {
		wire_code = 1;
	} else if (errcode == EIO) {
		wire_code = 5;
	} else if (errcode == ENOMEM) {
		wire_code = 12;
	} else if (errcode == EFBIG || errcode == ENOSPC) {
		wire_code = 28; /* ENOSPC */
	}
#ifdef EDQUOT
	else if (errcode == EDQUOT) {
		wire_code = 28; /* ENOSPC */
	}
#endif
	return htonl(wire_code);
}
1578 
/**
 * sending macro: write @a reply to @a net and, when the client has a
 * transaction log open, to that log as well. Relies on a variable named
 * `client' being in scope at the point of use. Wrapped in do/while(0) so
 * that it behaves as a single statement.
 */
#define SEND(net,reply) do { writeit( (net), &(reply), sizeof( reply )); \
	if (client->transactionlogfd != -1) \
		writeit(client->transactionlogfd, &(reply), sizeof(reply)); } while (0)
/**
 * error macro: send @a reply with its error field set to the mapped value
 * of @a errcode, then reset the error field to 0 for the next reply.
 */
#define ERROR(client,reply,errcode) do { (reply).error = nbd_errno(errcode); SEND((client)->net,reply); (reply).error = 0; } while (0)
1585 /**
1586  * Serve a file to a single client.
1587  *
1588  * @todo This beast needs to be split up in many tiny little manageable
1589  * pieces. Preferably with a chainsaw.
1590  *
1591  * @param client The client we're going to serve to.
1592  * @return when the client disconnects
1593  **/
1594 int mainloop(CLIENT *client) {
1595  struct nbd_request request;
1596  struct nbd_reply reply;
1597  gboolean go_on=TRUE;
1598 #ifdef DODBG
1599  int i = 0;
1600 #endif
1601  send_export_info(client);
1602  DEBUG("Entering request loop!\n");
1603  reply.magic = htonl(NBD_REPLY_MAGIC);
1604  reply.error = 0;
1605  while (go_on) {
1606  char buf[BUFSIZE];
1607  char* p;
1608  size_t len;
1609  size_t currlen;
1610  size_t writelen;
1611  uint16_t command;
1612 #ifdef DODBG
1613  i++;
1614  printf("%d: ", i);
1615 #endif
1616  readit(client->net, &request, sizeof(request));
1617  if (client->transactionlogfd != -1)
1618  writeit(client->transactionlogfd, &request, sizeof(request));
1619 
1620  request.from = ntohll(request.from);
1621  request.type = ntohl(request.type);
1622  command = request.type & NBD_CMD_MASK_COMMAND;
1623  len = ntohl(request.len);
1624 
1625  DEBUG("%s from %llu (%llu) len %u, ", getcommandname(command),
1626  (unsigned long long)request.from,
1627  (unsigned long long)request.from / 512, len);
1628 
1629  if (request.magic != htonl(NBD_REQUEST_MAGIC))
1630  err("Not enough magic.");
1631 
1632  memcpy(reply.handle, request.handle, sizeof(reply.handle));
1633 
1634  if ((command==NBD_CMD_WRITE) || (command==NBD_CMD_READ) ||
1635  (command==NBD_CMD_TRIM)) {
1636  if (request.from + len < request.from) { // 64 bit overflow!!
1637  DEBUG("[Number too large!]");
1638  ERROR(client, reply, EINVAL);
1639  continue;
1640  }
1641 
1642  if (((off_t)request.from + len) > client->exportsize) {
1643  DEBUG("[RANGE!]");
1644  ERROR(client, reply, (command==NBD_CMD_WRITE) ? ENOSPC : EINVAL);
1645  continue;
1646  }
1647 
1648  currlen = len;
1649  if (currlen > BUFSIZE - sizeof(struct nbd_reply)) {
1650  currlen = BUFSIZE - sizeof(struct nbd_reply);
1651  if(!logged_oversized) {
1652  msg(LOG_DEBUG, "oversized request (this is not a problem)");
1653  logged_oversized = true;
1654  }
1655  }
1656  }
1657 
1658  switch (command) {
1659 
1660  case NBD_CMD_DISC:
1661  msg(LOG_INFO, "Disconnect request received.");
1662  if (client->server->flags & F_COPYONWRITE) {
1663  if (client->difmap) g_free(client->difmap) ;
1664  close(client->difffile);
1665  unlink(client->difffilename);
1666  free(client->difffilename);
1667  }
1668  go_on=FALSE;
1669  continue;
1670 
1671  case NBD_CMD_WRITE:
1672  DEBUG("wr: net->buf, ");
1673  while(len > 0) {
1674  readit(client->net, buf, currlen);
1675  DEBUG("buf->exp, ");
1676  if ((client->server->flags & F_READONLY) ||
1677  (client->server->flags & F_AUTOREADONLY)) {
1678  DEBUG("[WRITE to READONLY!]");
1679  ERROR(client, reply, EPERM);
1680  consume(client->net, buf, len-currlen, BUFSIZE);
1681  continue;
1682  }
1683  if (expwrite(request.from, buf, currlen, client,
1684  request.type & NBD_CMD_FLAG_FUA)) {
1685  DEBUG("Write failed: %m" );
1686  ERROR(client, reply, errno);
1687  consume(client->net, buf, len-currlen, BUFSIZE);
1688  continue;
1689  }
1690  len -= currlen;
1691  request.from += currlen;
1692  currlen = (len < BUFSIZE) ? len : BUFSIZE;
1693  }
1694  SEND(client->net, reply);
1695  DEBUG("OK!\n");
1696  continue;
1697 
1698  case NBD_CMD_FLUSH:
1699  DEBUG("fl: ");
1700  if (expflush(client)) {
1701  DEBUG("Flush failed: %m");
1702  ERROR(client, reply, errno);
1703  continue;
1704  }
1705  SEND(client->net, reply);
1706  DEBUG("OK!\n");
1707  continue;
1708 
1709  case NBD_CMD_READ:
1710  DEBUG("exp->buf, ");
1711  if (client->transactionlogfd != -1)
1712  writeit(client->transactionlogfd, &reply, sizeof(reply));
1713  writeit(client->net, &reply, sizeof(reply));
1714  p = buf;
1715  writelen = currlen;
1716  while(len > 0) {
1717  if (expread(request.from, p, currlen, client)) {
1718  DEBUG("Read failed: %m");
1719  ERROR(client, reply, errno);
1720  continue;
1721  }
1722 
1723  DEBUG("buf->net, ");
1724  writeit(client->net, buf, writelen);
1725  len -= currlen;
1726  request.from += currlen;
1727  currlen = (len < BUFSIZE) ? len : BUFSIZE;
1728  p = buf;
1729  writelen = currlen;
1730  }
1731  DEBUG("OK!\n");
1732  continue;
1733 
1734  case NBD_CMD_TRIM:
1735  /* The kernel module sets discard_zeroes_data == 0,
1736  * so it is okay to do nothing. */
1737  if ((client->server->flags & F_READONLY) ||
1738  (client->server->flags & F_AUTOREADONLY)) {
1739  DEBUG("[TRIM to READONLY!]");
1740  ERROR(client, reply, EPERM);
1741  continue;
1742  }
1743  if (exptrim(&request, client)) {
1744  DEBUG("Trim failed: %m");
1745  ERROR(client, reply, errno);
1746  continue;
1747  }
1748  SEND(client->net, reply);
1749  continue;
1750 
1751  default:
1752  DEBUG ("Ignoring unknown command\n");
1753  continue;
1754  }
1755  }
1756  return 0;
1757 }
1758 
1759 /**
1760  * Set up client export array, which is an array of FILE_INFO.
1761  * Also, split a single exportfile into multiple ones, if that was asked.
1762  * @param client information on the client which we want to setup export for
1763  **/
1764 void setupexport(CLIENT* client) {
1765  int i;
1766  off_t laststartoff = 0, lastsize = 0;
1767  int multifile = (client->server->flags & F_MULTIFILE);
1768  int treefile = (client->server->flags & F_TREEFILES);
1769  int temporary = (client->server->flags & F_TEMPORARY) && !multifile;
1770  int cancreate = (client->server->expected_size) && !multifile;
1771 
1772  if (treefile) {
1773  client->export = NULL; // this could be thousands of files so we open handles on demand although its slower
1774  client->exportsize = client->server->expected_size; // available space is not checked, as it could change during runtime anyway
1775  } else {
1776 
1777  client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
1778 
1779  /* If multi-file, open as many files as we can.
1780  * If not, open exactly one file.
1781  * Calculate file sizes as we go to get total size. */
1782  for(i=0; ; i++) {
1783  FILE_INFO fi;
1784  gchar *tmpname;
1785  gchar* error_string;
1786 
1787  if (i)
1788  cancreate = 0;
1789  /* if expected_size is specified, and this is the first file, we can create the file */
1790  mode_t mode = (client->server->flags & F_READONLY) ?
1791  O_RDONLY : (O_RDWR | (cancreate?O_CREAT:0));
1792 
1793  if (temporary) {
1794  tmpname=g_strdup_printf("%s.%d-XXXXXX", client->exportname, i);
1795  DEBUG( "Opening %s\n", tmpname );
1796  fi.fhandle = mkstemp(tmpname);
1797  } else {
1798  if(multifile) {
1799  tmpname=g_strdup_printf("%s.%d", client->exportname, i);
1800  } else {
1801  tmpname=g_strdup(client->exportname);
1802  }
1803  DEBUG( "Opening %s\n", tmpname );
1804  fi.fhandle = open(tmpname, mode, 0600);
1805  if(fi.fhandle == -1 && mode == O_RDWR) {
1806  /* Try again because maybe media was read-only */
1807  fi.fhandle = open(tmpname, O_RDONLY);
1808  if(fi.fhandle != -1) {
1809  /* Opening the base file in copyonwrite mode is
1810  * okay */
1811  if(!(client->server->flags & F_COPYONWRITE)) {
1812  client->server->flags |= F_AUTOREADONLY;
1813  client->server->flags |= F_READONLY;
1814  }
1815  }
1816  }
1817  }
1818  if(fi.fhandle == -1) {
1819  if(multifile && i>0)
1820  break;
1821  error_string=g_strdup_printf(
1822  "Could not open exported file %s: %%m",
1823  tmpname);
1824  err(error_string);
1825  }
1826 
1827  if (temporary)
1828  unlink(tmpname); /* File will stick around whilst FD open */
1829 
1830  fi.startoff = laststartoff + lastsize;
1831  g_array_append_val(client->export, fi);
1832  g_free(tmpname);
1833 
1834  /* Starting offset and size of this file will be used to
1835  * calculate starting offset of next file */
1836  laststartoff = fi.startoff;
1837  lastsize = size_autodetect(fi.fhandle);
1838 
1839  /* If we created the file, it will be length zero */
1840  if (!lastsize && cancreate) {
1841  assert(!multifile);
1842  if(ftruncate (fi.fhandle, client->server->expected_size)<0) {
1843  err("Could not expand file: %m");
1844  }
1845  lastsize = client->server->expected_size;
1846  break; /* don't look for any more files */
1847  }
1848 
1849  if(!multifile || temporary)
1850  break;
1851  }
1852 
1853  /* Set export size to total calculated size */
1854  client->exportsize = laststartoff + lastsize;
1855 
1856  /* Export size may be overridden */
1857  if(client->server->expected_size) {
1858  /* desired size must be <= total calculated size */
1859  if(client->server->expected_size > client->exportsize) {
1860  err("Size of exported file is too big\n");
1861  }
1862 
1863  client->exportsize = client->server->expected_size;
1864  }
1865  }
1866 
1867  msg(LOG_INFO, "Size of exported file/device is %llu", (unsigned long long)client->exportsize);
1868  if(multifile) {
1869  msg(LOG_INFO, "Total number of files: %d", i);
1870  }
1871  if(treefile) {
1872  msg(LOG_INFO, "Total number of (potential) files: %d", (client->exportsize+TREEPAGESIZE-1)/TREEPAGESIZE);
1873  }
1874 }
1875 
1877  off_t i;
1878  gchar* dir;
1879  gchar* export_base;
1880  if (client->server->cowdir != NULL) {
1881  dir = g_strdup(client->server->cowdir);
1882  } else {
1883  dir = g_strdup(dirname(client->exportname));
1884  }
1885  export_base = g_strdup(basename(client->exportname));
1886  client->difffilename = g_strdup_printf("%s/%s-%s-%d.diff",dir,export_base,client->clientname,
1887  (int)getpid());
1888  g_free(dir);
1889  g_free(export_base);
1890  msg(LOG_INFO, "About to create map and diff file %s", client->difffilename) ;
1891  client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
1892  if (client->difffile<0) err("Could not create diff file (%m)") ;
1893  if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
1894  err("Could not allocate memory") ;
1895  for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1 ;
1896 
1897  return 0;
1898 }
1899 
1900 /**
1901  * Run a command. This is used for the ``prerun'' and ``postrun'' config file
1902  * options
1903  *
1904  * @param command the command to be ran. Read from the config file
1905  * @param file the file name we're about to export
1906  **/
1907 int do_run(gchar* command, gchar* file) {
1908  gchar* cmd;
1909  int retval=0;
1910 
1911  if(command && *command) {
1912  cmd = g_strdup_printf(command, file);
1913  retval=system(cmd);
1914  g_free(cmd);
1915  }
1916  return retval;
1917 }
1918 
1919 /**
1920  * Serve a connection.
1921  *
1922  * @todo allow for multithreading, perhaps use libevent. Not just yet, though;
1923  * follow the road map.
1924  *
1925  * @param client a connected client
1926  **/
1927 void serveconnection(CLIENT *client) {
1928  if (client->server->transactionlog && (client->transactionlogfd == -1))
1929  {
1930  if (-1 == (client->transactionlogfd = open(client->server->transactionlog,
1931  O_WRONLY | O_CREAT,
1932  S_IRUSR | S_IWUSR)))
1933  g_warning("Could not open transaction log %s",
1934  client->server->transactionlog);
1935  }
1936 
1937  if(do_run(client->server->prerun, client->exportname)) {
1938  exit(EXIT_FAILURE);
1939  }
1940  setupexport(client);
1941 
1942  if (client->server->flags & F_COPYONWRITE) {
1943  copyonwrite_prepare(client);
1944  }
1945 
1946  setmysockopt(client->net);
1947 
1948  mainloop(client);
1949  do_run(client->server->postrun, client->exportname);
1950 
1951  if (-1 != client->transactionlogfd)
1952  {
1953  close(client->transactionlogfd);
1954  client->transactionlogfd = -1;
1955  }
1956 }
1957 
1958 /**
1959  * Find the name of the file we have to serve. This will use g_strdup_printf
1960  * to put the IP address of the client inside a filename containing
1961  * "%s" (in the form as specified by the "virtstyle" option). That name
1962  * is then written to client->exportname.
1963  *
1964  * @param net A socket connected to an nbd client
1965  * @param client information about the client. The IP address in human-readable
1966  * format will be written to a new char* buffer, the address of which will be
1967  * stored in client->clientname.
1968  * @return: 0 - OK, -1 - failed.
1969  **/
1970 int set_peername(int net, CLIENT *client) {
1971  struct sockaddr_storage netaddr;
1972  struct sockaddr* addr = (struct sockaddr*)&netaddr;
1973  socklen_t addrinlen = sizeof( struct sockaddr_storage );
1974  struct addrinfo hints;
1975  struct addrinfo *ai = NULL;
1976  char peername[NI_MAXHOST];
1977  char netname[NI_MAXHOST];
1978  char *tmp = NULL;
1979  int i;
1980  int e;
1981 
1982  if (getpeername(net, (struct sockaddr *) &(client->clientaddr), &addrinlen) < 0) {
1983  msg(LOG_INFO, "getpeername failed: %m");
1984  return -1;
1985  }
1986 
1987  if(addr->sa_family == AF_UNIX) {
1988  strcpy(peername, "unix");
1989  } else {
1990  if((e = getnameinfo((struct sockaddr *)&(client->clientaddr), addrinlen,
1991  peername, sizeof (peername), NULL, 0, NI_NUMERICHOST))) {
1992  msg(LOG_INFO, "getnameinfo failed: %s", gai_strerror(e));
1993  return -1;
1994  }
1995 
1996  memset(&hints, '\0', sizeof (hints));
1997  hints.ai_flags = AI_ADDRCONFIG;
1998  e = getaddrinfo(peername, NULL, &hints, &ai);
1999 
2000  if(e != 0) {
2001  msg(LOG_INFO, "getaddrinfo failed: %s", gai_strerror(e));
2002  freeaddrinfo(ai);
2003  return -1;
2004  }
2005  }
2006 
2007  switch(client->server->virtstyle) {
2008  case VIRT_NONE:
2009  msg(LOG_DEBUG, "virtualization is off");
2010  client->exportname=g_strdup(client->server->exportname);
2011  break;
2012  case VIRT_IPHASH:
2013  msg(LOG_DEBUG, "virtstyle iphash");
2014  for(i=0;i<strlen(peername);i++) {
2015  if(peername[i]=='.') {
2016  peername[i]='/';
2017  }
2018  }
2019  case VIRT_IPLIT:
2020  msg(LOG_DEBUG, "virtstyle ipliteral");
2021  client->exportname=g_strdup_printf(client->server->exportname, peername);
2022  break;
2023  case VIRT_CIDR:
2024  msg(LOG_DEBUG, "virtstyle cidr %d", client->server->cidrlen);
2025  memcpy(&netaddr, &(client->clientaddr), addrinlen);
2026  int addrbits;
2027  if(addr->sa_family == AF_UNIX) {
2028  tmp = g_strdup(peername);
2029  } else {
2030  assert((ai->ai_family == AF_INET) || (ai->ai_family == AF_INET6));
2031  if(ai->ai_family == AF_INET) {
2032  addrbits = 32;
2033  } else if(ai->ai_family == AF_INET6) {
2034  addrbits = 128;
2035  }
2036  uint8_t* addrptr = (uint8_t*)(((struct sockaddr*)&netaddr)->sa_data);
2037  for(int i = 0; i < addrbits; i+=8) {
2038  int masklen = client->server->cidrlen - i;
2039  masklen = masklen > 0 ? masklen : 0;
2040  uint8_t mask = getmaskbyte(masklen);
2041  *addrptr &= mask;
2042  addrptr++;
2043  }
2044  getnameinfo((struct sockaddr *) &netaddr, addrinlen,
2045  netname, sizeof (netname), NULL, 0, NI_NUMERICHOST);
2046  tmp=g_strdup_printf("%s/%s", netname, peername);
2047  }
2048 
2049  if(tmp != NULL) {
2050  client->exportname=g_strdup_printf(client->server->exportname, tmp);
2051  g_free(tmp);
2052  }
2053 
2054  break;
2055  }
2056 
2057  freeaddrinfo(ai);
2058  msg(LOG_INFO, "connect from %s, assigned file is %s",
2059  peername, client->exportname);
2060  client->clientname=g_strdup(peername);
2061  return 0;
2062 }
2063 
2064 /**
2065  * Destroy a pid_t*
2066  * @param data a pointer to pid_t which should be freed
2067  **/
2068 void destroy_pid_t(gpointer data) {
2069  g_free(data);
2070 }
2071 
2072 static pid_t
2074 {
2075  pid_t pid;
2076  sigset_t newset;
2077  sigset_t oldset;
2078 
2079  sigemptyset(&newset);
2080  sigaddset(&newset, SIGCHLD);
2081  sigaddset(&newset, SIGTERM);
2082  sigprocmask(SIG_BLOCK, &newset, &oldset);
2083  pid = fork();
2084  if (pid < 0) {
2085  msg(LOG_ERR, "Could not fork (%s)", strerror(errno));
2086  goto out;
2087  }
2088  if (pid > 0) { /* Parent */
2089  pid_t *pidp;
2090 
2091  pidp = g_malloc(sizeof(pid_t));
2092  *pidp = pid;
2093  g_hash_table_insert(children, pidp, pidp);
2094  goto out;
2095  }
2096  /* Child */
2097 
2098  /* Child's signal disposition is reset to default. */
2099  signal(SIGCHLD, SIG_DFL);
2100  signal(SIGTERM, SIG_DFL);
2101  signal(SIGHUP, SIG_DFL);
2102  sigemptyset(&oldset);
2103 out:
2104  sigprocmask(SIG_SETMASK, &oldset, NULL);
2105  return pid;
2106 }
2107 
/**
 * Accept one pending connection on a listening socket.
 *
 * @param sock the listening socket
 * @return the descriptor of the accepted connection, or a negative value if
 * accept() failed (the failure is reported via err_nonfatal())
 **/
static int
socket_accept(const int sock)
{
	struct sockaddr_storage peer;
	socklen_t peerlen = sizeof(peer);
	const int conn = accept(sock, (struct sockaddr *) &peer, &peerlen);

	if (conn < 0) {
		err_nonfatal("Failed to accept socket connection: %m");
	}

	return conn;
}
2122 
2123 static void
2124 handle_modern_connection(GArray *const servers, const int sock)
2125 {
2126  int net;
2127  pid_t pid;
2128  CLIENT *client = NULL;
2129  int sock_flags_old;
2130  int sock_flags_new;
2131 
2132  net = socket_accept(sock);
2133  if (net < 0)
2134  return;
2135 
2136  if (!dontfork) {
2137  pid = spawn_child();
2138  if (pid) {
2139  if (pid > 0)
2140  msg(LOG_INFO, "Spawned a child process");
2141  if (pid < 0)
2142  msg(LOG_ERR, "Failed to spawn a child process");
2143  close(net);
2144  return;
2145  }
2146  /* Child just continues. */
2147  }
2148 
2149  client = negotiate(net, servers);
2150  if (!client) {
2151  msg(LOG_ERR, "Modern initial negotiation failed");
2152  goto handler_err;
2153  }
2154 
2155  if (client->server->max_connections > 0 &&
2156  g_hash_table_size(children) >= client->server->max_connections) {
2157  msg(LOG_ERR, "Max connections (%d) reached",
2158  client->server->max_connections);
2159  goto handler_err;
2160  }
2161 
2162  sock_flags_old = fcntl(net, F_GETFL, 0);
2163  if (sock_flags_old == -1) {
2164  msg(LOG_ERR, "Failed to get socket flags");
2165  goto handler_err;
2166  }
2167 
2168  sock_flags_new = sock_flags_old & ~O_NONBLOCK;
2169  if (sock_flags_new != sock_flags_old &&
2170  fcntl(net, F_SETFL, sock_flags_new) == -1) {
2171  msg(LOG_ERR, "Failed to set socket to blocking mode");
2172  goto handler_err;
2173  }
2174 
2175  if (set_peername(net, client)) {
2176  msg(LOG_ERR, "Failed to set peername");
2177  goto handler_err;
2178  }
2179 
2180  if (!authorized_client(client)) {
2181  msg(LOG_INFO, "Client '%s' is not authorized to access",
2182  client->clientname);
2183  goto handler_err;
2184  }
2185 
2186  if (!dontfork) {
2187  int i;
2188 
2189  /* Free all root server resources here, because we are
2190  * currently in the child process serving one specific
2191  * connection. These are not simply needed anymore. */
2192  g_hash_table_destroy(children);
2193  children = NULL;
2194  for (i = 0; i < modernsocks->len; i++) {
2195  close(g_array_index(modernsocks, int, i));
2196  }
2197  g_array_free(modernsocks, TRUE);
2198 
2199  /* Now that we are in the child process after a
2200  * succesful negotiation, we do not need the list of
2201  * servers anymore, get rid of it.*/
2202 
2203  for (i = 0; i < servers->len; i++) {
2204  const SERVER *const server = &g_array_index(servers, SERVER, i);
2205  close(server->socket);
2206  }
2207 
2208  /* FALSE does not free the
2209  actual data. This is required,
2210  because the client has a
2211  direct reference into that
2212  data, and otherwise we get a
2213  segfault... */
2214  g_array_free(servers, FALSE);
2215  }
2216 
2217  msg(LOG_INFO, "Starting to serve");
2218  serveconnection(client);
2219  exit(EXIT_SUCCESS);
2220 
2221 handler_err:
2222  g_free(client);
2223  close(net);
2224 
2225  if (!dontfork) {
2226  exit(EXIT_FAILURE);
2227  }
2228 }
2229 
2230 static void
2231 handle_oldstyle_connection(GArray *const servers, SERVER *const serve)
2232 {
2233  int net;
2234  CLIENT *client = NULL;
2235  int sock_flags_old;
2236  int sock_flags_new;
2237 
2238  net = socket_accept(serve->socket);
2239  if (net < 0)
2240  return;
2241 
2242  if(serve->max_connections > 0 &&
2243  g_hash_table_size(children) >= serve->max_connections) {
2244  msg(LOG_INFO, "Max connections reached");
2245  goto handle_connection_out;
2246  }
2247  if((sock_flags_old = fcntl(net, F_GETFL, 0)) == -1) {
2248  err("fcntl F_GETFL");
2249  }
2250  sock_flags_new = sock_flags_old & ~O_NONBLOCK;
2251  if (sock_flags_new != sock_flags_old &&
2252  fcntl(net, F_SETFL, sock_flags_new) == -1) {
2253  err("fcntl F_SETFL ~O_NONBLOCK");
2254  }
2255 
2256  client = g_new0(CLIENT, 1);
2257  client->server=serve;
2258  client->exportsize=OFFT_MAX;
2259  client->net=net;
2260  client->transactionlogfd = -1;
2261 
2262  if (set_peername(net, client)) {
2263  goto handle_connection_out;
2264  }
2265  if (!authorized_client(client)) {
2266  msg(LOG_INFO, "Unauthorized client");
2267  goto handle_connection_out;
2268  }
2269  msg(LOG_INFO, "Authorized client");
2270 
2271  if (!dontfork) {
2272  pid_t pid;
2273  int i;
2274  sigset_t newset;
2275  sigset_t oldset;
2276 
2277  sigemptyset(&newset);
2278  sigaddset(&newset, SIGCHLD);
2279  sigaddset(&newset, SIGTERM);
2280  sigprocmask(SIG_BLOCK, &newset, &oldset);
2281  if ((pid = fork()) < 0) {
2282  msg(LOG_INFO, "Could not fork (%s)", strerror(errno));
2283  sigprocmask(SIG_SETMASK, &oldset, NULL);
2284  goto handle_connection_out;
2285  }
2286  if (pid > 0) { /* parent */
2287  pid_t *pidp;
2288 
2289  pidp = g_malloc(sizeof(pid_t));
2290  *pidp = pid;
2291  g_hash_table_insert(children, pidp, pidp);
2292  sigprocmask(SIG_SETMASK, &oldset, NULL);
2293  goto handle_connection_out;
2294  }
2295  /* child */
2296 
2297  /* Child's signal disposition is reset to default. */
2298  signal(SIGCHLD, SIG_DFL);
2299  signal(SIGTERM, SIG_DFL);
2300  signal(SIGHUP, SIG_DFL);
2301  sigemptyset(&oldset);
2302  sigprocmask(SIG_SETMASK, &oldset, NULL);
2303 
2304  g_hash_table_destroy(children);
2305  children = NULL;
2306  for(i=0;i<servers->len;i++) {
2307  close(g_array_index(servers, SERVER, i).socket);
2308  }
2309  /* FALSE does not free the
2310  actual data. This is required,
2311  because the client has a
2312  direct reference into that
2313  data, and otherwise we get a
2314  segfault... */
2315  g_array_free(servers, FALSE);
2316  for(i=0;i<modernsocks->len;i++) {
2317  close(g_array_index(modernsocks, int, i));
2318  }
2319  g_array_free(modernsocks, TRUE);
2320  }
2321 
2322  msg(LOG_INFO, "Starting to serve");
2323  serveconnection(client);
2324  exit(EXIT_SUCCESS);
2325 
2326 handle_connection_out:
2327  g_free(client);
2328  close(net);
2329 }
2330 
2331 /**
2332  * Return the index of the server whose servename matches the given
2333  * name.
2334  *
2335  * @param servename a string to match
2336  * @param servers an array of servers
2337  * @return the first index of the server whose servename matches the
2338  * given name or -1 if one cannot be found
2339  **/
2340 static int get_index_by_servename(const gchar *const servename,
2341  const GArray *const servers) {
2342  int i;
2343 
2344  for (i = 0; i < servers->len; ++i) {
2345  const SERVER server = g_array_index(servers, SERVER, i);
2346 
2347  if (strcmp(servename, server.servename) == 0)
2348  return i;
2349  }
2350 
2351  return -1;
2352 }
2353 
2354 int setup_serve(SERVER *const serve, GError **const gerror);
2355 
2356 /**
2357  * Parse configuration files and add servers to the array if they don't
2358  * already exist there. The existence is tested by comparing
2359  * servenames. A server is appended to the array only if its servename
2360  * is unique among all other servers.
2361  *
2362  * @param servers an array of servers
2363  * @return the number of new servers appended to the array, or -1 in
2364  * case of an error
2365  **/
2366 static int append_new_servers(GArray *const servers, GError **const gerror) {
2367  int i;
2368  GArray *new_servers;
2369  const int old_len = servers->len;
2370  int retval = -1;
2371  struct generic_conf genconf;
2372 
2373  new_servers = parse_cfile(config_file_pos, &genconf, true, gerror);
2374  if (!new_servers)
2375  goto out;
2376 
2377  for (i = 0; i < new_servers->len; ++i) {
2378  SERVER new_server = g_array_index(new_servers, SERVER, i);
2379 
2380  if (new_server.servename
2381  && -1 == get_index_by_servename(new_server.servename,
2382  servers)) {
2383  if (setup_serve(&new_server, gerror) == -1)
2384  goto out;
2385  if (append_serve(&new_server, servers) == -1)
2386  goto out;
2387  }
2388  }
2389 
2390  retval = servers->len - old_len;
2391 out:
2392  g_array_free(new_servers, TRUE);
2393 
2394  return retval;
2395 }
2396 
2397 /**
2398  * Loop through the available servers, and serve them. Never returns.
2399  **/
2400 void serveloop(GArray* servers) {
2401  int i;
2402  int max;
2403  fd_set mset;
2404  fd_set rset;
2405  sigset_t blocking_mask;
2406  sigset_t original_mask;
2407 
2408  /*
2409  * Set up the master fd_set. The set of descriptors we need
2410  * to select() for never changes anyway and it buys us a *lot*
2411  * of time to only build this once. However, if we ever choose
2412  * to not fork() for clients anymore, we may have to revisit
2413  * this.
2414  */
2415  max=0;
2416  FD_ZERO(&mset);
2417  for(i=0;i<servers->len;i++) {
2418  int sock;
2419  if((sock=(g_array_index(servers, SERVER, i)).socket) >= 0) {
2420  FD_SET(sock, &mset);
2421  max=sock>max?sock:max;
2422  }
2423  }
2424  for(i=0;i<modernsocks->len;i++) {
2425  int sock = g_array_index(modernsocks, int, i);
2426  FD_SET(sock, &mset);
2427  max=sock>max?sock:max;
2428  }
2429 
2430  /* Construct a signal mask which is used to make signal testing and
2431  * receiving an atomic operation to ensure no signal is received between
2432  * tests and blocking pselect(). */
2433  if (sigemptyset(&blocking_mask) == -1)
2434  err("failed to initialize blocking_mask: %m");
2435 
2436  if (sigaddset(&blocking_mask, SIGCHLD) == -1)
2437  err("failed to add SIGCHLD to blocking_mask: %m");
2438 
2439  if (sigaddset(&blocking_mask, SIGHUP) == -1)
2440  err("failed to add SIGHUP to blocking_mask: %m");
2441 
2442  if (sigaddset(&blocking_mask, SIGTERM) == -1)
2443  err("failed to add SIGTERM to blocking_mask: %m");
2444 
2445  if (sigprocmask(SIG_BLOCK, &blocking_mask, &original_mask) == -1)
2446  err("failed to block signals: %m");
2447 
2448  for(;;) {
2449  if (is_sigterm_caught) {
2450  is_sigterm_caught = 0;
2451 
2452  g_hash_table_foreach(children, killchild, NULL);
2453  unlink(pidfname);
2454 
2455  exit(EXIT_SUCCESS);
2456  }
2457 
2458  if (is_sigchld_caught) {
2459  int status;
2460  int* i;
2461  pid_t pid;
2462 
2463  is_sigchld_caught = 0;
2464 
2465  while ((pid=waitpid(-1, &status, WNOHANG)) > 0) {
2466  if (WIFEXITED(status)) {
2467  msg(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
2468  }
2469  i = g_hash_table_lookup(children, &pid);
2470  if (!i) {
2471  msg(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
2472  } else {
2473  DEBUG("Removing %d from the list of children", pid);
2474  g_hash_table_remove(children, &pid);
2475  }
2476  }
2477  }
2478 
2479  /* SIGHUP causes the root server process to reconfigure
2480  * itself and add new export servers for each newly
2481  * found export configuration group, i.e. spawn new
2482  * server processes for each previously non-existent
2483  * export. This does not alter old runtime configuration
2484  * but just appends new exports. */
2485  if (is_sighup_caught) {
2486  int n;
2487  GError *gerror = NULL;
2488 
2489  msg(LOG_INFO, "reconfiguration request received");
2490  is_sighup_caught = 0; /* Reset to allow catching
2491  * it again. */
2492 
2493  n = append_new_servers(servers, &gerror);
2494  if (n == -1)
2495  msg(LOG_ERR, "failed to append new servers: %s",
2496  gerror->message);
2497 
2498  for (i = servers->len - n; i < servers->len; ++i) {
2499  const SERVER server = g_array_index(servers,
2500  SERVER, i);
2501 
2502  if (server.socket >= 0) {
2503  FD_SET(server.socket, &mset);
2504  max = server.socket > max ? server.socket : max;
2505  }
2506 
2507  msg(LOG_INFO, "reconfigured new server: %s",
2508  server.servename);
2509  }
2510  }
2511 
2512  memcpy(&rset, &mset, sizeof(fd_set));
2513  if (pselect(max + 1, &rset, NULL, NULL, NULL, &original_mask) > 0) {
2514  DEBUG("accept, ");
2515  for(i=0; i < modernsocks->len; i++) {
2516  int sock = g_array_index(modernsocks, int, i);
2517  if(!FD_ISSET(sock, &rset)) {
2518  continue;
2519  }
2520 
2521  handle_modern_connection(servers, sock);
2522  }
2523  for(i=0; i < servers->len; i++) {
2524  SERVER *serve;
2525 
2526  serve=&(g_array_index(servers, SERVER, i));
2527  if(serve->socket < 0) {
2528  continue;
2529  }
2530  if(FD_ISSET(serve->socket, &rset)) {
2531  handle_oldstyle_connection(servers, serve);
2532  }
2533  }
2534  }
2535  }
2536 }
2537 void serveloop(GArray* servers) G_GNUC_NORETURN;
2538 
2539 /**
2540  * Set server socket options.
2541  *
2542  * @param socket a socket descriptor of the server
2543  *
2544  * @param gerror a pointer to an error object pointer used for reporting
2545  * errors. On error, if gerror is not NULL, *gerror is set and -1
2546  * is returned.
2547  *
2548  * @return 0 on success, -1 on error
2549  **/
2550 int dosockopts(const int socket, GError **const gerror) {
2551 #ifndef sun
2552  int yes=1;
2553 #else
2554  char yes='1';
2555 #endif /* sun */
2556  struct linger l;
2557 
2558  /* lose the pesky "Address already in use" error message */
2559  if (setsockopt(socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
2560  g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_REUSEADDR,
2561  "failed to set socket option SO_REUSEADDR: %s",
2562  strerror(errno));
2563  return -1;
2564  }
2565  l.l_onoff = 1;
2566  l.l_linger = 10;
2567  if (setsockopt(socket,SOL_SOCKET,SO_LINGER,&l,sizeof(l)) == -1) {
2568  g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_LINGER,
2569  "failed to set socket option SO_LINGER: %s",
2570  strerror(errno));
2571  return -1;
2572  }
2573  if (setsockopt(socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(int)) == -1) {
2574  g_set_error(gerror, NBDS_ERR, NBDS_ERR_SO_KEEPALIVE,
2575  "failed to set socket option SO_KEEPALIVE: %s",
2576  strerror(errno));
2577  return -1;
2578  }
2579 
2580  return 0;
2581 }
2582 
2583 /**
2584  * Connect a server's socket.
2585  *
2586  * @param serve the server we want to connect.
2587  **/
2588 int setup_serve(SERVER *const serve, GError **const gerror) {
2589  struct addrinfo hints;
2590  struct addrinfo *ai = NULL;
2591  gchar *port = NULL;
2592  int e;
2593  int retval = -1;
2594 
2595  /* Without this, it's possible that socket == 0, even if it's
2596  * not initialized at all. And that would be wrong because 0 is
2597  * totally legal value for properly initialized descriptor. This
2598  * line is required to ensure that unused/uninitialized
2599  * descriptors are marked as such (new style configuration
2600  * case). Currently, servers are being initialized in multiple
2601  * places, and some of the them do the socket initialization
2602  * incorrectly. This is the only point common to all code paths,
2603  * and therefore setting -1 is put here. However, the whole
2604  * server initialization procedure should be extracted to its
2605  * own function and all code paths wanting to mess with servers
2606  * should initialize servers with that function.
2607  *
2608  * TODO: fix server initialization */
2609  serve->socket = -1;
2610 
2611  if(!(glob_flags & F_OLDSTYLE)) {
2612  return serve->servename ? 1 : 0;
2613  }
2614  memset(&hints,'\0',sizeof(hints));
2615  hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG | AI_NUMERICSERV;
2616  hints.ai_socktype = SOCK_STREAM;
2617  hints.ai_family = serve->socket_family;
2618 
2619  port = g_strdup_printf("%d", serve->port);
2620  if (!port) {
2621  g_set_error(gerror, NBDS_ERR, NBDS_ERR_SYS,
2622  "failed to open an export socket: "
2623  "failed to convert a port number to a string: %s",
2624  strerror(errno));
2625  goto out;
2626  }
2627 
2628  e = getaddrinfo(serve->listenaddr,port,&hints,&ai);
2629 
2630  g_free(port);
2631 
2632  if(e != 0) {
2633  g_set_error(gerror, NBDS_ERR, NBDS_ERR_GAI,
2634  "failed to open an export socket: "
2635  "failed to get address info: %s",
2636  gai_strerror(e));
2637  goto out;
2638  }
2639 
2640  if(serve->socket_family == AF_UNSPEC)
2641  serve->socket_family = ai->ai_family;
2642 
2643 #ifdef WITH_SDP
2644  if ((serve->flags) && F_SDP) {
2645  if (ai->ai_family == AF_INET)
2646  ai->ai_family = AF_INET_SDP;
2647  else (ai->ai_family == AF_INET6)
2648  ai->ai_family = AF_INET6_SDP;
2649  }
2650 #endif
2651  if ((serve->socket = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol)) < 0) {
2652  g_set_error(gerror, NBDS_ERR, NBDS_ERR_SOCKET,
2653  "failed to open an export socket: "
2654  "failed to create a socket: %s",
2655  strerror(errno));
2656  goto out;
2657  }
2658 
2659  if (dosockopts(serve->socket, gerror) == -1) {
2660  g_prefix_error(gerror, "failed to open an export socket: ");
2661  goto out;
2662  }
2663 
2664  DEBUG("Waiting for connections... bind, ");
2665  e = bind(serve->socket, ai->ai_addr, ai->ai_addrlen);
2666  if (e != 0 && errno != EADDRINUSE) {
2667  g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
2668  "failed to open an export socket: "
2669  "failed to bind an address to a socket: %s",
2670  strerror(errno));
2671  goto out;
2672  }
2673  DEBUG("listen, ");
2674  if (listen(serve->socket, 1) < 0) {
2675  g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
2676  "failed to open an export socket: "
2677  "failed to start listening on a socket: %s",
2678  strerror(errno));
2679  goto out;
2680  }
2681 
2682  retval = serve->servename ? 1 : 0;
2683 out:
2684 
2685  if (retval == -1 && serve->socket >= 0) {
2686  close(serve->socket);
2687  serve->socket = -1;
2688  }
2689  freeaddrinfo (ai);
2690 
2691  return retval;
2692 }
2693 
2694 int open_unix(const gchar *const sockname, GError **const gerror) {
2695  struct sockaddr_un sa;
2696  int sock=-1;
2697  int retval=-1;
2698 
2699  memset(&sa, 0, sizeof(struct sockaddr_un));
2700  sa.sun_family = AF_UNIX;
2701  strncpy(sa.sun_path, sockname, 107);
2702  sock = socket(AF_UNIX, SOCK_STREAM, 0);
2703  if(sock < 0) {
2704  g_set_error(gerror, NBDS_ERR, NBDS_ERR_SOCKET,
2705  "failed to open a unix socket: "
2706  "failed to create socket: %s",
2707  strerror(errno));
2708  goto out;
2709  }
2710  if(bind(sock, (struct sockaddr*)&sa, sizeof(struct sockaddr_un))<0) {
2711  g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
2712  "failed to open a unix socket: "
2713  "failed to bind to address %s: %s",
2714  sockname, strerror(errno));
2715  goto out;
2716  }
2717  if(listen(sock, 10)<0) {
2718  g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
2719  "failed to open a unix socket: "
2720  "failed to start listening: %s",
2721  strerror(errno));
2722  goto out;
2723  }
2724  retval=0;
2725  g_array_append_val(modernsocks, sock);
2726 out:
2727  if(retval<0 && sock >= 0) {
2728  close(sock);
2729  }
2730 
2731  return retval;
2732 }
2733 
2734 int open_modern(const gchar *const addr, const gchar *const port,
2735  GError **const gerror) {
2736  struct addrinfo hints;
2737  struct addrinfo* ai = NULL;
2738  struct addrinfo* ai_bak;
2739  struct sock_flags;
2740  int e;
2741  int retval = -1;
2742  int sock = -1;
2743 
2744  memset(&hints, '\0', sizeof(hints));
2745  hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
2746  hints.ai_socktype = SOCK_STREAM;
2747  hints.ai_family = AF_UNSPEC;
2748  hints.ai_protocol = IPPROTO_TCP;
2749  e = getaddrinfo(addr, port ? port : NBD_DEFAULT_PORT, &hints, &ai);
2750  ai_bak = ai;
2751  if(e != 0) {
2752  g_set_error(gerror, NBDS_ERR, NBDS_ERR_GAI,
2753  "failed to open a modern socket: "
2754  "failed to get address info: %s",
2755  gai_strerror(e));
2756  goto out;
2757  }
2758 
2759  while(ai != NULL) {
2760  sock = -1;
2761 
2762  if((sock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol))<0) {
2763  g_set_error(gerror, NBDS_ERR, NBDS_ERR_SOCKET,
2764  "failed to open a modern socket: "
2765  "failed to create a socket: %s",
2766  strerror(errno));
2767  goto out;
2768  }
2769 
2770  if (dosockopts(sock, gerror) == -1) {
2771  g_prefix_error(gerror, "failed to open a modern socket: ");
2772  goto out;
2773  }
2774 
2775  if(bind(sock, ai->ai_addr, ai->ai_addrlen)) {
2776  /* This is so wrong.
2777  *
2778  * Linux will return multiple entries for the
2779  * same system when we ask it for something
2780  * AF_UNSPEC, even though the first entry will
2781  * listen to both protocols. Other systems will
2782  * return multiple entries too, but we actually
2783  * do need to open both. Sigh.
2784  *
2785  * Handle it by ignoring EADDRINUSE if we've
2786  * already got at least one socket open
2787  */
2788  if(errno == EADDRINUSE && modernsocks->len > 0) {
2789  goto next;
2790  }
2791  g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
2792  "failed to open a modern socket: "
2793  "failed to bind an address to a socket: %s",
2794  strerror(errno));
2795  goto out;
2796  }
2797 
2798  if(listen(sock, 10) <0) {
2799  g_set_error(gerror, NBDS_ERR, NBDS_ERR_BIND,
2800  "failed to open a modern socket: "
2801  "failed to start listening on a socket: %s",
2802  strerror(errno));
2803  goto out;
2804  }
2805  g_array_append_val(modernsocks, sock);
2806  next:
2807  ai = ai->ai_next;
2808  }
2809 
2810  retval = 0;
2811 out:
2812 
2813  if (retval == -1 && sock >= 0) {
2814  close(sock);
2815  }
2816  if(ai_bak)
2817  freeaddrinfo(ai_bak);
2818 
2819  return retval;
2820 }
2821 
2822 /**
2823  * Connect our servers.
2824  **/
2825 void setup_servers(GArray *const servers, const gchar *const modernaddr,
2826  const gchar *const modernport, const gchar* unixsock) {
2827  int i;
2828  struct sigaction sa;
2829  int want_modern=0;
2830 
2831  for(i=0;i<servers->len;i++) {
2832  GError *gerror = NULL;
2833  SERVER *server = &g_array_index(servers, SERVER, i);
2834  int ret;
2835 
2836  ret = setup_serve(server, &gerror);
2837  if (ret == -1) {
2838  msg(LOG_ERR, "failed to setup servers: %s",
2839  gerror->message);
2840  g_clear_error(&gerror);
2841  exit(EXIT_FAILURE);
2842  }
2843  want_modern |= ret;
2844  }
2845  if(want_modern) {
2846  GError *gerror = NULL;
2847  if (open_modern(modernaddr, modernport, &gerror) == -1) {
2848  msg(LOG_ERR, "failed to setup servers: %s",
2849  gerror->message);
2850  g_clear_error(&gerror);
2851  exit(EXIT_FAILURE);
2852  }
2853  }
2854  if(unixsock != NULL) {
2855  GError* gerror = NULL;
2856  if(open_unix(unixsock, &gerror) == -1) {
2857  msg(LOG_ERR, "failed to setup servers: %s",
2858  gerror->message);
2859  g_clear_error(&gerror);
2860  exit(EXIT_FAILURE);
2861  }
2862  }
2863  children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
2864 
2865  sa.sa_handler = sigchld_handler;
2866  sigemptyset(&sa.sa_mask);
2867  sigaddset(&sa.sa_mask, SIGTERM);
2868  sa.sa_flags = SA_RESTART;
2869  if(sigaction(SIGCHLD, &sa, NULL) == -1)
2870  err("sigaction: %m");
2871 
2872  sa.sa_handler = sigterm_handler;
2873  sigemptyset(&sa.sa_mask);
2874  sigaddset(&sa.sa_mask, SIGCHLD);
2875  sa.sa_flags = SA_RESTART;
2876  if(sigaction(SIGTERM, &sa, NULL) == -1)
2877  err("sigaction: %m");
2878 
2879  sa.sa_handler = sighup_handler;
2880  sigemptyset(&sa.sa_mask);
2881  sa.sa_flags = SA_RESTART;
2882  if(sigaction(SIGHUP, &sa, NULL) == -1)
2883  err("sigaction: %m");
2884 }
2885 
2886 /**
2887  * Go daemon (unless we specified at compile time that we didn't want this)
2888  * @param serve the first server of our configuration. If its port is zero,
2889  * then do not daemonize, because we're doing inetd then. This parameter
2890  * is only used to create a PID file of the form
2891  * /var/run/nbd-server.&lt;port&gt;.pid; it's not modified in any way.
2892  **/
2893 #if !defined(NODAEMON)
2894 void daemonize(SERVER* serve) {
2895  FILE*pidf;
2896 
2897  if(serve && !(serve->port)) {
2898  return;
2899  }
2900  if(daemon(0,0)<0) {
2901  err("daemon");
2902  }
2903  if(!*pidftemplate) {
2904  if(serve) {
2905  strncpy(pidftemplate, "/var/run/nbd-server.%d.pid", 255);
2906  } else {
2907  strncpy(pidftemplate, "/var/run/nbd-server.pid", 255);
2908  }
2909  }
2910  snprintf(pidfname, 255, pidftemplate, serve ? serve->port : 0);
2911  pidf=fopen(pidfname, "w");
2912  if(pidf) {
2913  fprintf(pidf,"%d\n", (int)getpid());
2914  fclose(pidf);
2915  } else {
2916  perror("fopen");
2917  fprintf(stderr, "Not fatal; continuing");
2918  }
2919 }
2920 #else
2921 #define daemonize(serve)
2922 #endif /* !defined(NODAEMON) */
2923 
2924 /*
2925  * Everything beyond this point (in the file) is run in non-daemon mode.
2926  * The stuff above daemonize() isn't.
2927  */
2928 
2929 /**
2930  * Set up user-ID and/or group-ID
2931  **/
2932 void dousers(const gchar *const username, const gchar *const groupname) {
2933  struct passwd *pw;
2934  struct group *gr;
2935  gchar* str;
2936  if (groupname) {
2937  gr = getgrnam(groupname);
2938  if(!gr) {
2939  str = g_strdup_printf("Invalid group name: %s", groupname);
2940  err(str);
2941  }
2942  if(setgid(gr->gr_gid)<0) {
2943  err("Could not set GID: %m");
2944  }
2945  }
2946  if (username) {
2947  pw = getpwnam(username);
2948  if(!pw) {
2949  str = g_strdup_printf("Invalid user name: %s", username);
2950  err(str);
2951  }
2952  if(setuid(pw->pw_uid)<0) {
2953  err("Could not set UID: %m");
2954  }
2955  }
2956 }
2957 
2958 #ifndef ISSERVER
2959 void glib_message_syslog_redirect(const gchar *log_domain,
2960  GLogLevelFlags log_level,
2961  const gchar *message,
2962  gpointer user_data)
2963 {
2964  int level=LOG_DEBUG;
2965 
2966  switch( log_level )
2967  {
2968  case G_LOG_FLAG_FATAL:
2969  case G_LOG_LEVEL_CRITICAL:
2970  case G_LOG_LEVEL_ERROR:
2971  level=LOG_ERR;
2972  break;
2973  case G_LOG_LEVEL_WARNING:
2974  level=LOG_WARNING;
2975  break;
2976  case G_LOG_LEVEL_MESSAGE:
2977  case G_LOG_LEVEL_INFO:
2978  level=LOG_INFO;
2979  break;
2980  case G_LOG_LEVEL_DEBUG:
2981  level=LOG_DEBUG;
2982  break;
2983  default:
2984  level=LOG_ERR;
2985  }
2986  syslog(level, "%s", message);
2987 }
2988 #endif
2989 
2990 /**
2991  * Main entry point...
2992  **/
2993 int main(int argc, char *argv[]) {
2994  SERVER *serve;
2995  GArray *servers;
2996  GError *err=NULL;
2997  struct generic_conf genconf;
2998 
2999  memset(&genconf, 0, sizeof(struct generic_conf));
3000 
3001  if (sizeof( struct nbd_request )!=28) {
3002  fprintf(stderr,"Bad size of structure. Alignment problems?\n");
3003  exit(EXIT_FAILURE) ;
3004  }
3005 
3006  memset(pidftemplate, '\0', 256);
3007 
3008  modernsocks = g_array_new(FALSE, FALSE, sizeof(int));
3009 
3010  logging(MY_NAME);
3011  config_file_pos = g_strdup(CFILE);
3012  serve=cmdline(argc, argv);
3013 
3014  servers = parse_cfile(config_file_pos, &genconf, true, &err);
3015 
3016  /* Update global variables with parsed values. This will be
3017  * removed once we get rid of global configuration variables. */
3018  glob_flags |= genconf.flags;
3019 
3020  if(serve) {
3021  serve->socket_family = AF_UNSPEC;
3022 
3023  append_serve(serve, servers);
3024 
3025  if (!(serve->port)) {
3026  CLIENT *client;
3027 #ifndef ISSERVER
3028  /* You really should define ISSERVER if you're going to use
3029  * inetd mode, but if you don't, closing stdout and stderr
3030  * (which inetd had connected to the client socket) will let it
3031  * work. */
3032  close(1);
3033  close(2);
3034  open("/dev/null", O_WRONLY);
3035  open("/dev/null", O_WRONLY);
3036  g_log_set_default_handler( glib_message_syslog_redirect, NULL );
3037 #endif
3038  client=g_malloc(sizeof(CLIENT));
3039  client->server=serve;
3040  client->net=-1;
3041  client->exportsize=OFFT_MAX;
3042  if (set_peername(0, client))
3043  exit(EXIT_FAILURE);
3044  serveconnection(client);
3045  return 0;
3046  }
3047  }
3048 
3049  if(!servers || !servers->len) {
3050  if(err && !(err->domain == NBDS_ERR
3051  && err->code == NBDS_ERR_CFILE_NOTFOUND)) {
3052  g_warning("Could not parse config file: %s",
3053  err ? err->message : "Unknown error");
3054  }
3055  }
3056  if(serve) {
3057  g_warning("Specifying an export on the command line is deprecated.");
3058  g_warning("Please use a configuration file instead.");
3059  }
3060 
3061  if((!serve) && (!servers||!servers->len)) {
3062  if(err)
3063  g_message("No configured exports; quitting.");
3064  exit(EXIT_FAILURE);
3065  }
3066  if (!dontfork)
3067  daemonize(serve);
3068  setup_servers(servers, genconf.modernaddr, genconf.modernport,
3069  genconf.unixsock);
3070  dousers(genconf.user, genconf.group);
3071 
3072  serveloop(servers);
3073 }
The (required) group "generic" is missing.
Definition: nbdsrv.h:88
int expread(off_t a, char *buf, size_t len, CLIENT *client)
Read an amount of bytes at a given offset from the right file.
Definition: nbd-server.c:1219
int setup_serve(SERVER *const serve, GError **const gerror)
Connect a server's socket.
Definition: nbd-server.c:2588
static void consume(int f, void *buf, size_t len, size_t bufsiz)
Consume data from an FD that we don't want.
Definition: nbd-server.c:298
int get_filepos(CLIENT *client, off_t a, int *fhandle, off_t *foffset, size_t *maxbytes)
Get the file handle and offset, given an export offset.
Definition: nbd-server.c:1006
This parameter is a string.
Definition: nbd-server.c:212
gchar * servename
name of the export as selected by nbd-client
Definition: nbdsrv.h:46
u64 ntohll(u64 a)
Definition: cliserv.c:74
gint flagval
Flag mask for this parameter in case ptype is PARAM_BOOL.
Definition: nbd-server.c:229
void glib_message_syslog_redirect(const gchar *log_domain, GLogLevelFlags log_level, const gchar *message, gpointer user_data)
Definition: nbd-server.c:2959
#define NBD_OPT_LIST
Definition: cliserv.h:91
PARAM_TYPE ptype
Type of the parameter.
Definition: nbd-server.c:224
__be32 type
Definition: nbd.h:67
GArray * export
array of FILE_INFO of exported files; array size is always 1 unless we're doing the multiple file opt...
Definition: nbdsrv.h:60
Variables associated with a server.
Definition: nbdsrv.h:29
uint8_t getmaskbyte(int masklen)
Gets a byte to allow for address masking.
Definition: nbdsrv.c:93
void destroy_pid_t(gpointer data)
Destroy a pid_t*.
Definition: nbd-server.c:2068
void setup_servers(GArray *const servers, const gchar *const modernaddr, const gchar *const modernport, const gchar *unixsock)
Connect our servers.
Definition: nbd-server.c:2825
void usage()
Print out a message about how to use nbd-server.
Definition: nbd-server.c:421
void err(const char *s)
Definition: cliserv.c:56
#define TREEPAGESIZE
tree (block) files use those chunks
Definition: nbd-server.c:136
uint32_t difffilelen
number of pages in difffile
Definition: nbdsrv.h:69
#define NBD_FLAG_SEND_FUA
Definition: nbd.h:47
#define SEND(net, reply)
sending macro.
Definition: nbd-server.c:1580
#define NBD_FLAG_C_NO_ZEROES
Definition: cliserv.h:107
#define NBD_FLAG_NO_ZEROES
Definition: cliserv.h:104
int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client)
Call rawexpread repeatedly until all data has been read.
Definition: nbd-server.c:1198
int glob_flags
global flags
Definition: nbd-server.c:124
gchar * config_file_pos
Where our config file actually is.
Definition: nbd-server.c:121
#define SYSCONFDIR
Default position of the config file.
Definition: nbd-server.c:116
#define F_COPYONWRITE
flag to tell us a file is exported using copyonwrite
Definition: nbd-server.c:142
SERVER * server
The server this client is getting data from.
Definition: nbdsrv.h:64
static void handle_oldstyle_connection(GArray *const servers, SERVER *const serve)
Definition: nbd-server.c:2231
#define G_GNUC_NORETURN
Definition: cliserv.h:62
#define F_FLUSH
Whether server wants FLUSH to be sent by the client.
Definition: nbd-server.c:149
int copyonwrite_prepare(CLIENT *client)
Definition: nbd-server.c:1876
Failed to set SO_LINGER to a socket.
Definition: nbdsrv.h:98
Definition: nbd.h:77
No virtualization.
Definition: nbdsrv.h:19
gchar * postrun
command that will be run after the client disconnects
Definition: nbdsrv.h:44
int clientfeats
Features supported by this client.
Definition: nbdsrv.h:73
int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua)
Call rawexpwrite repeatedly until all data has been written.
Definition: nbd-server.c:1151
#define VERSION
Definition: config.h:193
gchar * paramname
Name of the parameter, as it appears in the config file.
Definition: nbd-server.c:220
void dump_section(SERVER *serve, gchar *section_header)
Definition: nbd-server.c:441
static int socket_accept(const int sock)
Definition: nbd-server.c:2109
char handle[8]
Definition: nbd.h:80
#define CFILE
Definition: nbd-server.c:118
This parameter is a boolean.
Definition: nbd-server.c:213
#define F_SDP
flag to tell us the export should be done using the Socket Direct Protocol for RDMA ...
Definition: nbd-server.c:147
Failed to bind an address to socket.
Definition: nbdsrv.h:103
void serveloop(GArray *servers)
Loop through the available servers, and serve them.
Definition: nbd-server.c:2400
#define msg(prio,...)
Logging macros.
Definition: nbdsrv.h:117
static void handle_modern_connection(GArray *const servers, const int sock)
Definition: nbd-server.c:2124
Error occurred during readdir()
Definition: nbdsrv.h:97
int fhandle
file descriptor
Definition: nbd-server.c:202
int dosockopts(const int socket, GError **const gerror)
Set server socket options.
Definition: nbd-server.c:2550
static int get_index_by_servename(const gchar *const servename, const GArray *const servers)
Return the index of the server whose servename matches the given name.
Definition: nbd-server.c:2340
gchar * modernport
port of the modern socket
Definition: nbd-server.c:240
gchar * user
user we run the server as
Definition: nbd-server.c:237
#define NBD_REP_ERR_INVALID
Definition: cliserv.h:99
int open_unix(const gchar *const sockname, GError **const gerror)
Definition: nbd-server.c:2694
off_t startoff
starting offset of this file
Definition: nbd-server.c:203
The configuration file is not found.
Definition: nbdsrv.h:87
#define fdatasync(arg)
Definition: cliserv.h:44
#define F_TEMPORARY
Whether the backing file is temporary and should be created then unlinked.
Definition: nbd-server.c:152
char pidftemplate[256]
template to be used for the filename of the PID file
Definition: nbd-server.c:163
#define NBD_REP_ERR_POLICY
Definition: cliserv.h:98
static pid_t spawn_child()
Definition: nbd-server.c:2073
static volatile sig_atomic_t is_sigchld_caught
Flag set by SIGCHLD handler to mark a child exit.
Definition: nbd-server.c:171
gchar * cowdir
directory for copy-on-write diff files.
Definition: nbdsrv.h:49
static int append_new_servers(GArray *const servers, GError **const gerror)
Parse configuration files and add servers to the array if they don't already exist there...
Definition: nbd-server.c:2366
int expflush(CLIENT *client)
Flush data to a client.
Definition: nbd-server.c:1319
struct sockaddr_storage clientaddr
peer, in binary format, network byte order
Definition: nbdsrv.h:58
int flags
flags associated with this exported file
Definition: nbdsrv.h:36
This parameter is an integer.
Definition: nbd-server.c:210
#define F_LIST
Allow clients to list the exports on a server.
Definition: nbd-server.c:159
void serveconnection(CLIENT *client)
Serve a connection.
Definition: nbd-server.c:1927
__be32 error
Definition: nbd.h:79
GArray * modernsocks
Sockets for the modern handler.
Definition: nbd-server.c:186
int exptrim(struct nbd_request *req, CLIENT *client)
Definition: nbd-server.c:1347
void killchild(gpointer key, gpointer value, gpointer user_data)
Kill a child.
Definition: nbd-server.c:963
gchar * exportname
(unprocessed) filename of the file we're exporting
Definition: nbdsrv.h:30
int net
The actual client socket.
Definition: nbdsrv.h:63
#define NBD_FLAG_SEND_FLUSH
Definition: nbd.h:46
static void sigchld_handler(const int s G_GNUC_UNUSED)
Handle SIGCHLD by atomically setting a flag which will be evaluated in the main loop of the root serv...
Definition: nbd-server.c:951
static void construct_path(char *name, int lenmax, off_t size, off_t pos, off_t *ppos)
Tree structure helper functions.
Definition: nbd-server.c:331
#define F_AUTOREADONLY
flag to tell us a file is set to autoreadonly
Definition: nbd-server.c:145
gpointer target
Pointer to where the data of this parameter should be written.
Definition: nbd-server.c:225
int do_run(gchar *command, gchar *file)
Run a command.
Definition: nbd-server.c:1907
static volatile sig_atomic_t is_sigterm_caught
Flag set by SIGTERM handler to mark an exit request.
Definition: nbd-server.c:176
#define NBD_OPT_EXPORT_NAME
Definition: cliserv.h:89
void setmysockopt(int sock)
Definition: cliserv.c:12
Underlying system call or library error.
Definition: nbdsrv.h:105
int open_modern(const gchar *const addr, const gchar *const port, GError **const gerror)
Definition: nbd-server.c:2734
unsigned int port
port we're exporting this file at
Definition: nbdsrv.h:34
gboolean required
Whether this is a required (as opposed to optional) parameter.
Definition: nbd-server.c:222
#define NBD_REP_ACK
Definition: cliserv.h:94
void err_nonfatal(const char *s)
Definition: cliserv.c:30
#define F_TRIM
Whether server wants TRIM (discard) to be sent by the client.
Definition: nbd-server.c:153
gchar * transactionlog
filename for transaction log
Definition: nbdsrv.h:48
Every subnet in its own directory.
Definition: nbdsrv.h:23
gchar * listenaddr
The IP address we're listening on.
Definition: nbdsrv.h:33
#define F_OLDSTYLE
Global flags:
Definition: nbd-server.c:158
Failed to set SO_KEEPALIVE to a socket.
Definition: nbdsrv.h:100
CLIENT * negotiate(int net, GArray *servers)
Do the initial negotiation.
Definition: nbd-server.c:1472
#define NBD_OPT_ABORT
Definition: cliserv.h:90
char * clientname
peer, in human-readable format
Definition: nbdsrv.h:57
static void send_reply(uint32_t opt, int net, uint32_t reply_type, size_t datasize, void *data)
Definition: nbd-server.c:1385
#define AI_NUMERICSERV
Definition: netdb-compat.h:21
static const char * getcommandname(uint64_t command)
Translate a command name into human readable form.
Definition: nbd-server.c:251
GArray * do_cfile_dir(gchar *dir, struct generic_conf *const genconf, GError **e)
Parse config file snippets in a directory.
Definition: nbd-server.c:632
int socket
The socket of this server.
Definition: nbdsrv.h:37
Variables associated with a client connection.
Definition: nbdsrv.h:55
gchar * modernaddr
address of the modern socket
Definition: nbd-server.c:239
bool logged_oversized
whether we logged oversized requests already
Definition: nbd-server.c:196
int difffile
filedescriptor of copyonwrite file.
Definition: nbdsrv.h:66
SERVER * cmdline(int argc, char *argv[])
Parse the command line.
Definition: nbd-server.c:473
GHashTable * children
Definition: nbd-server.c:161
#define OFFT_MAX
The highest value a variable of type off_t can reach.
Definition: nbd-server.c:133
int set_peername(int net, CLIENT *client)
Find the name of the file we have to serve.
Definition: nbd-server.c:1970
static int open_treefile(char *name, mode_t mode, off_t size, off_t pos)
Definition: nbd-server.c:361
gint flags
global flags
Definition: nbd-server.c:242
#define NBD_FLAG_HAS_FLAGS
Definition: nbd.h:44
static void readit(int f, void *buf, size_t len)
Read data from a file descriptor into a buffer.
Definition: nbd-server.c:275
char pidfname[256]
name of our PID file
Definition: nbd-server.c:162
Failed to get address info.
Definition: nbdsrv.h:101
Variables associated with an open file.
Definition: nbd-server.c:201
int dontfork
Definition: nbd-server.c:127
uint32_t * difmap
see comment on the global difmap for this one
Definition: nbdsrv.h:70
static void sighup_handler(const int s G_GNUC_UNUSED)
Handle SIGHUP by atomically setting a flag which will be evaluated in the main loop of the root serve...
Definition: nbd-server.c:991
#define PARAM_OFFT
Definition: lfs.h:10
static void handle_list(uint32_t opt, int net, GArray *servers, uint32_t cflags)
Definition: nbd-server.c:1440
void daemonize(SERVER *serve)
Go daemon (unless we specified at compile time that we didn't want this)
Definition: nbd-server.c:2894
static CLIENT * handle_export_name(uint32_t opt, int net, GArray *servers, uint32_t cflags)
Definition: nbd-server.c:1404
uint64_t size_autodetect(int fhandle)
Detect the size of a file.
Definition: nbdsrv.c:252
#define MY_NAME
Definition: nbd-server.c:105
#define F_SPARSE
flag to tell us copyonwrite should use a sparse file
Definition: nbd-server.c:146
PARAM_TYPE
Type of configuration file values.
Definition: nbd-server.c:209
ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client)
Read an amount of bytes at a given offset from the right file.
Definition: nbd-server.c:1173
Literal IP address as part of the filename.
Definition: nbdsrv.h:20
#define TREEDIRSIZE
number of files per subdirectory (or subdirs per subdirectory)
Definition: nbd-server.c:137
int transactionlogfd
fd for transaction log
Definition: nbdsrv.h:72
const u64 opts_magic
Definition: cliserv.c:9
void myseek(int handle, off_t a)
seek to a position in a file, with error handling.
Definition: nbd-server.c:1060
#define NBD_CMD_FLAG_FUA
Definition: nbd.h:41
#define F_TREEFILES
flag to tell us a file is exported using -t
Definition: nbd-server.c:155
int max_connections
maximum number of opened connections
Definition: nbdsrv.h:47
#define NBD_D_TYPE
Definition: nbd-server.c:620
VIRT_STYLE virtstyle
The style of virtualization, if any.
Definition: nbdsrv.h:39
#define NBD_FLAG_READ_ONLY
Definition: nbd.h:45
#define G_GNUC_UNUSED
Definition: cliserv.h:63
static volatile sig_atomic_t is_sighup_caught
Flag set by SIGHUP handler to mark a reconfiguration request.
Definition: nbd-server.c:181
GArray * parse_cfile(gchar *f, struct generic_conf *genconf, bool expect_generic, GError **e)
Parse the config file.
Definition: nbd-server.c:713
Failed to set SO_REUSEADDR to a socket.
Definition: nbdsrv.h:99
__be32 len
Definition: nbd.h:70
int append_serve(const SERVER *const s, GArray *const a)
append new server to array
Definition: nbdsrv.c:192
ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua)
Write an amount of bytes at a given offset to the right file.
Definition: nbd-server.c:1077
#define BUFSIZE
Size of buffer that can hold requests.
Definition: nbd-server.c:134
__be64 from
Definition: nbd.h:69
int socket_family
family of the socket
Definition: nbdsrv.h:38
uint64_t expected_size
size of the exported file as it was told to us through configuration
Definition: nbdsrv.h:31
gchar * prerun
command to be run after connecting a client, but before starting to serve
Definition: nbdsrv.h:42
#define NBD_FLAG_SEND_TRIM
Definition: nbd.h:49
This parameter is an integer.
Definition: nbd-server.c:211
#define htonll
Definition: cliserv.h:83
gchar * unixsock
file name of the unix domain socket
Definition: nbd-server.c:241
#define NBD_CMD_MASK_COMMAND
Definition: nbd.h:40
gchar * group
group we run the server as
Definition: nbd-server.c:238
uint64_t exportsize
size of the file we're exporting
Definition: nbdsrv.h:56
#define DEBUG(...)
Definition: nbd-debug.h:8
#define F_SYNC
Whether to fsync() after a write.
Definition: nbd-server.c:148
static int nbd_errno(int errcode)
Definition: nbd-server.c:1558
char * authname
filename of the authorization file
Definition: nbdsrv.h:35
int main(int argc, char *argv[])
Main entry point...
Definition: nbd-server.c:2993
static void writeit(int f, void *buf, size_t len)
Write data from a buffer into a filedescriptor.
Definition: nbd-server.c:314
static void mkdir_path(char *path)
Definition: nbd-server.c:348
#define NBD_REPLY_MAGIC
Definition: nbd.h:58
#define NBD_REP_SERVER
Definition: cliserv.h:95
#define NBD_DEFAULT_PORT
Definition: cliserv.h:85
gboolean modern
client was negotiated using modern negotiation protocol
Definition: nbdsrv.h:71
__be32 magic
Definition: nbd.h:78
#define F_FUA
Whether server wants FUA to be sent by the client.
Definition: nbd-server.c:150
void setupexport(CLIENT *client)
Set up client export array, which is an array of FILE_INFO.
Definition: nbd-server.c:1764
A config file was specified that does not define any exports.
Definition: nbdsrv.h:92
#define NBD_REQUEST_MAGIC
Definition: nbd.h:57
char * difffilename
filename of the copy-on-write file, if any
Definition: nbdsrv.h:65
void dousers(const gchar *const username, const gchar *const groupname)
Set up user-ID and/or group-ID.
Definition: nbd-server.c:2932
#define NBDS_ERR
Error domain common for all NBD server errors.
Definition: nbdsrv.h:81
char handle[8]
Definition: nbd.h:39
char handle[8]
Definition: nbd.h:68
char default_authname[]
default name of allow file
Definition: nbd-server.c:164
#define DIFFPAGESIZE
diff file uses those chunks
Definition: nbd-server.c:135
Replacing all dots in an IP address by a / before doing the same as in IPLIT.
Definition: nbdsrv.h:21
uint8_t cidrlen
The length of the mask when we use CIDR-style virtualization.
Definition: nbdsrv.h:40
static void sigterm_handler(const int s G_GNUC_UNUSED)
Handle SIGTERM by atomically setting a flag which will be evaluated in the main loop of the root serv...
Definition: nbd-server.c:978
#define NBD_FLAG_FIXED_NEWSTYLE
Definition: cliserv.h:103
#define INIT_PASSWD
Definition: cliserv.h:71
static void delete_treefile(char *name, off_t size, off_t pos)
Definition: nbd-server.c:403
#define F_READONLY
Per-export flags:
Definition: nbd-server.c:140
__be32 magic
Definition: nbd.h:37
A value is not supported in this build.
Definition: nbdsrv.h:91
#define NBD_FLAG_ROTATIONAL
Definition: nbd.h:48
A directory requested does not exist.
Definition: nbdsrv.h:96
int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua)
Write an amount of bytes at a given offset to the right file.
Definition: nbd-server.c:1261
char * exportname
(processed) filename of the file we're exporting
Definition: nbdsrv.h:59
#define ERROR(client, reply, errcode)
error macro.
Definition: nbd-server.c:1584
Configuration file values of the "generic" section.
Definition: nbd-server.c:236
void send_export_info(CLIENT *client)
Definition: nbd-server.c:1531
Configuration file values.
Definition: nbd-server.c:219
#define F_ROTATIONAL
Whether server wants the client to implement the elevator algorithm.
Definition: nbd-server.c:151
A value is syntactically invalid.
Definition: nbdsrv.h:90
__be32 len
Definition: nbd.h:41
int mainloop(CLIENT *client)
Serve a file to a single client.
Definition: nbd-server.c:1594
__be32 magic
Definition: nbd.h:66
#define F_MULTIFILE
flag to tell us a file is exported using -m
Definition: nbd-server.c:141
void logging(const char *name)
Definition: cliserv.c:61
#define F_NO_ZEROES
Do not send zeros to client.
Definition: nbd-server.c:160
Failed to create a socket.
Definition: nbdsrv.h:102
int authorized_client(CLIENT *opts)
Check whether a client is allowed to connect.
Definition: nbdsrv.c:105
#define NBD_REP_ERR_UNSUP
Definition: cliserv.h:97