1 /*
2     Simple prototype Xen Store Daemon providing simple tree-like database.
3     Copyright (C) 2005 Rusty Russell IBM Corporation
4 
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9 
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14 
15     You should have received a copy of the GNU General Public License
16     along with this program; If not, see <http://www.gnu.org/licenses/>.
17 */
18 
19 #include <inttypes.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 #include <poll.h>
23 #ifndef NO_SOCKETS
24 #include <sys/socket.h>
25 #include <sys/un.h>
26 #endif
27 #include <sys/time.h>
28 #include <time.h>
29 #include <unistd.h>
30 #include <fcntl.h>
31 #include <stdbool.h>
32 #include <stdio.h>
33 #include <stdarg.h>
34 #include <stdlib.h>
35 #include <syslog.h>
36 #include <string.h>
37 #include <errno.h>
38 #include <dirent.h>
39 #include <getopt.h>
40 #include <signal.h>
41 #include <assert.h>
42 #include <setjmp.h>
43 
44 #include <xenevtchn.h>
45 
46 #include "utils.h"
47 #include "list.h"
48 #include "talloc.h"
49 #include "xenstore_lib.h"
50 #include "xenstored_core.h"
51 #include "xenstored_watch.h"
52 #include "xenstored_transaction.h"
53 #include "xenstored_domain.h"
54 #include "xenstored_control.h"
55 #include "tdb.h"
56 
57 #ifndef NO_SOCKETS
58 #if defined(HAVE_SYSTEMD)
59 #define XEN_SYSTEMD_ENABLED 1
60 #endif
61 #endif
62 
63 #if defined(XEN_SYSTEMD_ENABLED)
64 #include <systemd/sd-daemon.h>
65 #endif
66 
extern xenevtchn_handle *xce_handle; /* in xenstored_domain.c */
/* Slot of the event channel fd inside fds[]; refreshed by initialize_fds(). */
static int xce_pollfd_idx = -1;
/* Poll descriptor array, grown on demand by set_fd(). */
static struct pollfd *fds;
/* Number of elements currently allocated in fds[]. */
static unsigned int current_array_size;
/* Number of elements of fds[] currently in use. */
static unsigned int nr_fds;

/* Round _x up to the next multiple of 2^_w. */
#define ROUNDUP(_x, _w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))

/* When set, write_messages() prints every outgoing message via xprintf(). */
static bool verbose = false;
/* List of connections; walked by initialize_fds(). */
LIST_HEAD(connections);
/* Descriptor of the trace log; negative while no trace log is open. */
int tracefd = -1;
/* NOTE(review): not referenced in this part of the file. */
static bool recovery = true;
/* Pipe written by the SIGHUP handler; the read end is added to the poll set. */
static int reopen_log_pipe[2];
/* Slot of reopen_log_pipe[0] inside fds[]; refreshed by initialize_fds(). */
static int reopen_log_pipe0_pollfd_idx = -1;
/* Path of the trace log opened by reopen_log(); NULL means no trace file. */
char *tracefile = NULL;
/* TDB handle used for every tdb_fetch/tdb_store/tdb_delete in this file. */
TDB_CONTEXT *tdb_ctx = NULL;

/* Forward declaration; used by trace_io() and write_messages(). */
static const char *sockmsg_string(enum xsd_sockmsg_type type);
85 
/*
 * Format a printf-style message and emit it to both the trace log and
 * syslog (at LOG_ERR).  If the formatted string cannot be allocated, a fixed
 * fallback text is logged instead.
 */
#define log(...)							\
	do {								\
		char *s = talloc_asprintf(NULL, __VA_ARGS__);		\
		if (s) {						\
			trace("%s\n", s);				\
			syslog(LOG_ERR, "%s",  s);			\
			talloc_free(s);					\
		} else {						\
			trace("talloc failure during logging\n");	\
			syslog(LOG_ERR, "talloc failure during logging\n"); \
		}							\
	} while (0)
98 
99 
/* Limit compared against domain_entry() in construct_node(). */
int quota_nb_entry_per_domain = 1000;
/* NOTE(review): presumably a per-domain watch limit; not used in this part of the file. */
int quota_nb_watch_per_domain = 128;
/* Record size limit applied to unprivileged connections in write_node_raw(). */
int quota_max_entry_size = 2048; /* 2K */
/* NOTE(review): presumably a transaction limit; not used in this part of the file. */
int quota_max_transaction = 10;
104 
trace(const char * fmt,...)105 void trace(const char *fmt, ...)
106 {
107 	va_list arglist;
108 	char *str;
109 	char sbuf[1024];
110 	int ret, dummy;
111 
112 	if (tracefd < 0)
113 		return;
114 
115 	/* try to use a static buffer */
116 	va_start(arglist, fmt);
117 	ret = vsnprintf(sbuf, 1024, fmt, arglist);
118 	va_end(arglist);
119 
120 	if (ret <= 1024) {
121 		dummy = write(tracefd, sbuf, ret);
122 		return;
123 	}
124 
125 	/* fail back to dynamic allocation */
126 	va_start(arglist, fmt);
127 	str = talloc_vasprintf(NULL, fmt, arglist);
128 	va_end(arglist);
129 	if (str) {
130 		dummy = write(tracefd, str, strlen(str));
131 		talloc_free(str);
132 	}
133 }
134 
trace_io(const struct connection * conn,const struct buffered_data * data,int out)135 static void trace_io(const struct connection *conn,
136 		     const struct buffered_data *data,
137 		     int out)
138 {
139 	unsigned int i;
140 	time_t now;
141 	struct tm *tm;
142 
143 #ifdef HAVE_DTRACE
144 	dtrace_io(conn, data, out);
145 #endif
146 
147 	if (tracefd < 0)
148 		return;
149 
150 	now = time(NULL);
151 	tm = localtime(&now);
152 
153 	trace("%s %p %04d%02d%02d %02d:%02d:%02d %s (",
154 	      out ? "OUT" : "IN", conn,
155 	      tm->tm_year + 1900, tm->tm_mon + 1,
156 	      tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec,
157 	      sockmsg_string(data->hdr.msg.type));
158 
159 	for (i = 0; i < data->hdr.msg.len; i++)
160 		trace("%c", (data->buffer[i] != '\0') ? data->buffer[i] : ' ');
161 	trace(")\n");
162 }
163 
/* Note the creation of an object of the given type in the trace log. */
void trace_create(const void *data, const char *type)
{
	trace("CREATE %s %p\n",
	      type,
	      data);
}
168 
/* Note the destruction of an object of the given type in the trace log. */
void trace_destroy(const void *data, const char *type)
{
	trace("DESTROY %s %p\n",
	      type,
	      data);
}
173 
174 /**
175  * Signal handler for SIGHUP, which requests that the trace log is reopened
176  * (in the main loop).  A single byte is written to reopen_log_pipe, to awaken
177  * the poll() in the main loop.
178  */
trigger_reopen_log(int signal)179 static void trigger_reopen_log(int signal __attribute__((unused)))
180 {
181 	char c = 'A';
182 	int dummy;
183 	dummy = write(reopen_log_pipe[1], &c, 1);
184 }
185 
close_log(void)186 void close_log(void)
187 {
188 	if (tracefd >= 0)
189 		close(tracefd);
190 	tracefd = -1;
191 }
192 
reopen_log(void)193 void reopen_log(void)
194 {
195 	if (tracefile) {
196 		close_log();
197 
198 		tracefd = open(tracefile, O_WRONLY|O_CREAT|O_APPEND, 0600);
199 
200 		if (tracefd < 0)
201 			perror("Could not open tracefile");
202 		else
203 			trace("\n***\n");
204 	}
205 }
206 
/*
 * Push out as much as possible of the first message queued on conn.
 *
 * The message is sent in two phases, header then payload; out->used counts
 * the bytes of the current phase already written, so a partially sent
 * message is resumed on the next call.  Once a message has been sent
 * completely it is traced, unlinked from the queue and freed.
 *
 * Returns false if the connection's write callback reported an error
 * (negative return), true otherwise - including when nothing is queued or
 * the message is only partially written.
 */
static bool write_messages(struct connection *conn)
{
	int ret;
	struct buffered_data *out;

	out = list_top(&conn->out_list, struct buffered_data, list);
	if (out == NULL)
		return true;

	if (out->inhdr) {
		if (verbose)
			xprintf("Writing msg %s (%.*s) out to %p\n",
				sockmsg_string(out->hdr.msg.type),
				out->hdr.msg.len,
				out->buffer, conn);
		ret = conn->write(conn, out->hdr.raw + out->used,
				  sizeof(out->hdr) - out->used);
		if (ret < 0)
			return false;

		out->used += ret;
		/* Header not complete yet: resume on the next call. */
		if (out->used < sizeof(out->hdr))
			return true;

		/* Header done; from now on 'used' counts payload bytes. */
		out->inhdr = false;
		out->used = 0;

		/* Second write might block if non-zero. */
		if (out->hdr.msg.len && !conn->domain)
			return true;
	}

	ret = conn->write(conn, out->buffer + out->used,
			  out->hdr.msg.len - out->used);
	if (ret < 0)
		return false;

	out->used += ret;
	/* Payload not complete yet: resume on the next call. */
	if (out->used != out->hdr.msg.len)
		return true;

	trace_io(conn, out, 1);

	list_del(&out->list);
	talloc_free(out);

	return true;
}
255 
destroy_conn(void * _conn)256 static int destroy_conn(void *_conn)
257 {
258 	struct connection *conn = _conn;
259 
260 	/* Flush outgoing if possible, but don't block. */
261 	if (!conn->domain) {
262 		struct pollfd pfd;
263 		pfd.fd = conn->fd;
264 		pfd.events = POLLOUT;
265 
266 		while (!list_empty(&conn->out_list)
267 		       && poll(&pfd, 1, 0) == 1)
268 			if (!write_messages(conn))
269 				break;
270 		close(conn->fd);
271 	}
272         if (conn->target)
273                 talloc_unlink(conn, conn->target);
274 	list_del(&conn->list);
275 	trace_destroy(conn, "connection");
276 	return 0;
277 }
278 
279 /* This function returns index inside the array if succeed, -1 if fail */
set_fd(int fd,short events)280 static int set_fd(int fd, short events)
281 {
282 	int ret;
283 	if (current_array_size < nr_fds + 1) {
284 		struct pollfd *new_fds = NULL;
285 		unsigned long newsize;
286 
287 		/* Round up to 2^8 boundary, in practice this just
288 		 * make newsize larger than current_array_size.
289 		 */
290 		newsize = ROUNDUP(nr_fds + 1, 8);
291 
292 		new_fds = realloc(fds, sizeof(struct pollfd)*newsize);
293 		if (!new_fds)
294 			goto fail;
295 		fds = new_fds;
296 
297 		memset(&fds[0] + current_array_size, 0,
298 		       sizeof(struct pollfd ) * (newsize-current_array_size));
299 		current_array_size = newsize;
300 	}
301 
302 	fds[nr_fds].fd = fd;
303 	fds[nr_fds].events = events;
304 	ret = nr_fds;
305 	nr_fds++;
306 
307 	return ret;
308 fail:
309 	syslog(LOG_ERR, "realloc failed, ignoring fd %d\n", fd);
310 	return -1;
311 }
312 
/*
 * Rebuild the poll set from scratch.
 *
 * The poll array is cleared, then the listening sockets, the log-reopen
 * pipe and the event channel descriptor are registered (each only when
 * present), followed by one entry per connection that has no domain.
 *
 * sock / ro_sock:      descriptors to poll, or -1 to skip; the matching
 *                      *p_..._pollfd_idx is only written when the
 *                      descriptor is not -1.
 * ptimeout:            set to -1 first; set to 0 when a domain connection
 *                      already has work pending.  wrl_check_timeout() is
 *                      also handed this pointer - presumably to shorten the
 *                      timeout; confirm against its definition.
 */
static void initialize_fds(int sock, int *p_sock_pollfd_idx,
			   int ro_sock, int *p_ro_sock_pollfd_idx,
			   int *ptimeout)
{
	struct connection *conn;
	struct wrl_timestampt now;

	if (fds)
		memset(fds, 0, sizeof(struct pollfd) * current_array_size);
	nr_fds = 0;

	*ptimeout = -1;

	if (sock != -1)
		*p_sock_pollfd_idx = set_fd(sock, POLLIN|POLLPRI);
	if (ro_sock != -1)
		*p_ro_sock_pollfd_idx = set_fd(ro_sock, POLLIN|POLLPRI);
	if (reopen_log_pipe[0] != -1)
		reopen_log_pipe0_pollfd_idx =
			set_fd(reopen_log_pipe[0], POLLIN|POLLPRI);

	if (xce_handle != NULL)
		xce_pollfd_idx = set_fd(xenevtchn_fd(xce_handle),
					POLLIN|POLLPRI);

	wrl_gettime_now(&now);
	wrl_log_periodic(now);

	list_for_each_entry(conn, &connections, list) {
		if (conn->domain) {
			/* Domain connections have no fd of their own in the poll set. */
			wrl_check_timeout(conn->domain, now, ptimeout);
			if (domain_can_read(conn) ||
			    (domain_can_write(conn) &&
			     !list_empty(&conn->out_list)))
				*ptimeout = 0;
		} else {
			short events = POLLIN|POLLPRI;
			/* Only ask for writability when output is queued. */
			if (!list_empty(&conn->out_list))
				events |= POLLOUT;
			conn->pollfd_idx = set_fd(conn->fd, events);
		}
	}
}
356 
/*
 * Fetch the node called name from the database.
 *
 * If it fails, returns NULL and sets errno.
 * Temporary memory allocations will be done with ctx.
 *
 * errno is ENOMEM on allocation failure, ENOENT when the database has no
 * such record and EIO on any other database error.
 * NOTE(review): the transaction_prepend() failure path returns NULL without
 * setting errno here and without freeing node (it stays attached to ctx) -
 * confirm that transaction_prepend() sets errno itself.
 *
 * The returned node is allocated on ctx and owns the raw record; its perms,
 * data and children pointers point into that record.
 */
static struct node *read_node(struct connection *conn, const void *ctx,
			      const char *name)
{
	TDB_DATA key, data;
	struct xs_tdb_record_hdr *hdr;
	struct node *node;

	node = talloc(ctx, struct node);
	if (!node) {
		errno = ENOMEM;
		return NULL;
	}
	node->name = talloc_strdup(node, name);
	if (!node->name) {
		talloc_free(node);
		errno = ENOMEM;
		return NULL;
	}

	if (transaction_prepend(conn, name, &key))
		return NULL;

	data = tdb_fetch(tdb_ctx, key);

	if (data.dptr == NULL) {
		if (tdb_error(tdb_ctx) == TDB_ERR_NOEXIST) {
			/* The read access is still reported for a missing node. */
			node->generation = NO_GENERATION;
			access_node(conn, node, NODE_ACCESS_READ, NULL);
			errno = ENOENT;
		} else {
			log("TDB error on read: %s", tdb_errorstr(tdb_ctx));
			errno = EIO;
		}
		talloc_free(node);
		return NULL;
	}

	node->parent = NULL;
	/* Tie the record's lifetime to the node. */
	talloc_steal(node, data.dptr);

	/* Datalen, childlen, number of permissions */
	hdr = (void *)data.dptr;
	node->generation = hdr->generation;
	node->num_perms = hdr->num_perms;
	node->datalen = hdr->datalen;
	node->childlen = hdr->childlen;

	/* Permissions are struct xs_permissions. */
	node->perms = hdr->perms;
	/* Data is binary blob (usually ascii, no nul). */
	node->data = node->perms + node->num_perms;
	/* Children is strings, nul separated. */
	node->children = node->data + node->datalen;

	access_node(conn, node, NODE_ACCESS_READ, NULL);

	return node;
}
419 
/*
 * Serialise node into a database record and store it under key.
 *
 * The record consists of a header followed by the permissions, the data
 * blob and the nul-separated child list.  The temporary record buffer is
 * allocated as a talloc child of node.
 *
 * Returns 0 on success, otherwise an errno value (which is also stored in
 * errno):
 *   ENOSPC - the connection is unprivileged and the record would reach
 *            quota_max_entry_size;
 *   ENOMEM - the record buffer could not be allocated;
 *   EIO    - the database store failed (the store is also reported via
 *            corrupt()).
 */
int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node)
{
	TDB_DATA data;
	char *p;
	struct xs_tdb_record_hdr *hdr;

	data.dsize = sizeof(*hdr)
		+ node->num_perms*sizeof(node->perms[0])
		+ node->datalen + node->childlen;

	if (domain_is_unprivileged(conn) &&
	    data.dsize >= quota_max_entry_size) {
		errno = ENOSPC;
		return errno;
	}

	data.dptr = talloc_size(node, data.dsize);
	/* Without this check an allocation failure is a NULL dereference. */
	if (!data.dptr) {
		errno = ENOMEM;
		return errno;
	}

	hdr = (void *)data.dptr;
	hdr->generation = node->generation;
	hdr->num_perms = node->num_perms;
	hdr->datalen = node->datalen;
	hdr->childlen = node->childlen;

	memcpy(hdr->perms, node->perms, node->num_perms*sizeof(node->perms[0]));
	/* Byte cursor behind the permissions: data first, then children. */
	p = (char *)(hdr->perms + node->num_perms);
	memcpy(p, node->data, node->datalen);
	p += node->datalen;
	memcpy(p, node->children, node->childlen);

	/* TDB should set errno, but doesn't even set ecode AFAICT. */
	if (tdb_store(tdb_ctx, *key, data, TDB_REPLACE) != 0) {
		corrupt(conn, "Write of %s failed", key->dptr);
		errno = EIO;
		return errno;
	}
	return 0;
}
457 
write_node(struct connection * conn,struct node * node)458 static int write_node(struct connection *conn, struct node *node)
459 {
460 	TDB_DATA key;
461 
462 	if (access_node(conn, node, NODE_ACCESS_WRITE, &key))
463 		return errno;
464 
465 	return write_node_raw(conn, &key, node);
466 }
467 
perm_for_conn(struct connection * conn,struct xs_permissions * perms,unsigned int num)468 static enum xs_perm_type perm_for_conn(struct connection *conn,
469 				       struct xs_permissions *perms,
470 				       unsigned int num)
471 {
472 	unsigned int i;
473 	enum xs_perm_type mask = XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER;
474 
475 	if (!conn->can_write)
476 		mask &= ~XS_PERM_WRITE;
477 
478 	/* Owners and tools get it all... */
479 	if (!domain_is_unprivileged(conn) || perms[0].id == conn->id
480                 || (conn->target && perms[0].id == conn->target->id))
481 		return (XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER) & mask;
482 
483 	for (i = 1; i < num; i++)
484 		if (perms[i].id == conn->id
485                         || (conn->target && perms[i].id == conn->target->id))
486 			return perms[i].perms & mask;
487 
488 	return perms[0].perms & mask;
489 }
490 
/*
 * Build the name of the parent of node: everything before the last '/'
 * (searching from the second character on), or "/" if there is none.
 * The result is allocated with ctx.
 * Returns NULL and sets errno to ENOMEM if the allocation fails.
 */
static char *get_parent(const void *ctx, const char *node)
{
	const char *last_slash = strrchr(node + 1, '/');
	char *parent;

	if (last_slash)
		parent = talloc_asprintf(ctx, "%.*s",
					 (int)(last_slash - node), node);
	else
		parent = talloc_strdup(ctx, "/");

	if (parent == NULL)
		errno = ENOMEM;

	return parent;
}
507 
508 /*
509  * What do parents say?
510  * Temporary memory allocations are done with ctx.
511  */
ask_parents(struct connection * conn,const void * ctx,const char * name,enum xs_perm_type * perm)512 static int ask_parents(struct connection *conn, const void *ctx,
513 		       const char *name, enum xs_perm_type *perm)
514 {
515 	struct node *node;
516 
517 	do {
518 		name = get_parent(ctx, name);
519 		if (!name)
520 			return errno;
521 		node = read_node(conn, ctx, name);
522 		if (node)
523 			break;
524 		if (errno == ENOMEM)
525 			return errno;
526 	} while (!streq(name, "/"));
527 
528 	/* No permission at root?  We're in trouble. */
529 	if (!node) {
530 		corrupt(conn, "No permissions file at root");
531 		*perm = XS_PERM_NONE;
532 		return 0;
533 	}
534 
535 	*perm = perm_for_conn(conn, node->perms, node->num_perms);
536 	return 0;
537 }
538 
539 /*
540  * We have a weird permissions system.  You can allow someone into a
541  * specific node without allowing it in the parents.  If it's going to
542  * fail, however, we don't want the errno to indicate any information
543  * about the node.
544  * Temporary memory allocations are done with ctx.
545  */
errno_from_parents(struct connection * conn,const void * ctx,const char * node,int errnum,enum xs_perm_type perm)546 static int errno_from_parents(struct connection *conn, const void *ctx,
547 			      const char *node, int errnum,
548 			      enum xs_perm_type perm)
549 {
550 	enum xs_perm_type parent_perm = XS_PERM_NONE;
551 
552 	/* We always tell them about memory failures. */
553 	if (errnum == ENOMEM)
554 		return errnum;
555 
556 	if (ask_parents(conn, ctx, node, &parent_perm))
557 		return errno;
558 	if (parent_perm & perm)
559 		return errnum;
560 	return EACCES;
561 }
562 
563 /*
564  * If it fails, returns NULL and sets errno.
565  * Temporary memory allocations are done with ctx.
566  */
get_node(struct connection * conn,const void * ctx,const char * name,enum xs_perm_type perm)567 struct node *get_node(struct connection *conn,
568 		      const void *ctx,
569 		      const char *name,
570 		      enum xs_perm_type perm)
571 {
572 	struct node *node;
573 
574 	if (!name || !is_valid_nodename(name)) {
575 		errno = EINVAL;
576 		return NULL;
577 	}
578 	node = read_node(conn, ctx, name);
579 	/* If we don't have permission, we don't have node. */
580 	if (node) {
581 		if ((perm_for_conn(conn, node->perms, node->num_perms) & perm)
582 		    != perm) {
583 			errno = EACCES;
584 			node = NULL;
585 		}
586 	}
587 	/* Clean up errno if they weren't supposed to know. */
588 	if (!node && errno != ENOMEM)
589 		errno = errno_from_parents(conn, ctx, name, errno, perm);
590 	return node;
591 }
592 
new_buffer(void * ctx)593 static struct buffered_data *new_buffer(void *ctx)
594 {
595 	struct buffered_data *data;
596 
597 	data = talloc_zero(ctx, struct buffered_data);
598 	if (data == NULL)
599 		return NULL;
600 
601 	data->inhdr = true;
602 	return data;
603 }
604 
605 /* Return length of string (including nul) at this offset.
606  * If there is no nul, returns 0 for failure.
607  */
get_string(const struct buffered_data * data,unsigned int offset)608 static unsigned int get_string(const struct buffered_data *data,
609 			       unsigned int offset)
610 {
611 	const char *nul;
612 
613 	if (offset >= data->used)
614 		return 0;
615 
616 	nul = memchr(data->buffer + offset, 0, data->used - offset);
617 	if (!nul)
618 		return 0;
619 
620 	return nul - (data->buffer + offset) + 1;
621 }
622 
623 /* Break input into vectors, return the number, fill in up to num of them.
624  * Always returns the actual number of nuls in the input.  Stores the
625  * positions of the starts of the nul-terminated strings in vec.
626  * Callers who use this and then rely only on vec[] will
627  * ignore any data after the final nul.
628  */
get_strings(struct buffered_data * data,char * vec[],unsigned int num)629 unsigned int get_strings(struct buffered_data *data,
630 			 char *vec[], unsigned int num)
631 {
632 	unsigned int off, i, len;
633 
634 	off = i = 0;
635 	while ((len = get_string(data, off)) != 0) {
636 		if (i < num)
637 			vec[i] = data->buffer + off;
638 		i++;
639 		off += len;
640 	}
641 	return i;
642 }
643 
send_error(struct connection * conn,int error)644 static void send_error(struct connection *conn, int error)
645 {
646 	unsigned int i;
647 
648 	for (i = 0; error != xsd_errors[i].errnum; i++) {
649 		if (i == ARRAY_SIZE(xsd_errors) - 1) {
650 			eprintf("xenstored: error %i untranslatable", error);
651 			i = 0; /* EINVAL */
652 			break;
653 		}
654 	}
655 	send_reply(conn, XS_ERROR, xsd_errors[i].errstring,
656 			  strlen(xsd_errors[i].errstring) + 1);
657 }
658 
/*
 * Queue a message of the given type with len bytes of payload for
 * transmission on conn.  The payload is copied, so data need not stay valid
 * after the call.
 *
 * Payloads longer than XENSTORE_PAYLOAD_MAX are replaced by an E2BIG error
 * reply.  For every type other than XS_WATCH_EVENT the request buffer
 * conn->in is taken over for the reply and conn->in is set to NULL; watch
 * events get a fresh buffer.  Allocation failures are reported to the peer
 * with ENOMEM for replies and silently dropped for watch events.
 */
void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
		const void *data, unsigned int len)
{
	struct buffered_data *bdata;

	if ( len > XENSTORE_PAYLOAD_MAX ) {
		send_error(conn, E2BIG);
		return;
	}

	/* Replies reuse the request buffer, events need a new one. */
	if (type != XS_WATCH_EVENT) {
		bdata = conn->in;
		bdata->inhdr = true;
		bdata->used = 0;
		conn->in = NULL;
	} else {
		/* Message is a child of the connection for auto-cleanup. */
		bdata = new_buffer(conn);

		/*
		 * Allocation failure here is unfortunate: we have no way to
		 * tell anybody about it.
		 */
		if (!bdata)
			return;
	}
	/* Small payloads use the buffer embedded in the message itself. */
	if (len <= DEFAULT_BUFFER_SIZE)
		bdata->buffer = bdata->default_buffer;
	else
		bdata->buffer = talloc_array(bdata, char, len);
	if (!bdata->buffer) {
		if (type == XS_WATCH_EVENT) {
			/* Same as above: no way to tell someone. */
			talloc_free(bdata);
			return;
		}
		/* re-establish request buffer for sending ENOMEM. */
		conn->in = bdata;
		send_error(conn, ENOMEM);
		return;
	}

	/* Update relevant header fields and fill in the message body. */
	bdata->hdr.msg.type = type;
	bdata->hdr.msg.len = len;
	memcpy(bdata->buffer, data, len);

	/* Queue for later transmission. */
	list_add_tail(&bdata->list, &conn->out_list);

	return;
}
712 
713 /* Some routines (write, mkdir, etc) just need a non-error return */
send_ack(struct connection * conn,enum xsd_sockmsg_type type)714 void send_ack(struct connection *conn, enum xsd_sockmsg_type type)
715 {
716 	send_reply(conn, type, "OK", sizeof("OK"));
717 }
718 
/*
 * Check that node consists only of ASCII letters, digits and the
 * characters '-', '/', '_' and '@'.  An empty string is accepted.
 */
static bool valid_chars(const char *node)
{
	static const char allowed[] =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		"abcdefghijklmnopqrstuvwxyz"
		"0123456789-/_@";

	/* The allowed prefix must extend right up to the terminating nul. */
	return node[strspn(node, allowed)] == '\0';
}
727 
is_valid_nodename(const char * node)728 bool is_valid_nodename(const char *node)
729 {
730 	/* Must start in /. */
731 	if (!strstarts(node, "/"))
732 		return false;
733 
734 	/* Cannot end in / (unless it's just "/"). */
735 	if (strends(node, "/") && !streq(node, "/"))
736 		return false;
737 
738 	/* No double //. */
739 	if (strstr(node, "//"))
740 		return false;
741 
742 	if (strlen(node) > XENSTORE_ABS_PATH_MAX)
743 		return false;
744 
745 	return valid_chars(node);
746 }
747 
748 /* We expect one arg in the input: return NULL otherwise.
749  * The payload must contain exactly one nul, at the end.
750  */
onearg(struct buffered_data * in)751 const char *onearg(struct buffered_data *in)
752 {
753 	if (!in->used || get_string(in, 0) != in->used)
754 		return NULL;
755 	return in->buffer;
756 }
757 
perms_to_strings(const void * ctx,struct xs_permissions * perms,unsigned int num,unsigned int * len)758 static char *perms_to_strings(const void *ctx,
759 			      struct xs_permissions *perms, unsigned int num,
760 			      unsigned int *len)
761 {
762 	unsigned int i;
763 	char *strings = NULL;
764 	char buffer[MAX_STRLEN(unsigned int) + 1];
765 
766 	for (*len = 0, i = 0; i < num; i++) {
767 		if (!xs_perm_to_string(&perms[i], buffer, sizeof(buffer)))
768 			return NULL;
769 
770 		strings = talloc_realloc(ctx, strings, char,
771 					 *len + strlen(buffer) + 1);
772 		if (!strings)
773 			return NULL;
774 		strcpy(strings + *len, buffer);
775 		*len += strlen(buffer) + 1;
776 	}
777 	return strings;
778 }
779 
/*
 * Turn a possibly relative node name into an absolute one.  Names that are
 * NULL or start with '/' or '@' are returned unchanged, as are relative
 * names when the connection has no implicit path.  Otherwise the result is
 * "<implicit path>/<node>", allocated with ctx (NULL if that allocation
 * fails).
 */
char *canonicalize(struct connection *conn, const void *ctx, const char *node)
{
	const char *prefix;

	if (node == NULL)
		return (char *)node;
	if (node[0] == '/' || node[0] == '@')
		return (char *)node;

	prefix = get_implicit_path(conn);
	if (!prefix)
		return (char *)node;

	return talloc_asprintf(ctx, "%s/%s", prefix, node);
}
791 
/*
 * Canonicalize name and look the node up with get_node().  When
 * canonical_name is not NULL it receives the canonical name, whether or
 * not the lookup succeeds.
 */
static struct node *get_node_canonicalized(struct connection *conn,
					   const void *ctx,
					   const char *name,
					   char **canonical_name,
					   enum xs_perm_type perm)
{
	char *scratch;
	char **slot = canonical_name ? canonical_name : &scratch;

	*slot = canonicalize(conn, ctx, name);

	return get_node(conn, ctx, *slot, perm);
}
805 
send_directory(struct connection * conn,struct buffered_data * in)806 static int send_directory(struct connection *conn, struct buffered_data *in)
807 {
808 	struct node *node;
809 
810 	node = get_node_canonicalized(conn, in, onearg(in), NULL, XS_PERM_READ);
811 	if (!node)
812 		return errno;
813 
814 	send_reply(conn, XS_DIRECTORY, node->children, node->childlen);
815 
816 	return 0;
817 }
818 
/*
 * Handle a partial directory request.  The payload holds two strings: the
 * node name and a decimal offset into the node's child list.
 *
 * The reply starts with the node's generation count as a decimal string,
 * followed by as many complete child names (starting at the offset) as fit
 * into XENSTORE_PAYLOAD_MAX.  When the end of the child list is reached an
 * additional empty string is appended.
 *
 * Returns 0 after queueing the reply, EINVAL if the payload does not hold
 * exactly two strings, ENOMEM on allocation failure, or the errno from the
 * node lookup.
 */
static int send_directory_part(struct connection *conn,
			       struct buffered_data *in)
{
	unsigned int off, len, maxlen, genlen;
	char *child, *data;
	struct node *node;
	char gen[24];

	if (xs_count_strings(in->buffer, in->used) != 2)
		return EINVAL;

	/* First arg is node name. */
	node = get_node_canonicalized(conn, in, in->buffer, NULL, XS_PERM_READ);
	if (!node)
		return errno;

	/* Second arg is childlist offset. */
	/* NOTE(review): atoi() does no error checking; a negative value wraps
	 * to a large unsigned offset and takes the "behind list" path below. */
	off = atoi(in->buffer + strlen(in->buffer) + 1);

	/* genlen includes the terminating nul of the generation string. */
	genlen = snprintf(gen, sizeof(gen), "%"PRIu64, node->generation) + 1;

	/* Offset behind list: just return a list with an empty string. */
	if (off >= node->childlen) {
		gen[genlen] = 0;
		send_reply(conn, XS_DIRECTORY_PART, gen, genlen + 1);
		return 0;
	}

	len = 0;
	/* Leave room for the generation string and the final empty string. */
	maxlen = XENSTORE_PAYLOAD_MAX - genlen - 1;
	child = node->children + off;

	/* Take whole child names for as long as they fit. */
	while (len + strlen(child) < maxlen) {
		len += strlen(child) + 1;
		child += strlen(child) + 1;
		if (off + len == node->childlen)
			break;
	}

	data = talloc_array(in, char, genlen + len + 1);
	if (!data)
		return ENOMEM;

	memcpy(data, gen, genlen);
	memcpy(data + genlen, node->children + off, len);
	/* End of the child list reached: terminate with an empty string. */
	if (off + len == node->childlen) {
		data[genlen + len] = 0;
		len++;
	}

	send_reply(conn, XS_DIRECTORY_PART, data, genlen + len);

	return 0;
}
873 
do_read(struct connection * conn,struct buffered_data * in)874 static int do_read(struct connection *conn, struct buffered_data *in)
875 {
876 	struct node *node;
877 
878 	node = get_node_canonicalized(conn, in, onearg(in), NULL, XS_PERM_READ);
879 	if (!node)
880 		return errno;
881 
882 	send_reply(conn, XS_READ, node->data, node->datalen);
883 
884 	return 0;
885 }
886 
delete_node_single(struct connection * conn,struct node * node)887 static void delete_node_single(struct connection *conn, struct node *node)
888 {
889 	TDB_DATA key;
890 
891 	if (access_node(conn, node, NODE_ACCESS_DELETE, &key))
892 		return;
893 
894 	if (tdb_delete(tdb_ctx, key) != 0) {
895 		corrupt(conn, "Could not delete '%s'", node->name);
896 		return;
897 	}
898 
899 	domain_entry_dec(conn, node);
900 }
901 
/*
 * Return the last path component of name, i.e. the text following the
 * final '/'.  name must contain a '/'; for "/" itself the result is the
 * empty string, so callers are expected not to pass it.
 */
static char *basename(const char *name)
{
	char *last_slash = strrchr(name, '/');

	return last_slash + 1;
}
907 
construct_node(struct connection * conn,const void * ctx,const char * name)908 static struct node *construct_node(struct connection *conn, const void *ctx,
909 				   const char *name)
910 {
911 	const char *base;
912 	unsigned int baselen;
913 	struct node *parent, *node;
914 	char *children, *parentname = get_parent(ctx, name);
915 
916 	if (!parentname)
917 		return NULL;
918 
919 	/* If parent doesn't exist, create it. */
920 	parent = read_node(conn, parentname, parentname);
921 	if (!parent)
922 		parent = construct_node(conn, ctx, parentname);
923 	if (!parent)
924 		return NULL;
925 
926 	if (domain_entry(conn) >= quota_nb_entry_per_domain) {
927 		errno = ENOSPC;
928 		return NULL;
929 	}
930 
931 	/* Add child to parent. */
932 	base = basename(name);
933 	baselen = strlen(base) + 1;
934 	children = talloc_array(ctx, char, parent->childlen + baselen);
935 	if (!children)
936 		goto nomem;
937 	memcpy(children, parent->children, parent->childlen);
938 	memcpy(children + parent->childlen, base, baselen);
939 	parent->children = children;
940 	parent->childlen += baselen;
941 
942 	/* Allocate node */
943 	node = talloc(ctx, struct node);
944 	if (!node)
945 		goto nomem;
946 	node->name = talloc_strdup(node, name);
947 	if (!node->name)
948 		goto nomem;
949 
950 	/* Inherit permissions, except unprivileged domains own what they create */
951 	node->num_perms = parent->num_perms;
952 	node->perms = talloc_memdup(node, parent->perms,
953 				    node->num_perms * sizeof(node->perms[0]));
954 	if (!node->perms)
955 		goto nomem;
956 	if (domain_is_unprivileged(conn))
957 		node->perms[0].id = conn->id;
958 
959 	/* No children, no data */
960 	node->children = node->data = NULL;
961 	node->childlen = node->datalen = 0;
962 	node->parent = parent;
963 	domain_entry_inc(conn, node);
964 	return node;
965 
966 nomem:
967 	errno = ENOMEM;
968 	return NULL;
969 }
970 
destroy_node(void * _node)971 static int destroy_node(void *_node)
972 {
973 	struct node *node = _node;
974 	TDB_DATA key;
975 
976 	if (streq(node->name, "/"))
977 		corrupt(NULL, "Destroying root node!");
978 
979 	key.dptr = (void *)node->name;
980 	key.dsize = strlen(node->name);
981 
982 	tdb_delete(tdb_ctx, key);
983 	return 0;
984 }
985 
/*
 * Create the node called name with the given data and write it, together
 * with every node reachable through its parent links, to the database.
 *
 * While the chain is being written, each node already written gets
 * destroy_node() as talloc destructor; the destructors are removed again
 * once the whole chain has been written.  If a write fails, the entry
 * accounting is decremented for the failing node and NULL is returned with
 * the destructors set so far left in place.
 * NOTE(review): presumably this makes freeing ctx roll back the partial
 * creation - confirm, in particular for the pre-existing ancestor that
 * ends the chain.
 *
 * Temporary memory allocations are done with ctx.
 * Returns the new node, or NULL on failure.
 */
static struct node *create_node(struct connection *conn, const void *ctx,
				const char *name,
				void *data, unsigned int datalen)
{
	struct node *node, *i;

	node = construct_node(conn, ctx, name);
	if (!node)
		return NULL;

	node->data = data;
	node->datalen = datalen;

	/* We write out the nodes down, setting destructor in case
	 * something goes wrong. */
	for (i = node; i; i = i->parent) {
		if (write_node(conn, i)) {
			domain_entry_dec(conn, i);
			return NULL;
		}
		talloc_set_destructor(i, destroy_node);
	}

	/* OK, now remove destructors so they stay around */
	for (i = node; i; i = i->parent)
		talloc_set_destructor(i, NULL);
	return node;
}
1014 
1015 /* path, data... */
do_write(struct connection * conn,struct buffered_data * in)1016 static int do_write(struct connection *conn, struct buffered_data *in)
1017 {
1018 	unsigned int offset, datalen;
1019 	struct node *node;
1020 	char *vec[1] = { NULL }; /* gcc4 + -W + -Werror fucks code. */
1021 	char *name;
1022 
1023 	/* Extra "strings" can be created by binary data. */
1024 	if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec))
1025 		return EINVAL;
1026 
1027 	offset = strlen(vec[0]) + 1;
1028 	datalen = in->used - offset;
1029 
1030 	node = get_node_canonicalized(conn, in, vec[0], &name, XS_PERM_WRITE);
1031 	if (!node) {
1032 		/* No permissions, invalid input? */
1033 		if (errno != ENOENT)
1034 			return errno;
1035 		node = create_node(conn, in, name, in->buffer + offset,
1036 				   datalen);
1037 		if (!node)
1038 			return errno;
1039 	} else {
1040 		node->data = in->buffer + offset;
1041 		node->datalen = datalen;
1042 		if (write_node(conn, node))
1043 			return errno;
1044 	}
1045 
1046 	fire_watches(conn, in, name, false);
1047 	send_ack(conn, XS_WRITE);
1048 
1049 	return 0;
1050 }
1051 
do_mkdir(struct connection * conn,struct buffered_data * in)1052 static int do_mkdir(struct connection *conn, struct buffered_data *in)
1053 {
1054 	struct node *node;
1055 	char *name;
1056 
1057 	node = get_node_canonicalized(conn, in, onearg(in), &name,
1058 				      XS_PERM_WRITE);
1059 
1060 	/* If it already exists, fine. */
1061 	if (!node) {
1062 		/* No permissions? */
1063 		if (errno != ENOENT)
1064 			return errno;
1065 		node = create_node(conn, in, name, NULL, 0);
1066 		if (!node)
1067 			return errno;
1068 		fire_watches(conn, in, name, false);
1069 	}
1070 	send_ack(conn, XS_MKDIR);
1071 
1072 	return 0;
1073 }
1074 
delete_node(struct connection * conn,struct node * node)1075 static void delete_node(struct connection *conn, struct node *node)
1076 {
1077 	unsigned int i;
1078 	char *name;
1079 
1080 	/* Delete self, then delete children.  If we crash, then the worst
1081 	   that can happen is the children will continue to take up space, but
1082 	   will otherwise be unreachable. */
1083 	delete_node_single(conn, node);
1084 
1085 	/* Delete children, too. */
1086 	for (i = 0; i < node->childlen; i += strlen(node->children+i) + 1) {
1087 		struct node *child;
1088 
1089 		name = talloc_asprintf(node, "%s/%s", node->name,
1090 				       node->children + i);
1091 		child = name ? read_node(conn, node, name) : NULL;
1092 		if (child) {
1093 			delete_node(conn, child);
1094 		}
1095 		else {
1096 			trace("delete_node: Error deleting child '%s/%s'!\n",
1097 			      node->name, node->children + i);
1098 			/* Skip it, we've already deleted the parent. */
1099 		}
1100 		talloc_free(name);
1101 	}
1102 }
1103 
1104 
/*
 * Cut len bytes starting at offset off out of a buffer holding total
 * bytes, by sliding the bytes behind the cut down over it.  The buffer is
 * not resized; the caller accounts for the shorter length.
 */
static void memdel(void *mem, unsigned off, unsigned len, unsigned total)
{
	char *base = mem;
	char *hole = base + off;
	const char *tail = hole + len;

	/* The regions may overlap, hence memmove rather than memcpy. */
	memmove(hole, tail, total - off - len);
}
1110 
1111 
remove_child_entry(struct connection * conn,struct node * node,size_t offset)1112 static int remove_child_entry(struct connection *conn, struct node *node,
1113 			      size_t offset)
1114 {
1115 	size_t childlen = strlen(node->children + offset);
1116 	memdel(node->children, offset, childlen + 1, node->childlen);
1117 	node->childlen -= childlen + 1;
1118 	return write_node(conn, node);
1119 }
1120 
1121 
delete_child(struct connection * conn,struct node * node,const char * childname)1122 static int delete_child(struct connection *conn,
1123 			struct node *node, const char *childname)
1124 {
1125 	unsigned int i;
1126 
1127 	for (i = 0; i < node->childlen; i += strlen(node->children+i) + 1) {
1128 		if (streq(node->children+i, childname)) {
1129 			return remove_child_entry(conn, node, i);
1130 		}
1131 	}
1132 	corrupt(conn, "Can't find child '%s' in %s", childname, node->name);
1133 	return ENOENT;
1134 }
1135 
1136 
/*
 * Remove 'node' (whose path is 'name') together with its subtree.
 *
 * The entry is unlinked from the parent's child list before the node
 * itself is deleted: if we crash in between, the worst outcome is a child
 * that still takes up space but is no longer reachable.
 *
 * Returns 0 on success, errno if the parent name cannot be derived,
 * ENOMEM/EINVAL if the parent cannot be read, and EINVAL if the parent has
 * no matching child entry.
 */
static int _rm(struct connection *conn, const void *ctx, struct node *node,
	       const char *name)
{
	char *parentname;
	struct node *parent;

	parentname = get_parent(ctx, name);
	if (!parentname)
		return errno;

	parent = read_node(conn, ctx, parentname);
	if (!parent) {
		if (errno == ENOMEM)
			return ENOMEM;
		return EINVAL;
	}

	if (delete_child(conn, parent, basename(name)) != 0)
		return EINVAL;

	delete_node(conn, node);
	return 0;
}
1159 
1160 
do_rm(struct connection * conn,struct buffered_data * in)1161 static int do_rm(struct connection *conn, struct buffered_data *in)
1162 {
1163 	struct node *node;
1164 	int ret;
1165 	char *name;
1166 	char *parentname;
1167 
1168 	node = get_node_canonicalized(conn, in, onearg(in), &name,
1169 				      XS_PERM_WRITE);
1170 	if (!node) {
1171 		/* Didn't exist already?  Fine, if parent exists. */
1172 		if (errno == ENOENT) {
1173 			parentname = get_parent(in, name);
1174 			if (!parentname)
1175 				return errno;
1176 			node = read_node(conn, in, parentname);
1177 			if (node) {
1178 				send_ack(conn, XS_RM);
1179 				return 0;
1180 			}
1181 			/* Restore errno, just in case. */
1182 			if (errno != ENOMEM)
1183 				errno = ENOENT;
1184 		}
1185 		return errno;
1186 	}
1187 
1188 	if (streq(name, "/"))
1189 		return EINVAL;
1190 
1191 	ret = _rm(conn, in, node, name);
1192 	if (ret)
1193 		return ret;
1194 
1195 	fire_watches(conn, in, name, true);
1196 	send_ack(conn, XS_RM);
1197 
1198 	return 0;
1199 }
1200 
1201 
do_get_perms(struct connection * conn,struct buffered_data * in)1202 static int do_get_perms(struct connection *conn, struct buffered_data *in)
1203 {
1204 	struct node *node;
1205 	char *strings;
1206 	unsigned int len;
1207 
1208 	node = get_node_canonicalized(conn, in, onearg(in), NULL, XS_PERM_READ);
1209 	if (!node)
1210 		return errno;
1211 
1212 	strings = perms_to_strings(node, node->perms, node->num_perms, &len);
1213 	if (!strings)
1214 		return errno;
1215 
1216 	send_reply(conn, XS_GET_PERMS, strings, len);
1217 
1218 	return 0;
1219 }
1220 
do_set_perms(struct connection * conn,struct buffered_data * in)1221 static int do_set_perms(struct connection *conn, struct buffered_data *in)
1222 {
1223 	unsigned int num;
1224 	struct xs_permissions *perms;
1225 	char *name, *permstr;
1226 	struct node *node;
1227 
1228 	num = xs_count_strings(in->buffer, in->used);
1229 	if (num < 2)
1230 		return EINVAL;
1231 
1232 	/* First arg is node name. */
1233 	/* We must own node to do this (tools can do this too). */
1234 	node = get_node_canonicalized(conn, in, in->buffer, &name,
1235 				      XS_PERM_WRITE | XS_PERM_OWNER);
1236 	if (!node)
1237 		return errno;
1238 
1239 	permstr = in->buffer + strlen(in->buffer) + 1;
1240 	num--;
1241 
1242 	perms = talloc_array(node, struct xs_permissions, num);
1243 	if (!perms)
1244 		return ENOMEM;
1245 	if (!xs_strings_to_perms(perms, num, permstr))
1246 		return errno;
1247 
1248 	/* Unprivileged domains may not change the owner. */
1249 	if (domain_is_unprivileged(conn) && perms[0].id != node->perms[0].id)
1250 		return EPERM;
1251 
1252 	domain_entry_dec(conn, node);
1253 	node->perms = perms;
1254 	node->num_perms = num;
1255 	domain_entry_inc(conn, node);
1256 
1257 	if (write_node(conn, node))
1258 		return errno;
1259 
1260 	fire_watches(conn, in, name, false);
1261 	send_ack(conn, XS_SET_PERMS);
1262 
1263 	return 0;
1264 }
1265 
1266 static struct {
1267 	const char *str;
1268 	int (*func)(struct connection *conn, struct buffered_data *in);
1269 } const wire_funcs[XS_TYPE_COUNT] = {
1270 	[XS_CONTROL]           = { "CONTROL",           do_control },
1271 	[XS_DIRECTORY]         = { "DIRECTORY",         send_directory },
1272 	[XS_READ]              = { "READ",              do_read },
1273 	[XS_GET_PERMS]         = { "GET_PERMS",         do_get_perms },
1274 	[XS_WATCH]             = { "WATCH",             do_watch },
1275 	[XS_UNWATCH]           = { "UNWATCH",           do_unwatch },
1276 	[XS_TRANSACTION_START] = { "TRANSACTION_START", do_transaction_start },
1277 	[XS_TRANSACTION_END]   = { "TRANSACTION_END",   do_transaction_end },
1278 	[XS_INTRODUCE]         = { "INTRODUCE",         do_introduce },
1279 	[XS_RELEASE]           = { "RELEASE",           do_release },
1280 	[XS_GET_DOMAIN_PATH]   = { "GET_DOMAIN_PATH",   do_get_domain_path },
1281 	[XS_WRITE]             = { "WRITE",             do_write },
1282 	[XS_MKDIR]             = { "MKDIR",             do_mkdir },
1283 	[XS_RM]                = { "RM",                do_rm },
1284 	[XS_SET_PERMS]         = { "SET_PERMS",         do_set_perms },
1285 	[XS_WATCH_EVENT]       = { "WATCH_EVENT",       NULL },
1286 	[XS_ERROR]             = { "ERROR",             NULL },
1287 	[XS_IS_DOMAIN_INTRODUCED] =
1288 			{ "IS_DOMAIN_INTRODUCED", do_is_domain_introduced },
1289 	[XS_RESUME]            = { "RESUME",            do_resume },
1290 	[XS_SET_TARGET]        = { "SET_TARGET",        do_set_target },
1291 	[XS_RESET_WATCHES]     = { "RESET_WATCHES",     do_reset_watches },
1292 	[XS_DIRECTORY_PART]    = { "DIRECTORY_PART",    send_directory_part },
1293 };
1294 
sockmsg_string(enum xsd_sockmsg_type type)1295 static const char *sockmsg_string(enum xsd_sockmsg_type type)
1296 {
1297 	if ((unsigned)type < XS_TYPE_COUNT && wire_funcs[type].str)
1298 		return wire_funcs[type].str;
1299 
1300 	return "**UNKNOWN**";
1301 }
1302 
1303 /* Process "in" for conn: "in" will vanish after this conversation, so
1304  * we can talloc off it for temporary variables.  May free "conn".
1305  */
process_message(struct connection * conn,struct buffered_data * in)1306 static void process_message(struct connection *conn, struct buffered_data *in)
1307 {
1308 	struct transaction *trans;
1309 	enum xsd_sockmsg_type type = in->hdr.msg.type;
1310 	int ret;
1311 
1312 	trans = transaction_lookup(conn, in->hdr.msg.tx_id);
1313 	if (IS_ERR(trans)) {
1314 		send_error(conn, -PTR_ERR(trans));
1315 		return;
1316 	}
1317 
1318 	assert(conn->transaction == NULL);
1319 	conn->transaction = trans;
1320 
1321 	if ((unsigned)type < XS_TYPE_COUNT && wire_funcs[type].func)
1322 		ret = wire_funcs[type].func(conn, in);
1323 	else {
1324 		eprintf("Client unknown operation %i", type);
1325 		ret = ENOSYS;
1326 	}
1327 	if (ret)
1328 		send_error(conn, ret);
1329 
1330 	conn->transaction = NULL;
1331 }
1332 
consider_message(struct connection * conn)1333 static void consider_message(struct connection *conn)
1334 {
1335 	if (verbose)
1336 		xprintf("Got message %s len %i from %p\n",
1337 			sockmsg_string(conn->in->hdr.msg.type),
1338 			conn->in->hdr.msg.len, conn);
1339 
1340 	process_message(conn, conn->in);
1341 
1342 	assert(conn->in == NULL);
1343 }
1344 
1345 /* Errors in reading or allocating here mean we get out of sync, so we
1346  * drop the whole client connection. */
handle_input(struct connection * conn)1347 static void handle_input(struct connection *conn)
1348 {
1349 	int bytes;
1350 	struct buffered_data *in;
1351 
1352 	if (!conn->in) {
1353 		conn->in = new_buffer(conn);
1354 		/* In case of no memory just try it again next time. */
1355 		if (!conn->in)
1356 			return;
1357 	}
1358 	in = conn->in;
1359 
1360 	/* Not finished header yet? */
1361 	if (in->inhdr) {
1362 		if (in->used != sizeof(in->hdr)) {
1363 			bytes = conn->read(conn, in->hdr.raw + in->used,
1364 					   sizeof(in->hdr) - in->used);
1365 			if (bytes < 0)
1366 				goto bad_client;
1367 			in->used += bytes;
1368 			if (in->used != sizeof(in->hdr))
1369 				return;
1370 
1371 			if (in->hdr.msg.len > XENSTORE_PAYLOAD_MAX) {
1372 				syslog(LOG_ERR, "Client tried to feed us %i",
1373 				       in->hdr.msg.len);
1374 				goto bad_client;
1375 			}
1376 		}
1377 
1378 		if (in->hdr.msg.len <= DEFAULT_BUFFER_SIZE)
1379 			in->buffer = in->default_buffer;
1380 		else
1381 			in->buffer = talloc_array(in, char, in->hdr.msg.len);
1382 		/* In case of no memory just try it again next time. */
1383 		if (!in->buffer)
1384 			return;
1385 		in->used = 0;
1386 		in->inhdr = false;
1387 	}
1388 
1389 	bytes = conn->read(conn, in->buffer + in->used,
1390 			   in->hdr.msg.len - in->used);
1391 	if (bytes < 0)
1392 		goto bad_client;
1393 
1394 	in->used += bytes;
1395 	if (in->used != in->hdr.msg.len)
1396 		return;
1397 
1398 	trace_io(conn, in, 0);
1399 	consider_message(conn);
1400 	return;
1401 
1402 bad_client:
1403 	/* Kill it. */
1404 	talloc_free(conn);
1405 }
1406 
/*
 * Send queued output on a connection; the connection is released when
 * write_messages() reports failure.
 */
static void handle_output(struct connection *conn)
{
	if (write_messages(conn))
		return;

	talloc_free(conn);
}
1412 
/*
 * Allocate and register a new connection using the given transport
 * callbacks.
 *
 * The connection starts without a file descriptor or poll slot (both -1),
 * writable, with empty output, watch and transaction lists.  It is added
 * to the global connection list and gets destroy_conn as its talloc
 * destructor.  Returns NULL if the allocation fails.
 */
struct connection *new_connection(connwritefn_t *write, connreadfn_t *read)
{
	struct connection *conn;

	conn = talloc_zero(talloc_autofree_context(), struct connection);
	if (!conn)
		return NULL;

	conn->fd = -1;
	conn->pollfd_idx = -1;
	conn->write = write;
	conn->read = read;
	conn->can_write = true;
	conn->transaction_started = 0;

	INIT_LIST_HEAD(&conn->out_list);
	INIT_LIST_HEAD(&conn->watches);
	INIT_LIST_HEAD(&conn->transaction_list);

	list_add_tail(&conn->list, &connections);
	talloc_set_destructor(conn, destroy_conn);
	trace_create(conn, "connection");

	return conn;
}
1436 
1437 #ifdef NO_SOCKETS
/* Socket support is compiled out: there is nothing to accept. */
static void accept_connection(int sock, bool canwrite)
{
	(void)sock;
	(void)canwrite;
}
1441 #else
writefd(struct connection * conn,const void * data,unsigned int len)1442 static int writefd(struct connection *conn, const void *data, unsigned int len)
1443 {
1444 	int rc;
1445 
1446 	while ((rc = write(conn->fd, data, len)) < 0) {
1447 		if (errno == EAGAIN) {
1448 			rc = 0;
1449 			break;
1450 		}
1451 		if (errno != EINTR)
1452 			break;
1453 	}
1454 
1455 	return rc;
1456 }
1457 
readfd(struct connection * conn,void * data,unsigned int len)1458 static int readfd(struct connection *conn, void *data, unsigned int len)
1459 {
1460 	int rc;
1461 
1462 	while ((rc = read(conn->fd, data, len)) < 0) {
1463 		if (errno == EAGAIN) {
1464 			rc = 0;
1465 			break;
1466 		}
1467 		if (errno != EINTR)
1468 			break;
1469 	}
1470 
1471 	/* Reading zero length means we're done with this connection. */
1472 	if ((rc == 0) && (len != 0)) {
1473 		errno = EBADF;
1474 		rc = -1;
1475 	}
1476 
1477 	return rc;
1478 }
1479 
accept_connection(int sock,bool canwrite)1480 static void accept_connection(int sock, bool canwrite)
1481 {
1482 	int fd;
1483 	struct connection *conn;
1484 
1485 	fd = accept(sock, NULL, NULL);
1486 	if (fd < 0)
1487 		return;
1488 
1489 	conn = new_connection(writefd, readfd);
1490 	if (conn) {
1491 		conn->fd = fd;
1492 		conn->can_write = canwrite;
1493 	} else
1494 		close(fd);
1495 }
1496 #endif
1497 
/*
 * Flags handed to tdb_open_ex() when the database is created.  Zero unless
 * the internal-db option is given, in which case main() sets
 * TDB_INTERNAL|TDB_NOLOCK.
 */
static int tdb_flags = 0;
1499 
1500 /* We create initial nodes manually. */
manual_node(const char * name,const char * child)1501 static void manual_node(const char *name, const char *child)
1502 {
1503 	struct node *node;
1504 	struct xs_permissions perms = { .id = 0, .perms = XS_PERM_NONE };
1505 
1506 	node = talloc_zero(NULL, struct node);
1507 	if (!node)
1508 		barf_perror("Could not allocate initial node %s", name);
1509 
1510 	node->name = name;
1511 	node->perms = &perms;
1512 	node->num_perms = 1;
1513 	node->children = (char *)child;
1514 	if (child)
1515 		node->childlen = strlen(child) + 1;
1516 
1517 	if (write_node(NULL, node))
1518 		barf_perror("Could not create initial node %s", name);
1519 	talloc_free(node);
1520 }
1521 
/*
 * Log callback handed to tdb_open_ex(): formats the message and sends it
 * to the trace file and syslog, and to xprintf() as well when running
 * verbosely.  'tdb' and 'level' are not used.
 */
static void tdb_logger(TDB_CONTEXT *tdb, int level, const char * fmt, ...)
{
	va_list args;
	char *msg;

	va_start(args, fmt);
	msg = talloc_vasprintf(NULL, fmt, args);
	va_end(args);

	if (!msg) {
		trace("talloc failure during logging\n");
		syslog(LOG_ERR, "talloc failure during logging\n");
		return;
	}

	trace("TDB: %s\n", msg);
	syslog(LOG_ERR, "TDB: %s",  msg);
	if (verbose)
		xprintf("TDB: %s", msg);

	talloc_free(msg);
}
1542 
setup_structure(void)1543 static void setup_structure(void)
1544 {
1545 	char *tdbname;
1546 	tdbname = talloc_strdup(talloc_autofree_context(), xs_daemon_tdb());
1547 	if (!tdbname)
1548 		barf_perror("Could not create tdbname");
1549 
1550 	if (!(tdb_flags & TDB_INTERNAL))
1551 		unlink(tdbname);
1552 
1553 	tdb_ctx = tdb_open_ex(tdbname, 7919, tdb_flags, O_RDWR|O_CREAT|O_EXCL,
1554 			      0640, &tdb_logger, NULL);
1555 	if (!tdb_ctx)
1556 		barf_perror("Could not create tdb file %s", tdbname);
1557 
1558 	manual_node("/", "tool");
1559 	manual_node("/tool", "xenstored");
1560 	manual_node("/tool/xenstored", NULL);
1561 
1562 	check_store();
1563 }
1564 
1565 
/*
 * Hash callback for string-keyed hashtables: starts from 5381 and, for
 * every character of the NUL-terminated key, multiplies the hash by 33 and
 * adds the character.
 */
static unsigned int hash_from_key_fn(void *k)
{
	const char *p;
	unsigned int hash = 5381;

	for (p = k; *p != '\0'; p++)
		hash = hash * 33 + (unsigned int)*p;

	return hash;
}
1577 
1578 
/*
 * Equality callback for string-keyed hashtables: returns 1 when both keys
 * are the same NUL-terminated string, 0 otherwise.
 */
static int keys_equal_fn(void *key1, void *key2)
{
	const char *lhs = key1;
	const char *rhs = key2;

	return strcmp(lhs, rhs) == 0;
}
1583 
1584 
/*
 * Build the path of child s2 below parent s1 as a new talloc string with
 * no parent context.  The root "/" is special-cased so the result does
 * not start with a double slash.  Returns NULL if the allocation fails.
 */
static char *child_name(const char *s1, const char *s2)
{
	bool parent_is_root = (strcmp(s1, "/") == 0);

	if (parent_is_root)
		return talloc_asprintf(NULL, "/%s", s2);

	return talloc_asprintf(NULL, "%s/%s", s1, s2);
}
1594 
1595 
/*
 * Record a private malloc'ed copy of str as a key in hash; the stored
 * value is the constant (void *)1.
 *
 * Returns 0 when the copy cannot be allocated, otherwise the result of
 * hashtable_insert(); callers treat 0 as "out of memory".
 */
int remember_string(struct hashtable *hash, const char *str)
{
	size_t size = strlen(str) + 1;
	char *k = malloc(size);
	int inserted;

	if (!k)
		return 0;
	memcpy(k, str, size);

	inserted = hashtable_insert(hash, k, (void *)1);
	/*
	 * Do not leak the copy when the insertion fails.
	 * NOTE(review): assumes the table does not keep the key when
	 * hashtable_insert() returns 0 -- confirm against hashtable.c.
	 */
	if (!inserted)
		free(k);

	return inserted;
}
1605 
1606 
1607 /**
1608  * A node has a children field that names the children of the node, separated
1609  * by NULs.  We check whether there are entries in there that are duplicated
1610  * (and if so, delete the second one), and whether there are any that do not
1611  * have a corresponding child node (and if so, delete them).  Each valid child
1612  * is then recursively checked.
1613  *
1614  * No deleting is performed if the recovery flag is cleared (i.e. -R was
1615  * passed on the command line).
1616  *
1617  * As we go, we record each node in the given reachable hashtable.  These
1618  * entries will be used later in clean_store.
1619  */
check_store_(const char * name,struct hashtable * reachable)1620 static int check_store_(const char *name, struct hashtable *reachable)
1621 {
1622 	struct node *node = read_node(NULL, name, name);
1623 	int ret = 0;
1624 
1625 	if (node) {
1626 		size_t i = 0;
1627 
1628 		struct hashtable * children =
1629 			create_hashtable(16, hash_from_key_fn, keys_equal_fn);
1630 
1631 		if (!remember_string(reachable, name)) {
1632 			hashtable_destroy(children, 0);
1633 			log("check_store: ENOMEM");
1634 			return ENOMEM;
1635 		}
1636 
1637 		while (i < node->childlen && !ret) {
1638 			struct node *childnode;
1639 			size_t childlen = strlen(node->children + i);
1640 			char * childname = child_name(node->name,
1641 						      node->children + i);
1642 
1643 			if (!childname) {
1644 				log("check_store: ENOMEM");
1645 				ret = ENOMEM;
1646 				break;
1647 			}
1648 			childnode = read_node(NULL, childname, childname);
1649 
1650 			if (childnode) {
1651 				if (hashtable_search(children, childname)) {
1652 					log("check_store: '%s' is duplicated!",
1653 					    childname);
1654 
1655 					if (recovery) {
1656 						remove_child_entry(NULL, node,
1657 								   i);
1658 						i -= childlen + 1;
1659 					}
1660 				}
1661 				else {
1662 					if (!remember_string(children,
1663 							     childname)) {
1664 						log("check_store: ENOMEM");
1665 						talloc_free(childnode);
1666 						talloc_free(childname);
1667 						ret = ENOMEM;
1668 						break;
1669 					}
1670 					ret = check_store_(childname,
1671 							   reachable);
1672 				}
1673 			} else if (errno != ENOMEM) {
1674 				log("check_store: No child '%s' found!\n",
1675 				    childname);
1676 
1677 				if (recovery) {
1678 					remove_child_entry(NULL, node, i);
1679 					i -= childlen + 1;
1680 				}
1681 			} else {
1682 				log("check_store: ENOMEM");
1683 				ret = ENOMEM;
1684 			}
1685 
1686 			talloc_free(childnode);
1687 			talloc_free(childname);
1688 			i += childlen + 1;
1689 		}
1690 
1691 		hashtable_destroy(children, 0 /* Don't free values (they are
1692 						 all (void *)1) */);
1693 		talloc_free(node);
1694 	} else if (errno != ENOMEM) {
1695 		/* Impossible, because no database should ever be without the
1696 		   root, and otherwise, we've just checked in our caller
1697 		   (which made a recursive call to get here). */
1698 
1699 		log("check_store: No child '%s' found: impossible!", name);
1700 	} else {
1701 		log("check_store: ENOMEM");
1702 		ret = ENOMEM;
1703 	}
1704 
1705 	return ret;
1706 }
1707 
1708 
1709 /**
1710  * Helper to clean_store below.
1711  */
clean_store_(TDB_CONTEXT * tdb,TDB_DATA key,TDB_DATA val,void * private)1712 static int clean_store_(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA val,
1713 			void *private)
1714 {
1715 	struct hashtable *reachable = private;
1716 	char *slash;
1717 	char * name = talloc_strndup(NULL, key.dptr, key.dsize);
1718 
1719 	if (!name) {
1720 		log("clean_store: ENOMEM");
1721 		return 1;
1722 	}
1723 
1724 	if (name[0] != '/') {
1725 		slash = strchr(name, '/');
1726 		if (slash)
1727 			*slash = 0;
1728 	}
1729 	if (!hashtable_search(reachable, name)) {
1730 		log("clean_store: '%s' is orphaned!", name);
1731 		if (recovery) {
1732 			tdb_delete(tdb, key);
1733 		}
1734 	}
1735 
1736 	talloc_free(name);
1737 
1738 	return 0;
1739 }
1740 
1741 
1742 /**
1743  * Given the list of reachable nodes, iterate over the whole store, and
1744  * remove any that were not reached.
1745  */
clean_store(struct hashtable * reachable)1746 static void clean_store(struct hashtable *reachable)
1747 {
1748 	tdb_traverse(tdb_ctx, &clean_store_, reachable);
1749 }
1750 
1751 
check_store(void)1752 void check_store(void)
1753 {
1754 	char * root = talloc_strdup(NULL, "/");
1755 	struct hashtable * reachable =
1756 		create_hashtable(16, hash_from_key_fn, keys_equal_fn);
1757 
1758 	if (!reachable) {
1759 		log("check_store: ENOMEM");
1760 		return;
1761 	}
1762 
1763 	log("Checking store ...");
1764 	if (!check_store_(root, reachable) &&
1765 	    !check_transactions(reachable))
1766 		clean_store(reachable);
1767 	log("Checking store complete.");
1768 
1769 	hashtable_destroy(reachable, 0 /* Don't free values (they are all
1770 					  (void *)1) */);
1771 	talloc_free(root);
1772 }
1773 
1774 
1775 /* Something is horribly wrong: check the store. */
corrupt(struct connection * conn,const char * fmt,...)1776 void corrupt(struct connection *conn, const char *fmt, ...)
1777 {
1778 	va_list arglist;
1779 	char *str;
1780 	int saved_errno = errno;
1781 
1782 	va_start(arglist, fmt);
1783 	str = talloc_vasprintf(NULL, fmt, arglist);
1784 	va_end(arglist);
1785 
1786 	log("corruption detected by connection %i: err %s: %s",
1787 	    conn ? (int)conn->id : -1, strerror(saved_errno), str);
1788 
1789 	check_store();
1790 }
1791 
1792 
1793 #ifdef NO_SOCKETS
/*
 * Socket support is compiled out: point both results at a shared
 * descriptor value of -1.
 */
static void init_sockets(int **psock, int **pro_sock)
{
	static int no_fd = -1;

	*pro_sock = &no_fd;
	*psock = &no_fd;
}
1799 #else
/*
 * talloc destructor for a heap-allocated int holding a file descriptor:
 * closes the descriptor and always returns 0.
 */
static int destroy_fd(void *_fd)
{
	close(*(int *)_fd);

	return 0;
}
1806 
init_sockets(int ** psock,int ** pro_sock)1807 static void init_sockets(int **psock, int **pro_sock)
1808 {
1809 	struct sockaddr_un addr;
1810 	int *sock, *ro_sock;
1811 	const char *soc_str = xs_daemon_socket();
1812 	const char *soc_str_ro = xs_daemon_socket_ro();
1813 
1814 	/* Create sockets for them to listen to. */
1815 	*psock = sock = talloc(talloc_autofree_context(), int);
1816 	if (!sock)
1817 		barf_perror("No memory when creating sockets");
1818 	*sock = socket(PF_UNIX, SOCK_STREAM, 0);
1819 	if (*sock < 0)
1820 		barf_perror("Could not create socket");
1821 	*pro_sock = ro_sock = talloc(talloc_autofree_context(), int);
1822 	if (!ro_sock)
1823 		barf_perror("No memory when creating sockets");
1824 	*ro_sock = socket(PF_UNIX, SOCK_STREAM, 0);
1825 	if (*ro_sock < 0)
1826 		barf_perror("Could not create socket");
1827 	talloc_set_destructor(sock, destroy_fd);
1828 	talloc_set_destructor(ro_sock, destroy_fd);
1829 
1830 	/* FIXME: Be more sophisticated, don't mug running daemon. */
1831 	unlink(soc_str);
1832 	unlink(soc_str_ro);
1833 
1834 	addr.sun_family = AF_UNIX;
1835 
1836 	if(strlen(soc_str) >= sizeof(addr.sun_path))
1837 		barf_perror("socket string '%s' too long", soc_str);
1838 	strcpy(addr.sun_path, soc_str);
1839 	if (bind(*sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1840 		barf_perror("Could not bind socket to %s", soc_str);
1841 
1842 	if(strlen(soc_str_ro) >= sizeof(addr.sun_path))
1843 		barf_perror("socket string '%s' too long", soc_str_ro);
1844 	strcpy(addr.sun_path, soc_str_ro);
1845 	if (bind(*ro_sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1846 		barf_perror("Could not bind socket to %s", soc_str_ro);
1847 
1848 	if (chmod(soc_str, 0600) != 0
1849 	    || chmod(soc_str_ro, 0660) != 0)
1850 		barf_perror("Could not chmod sockets");
1851 
1852 	if (listen(*sock, 1) != 0
1853 	    || listen(*ro_sock, 1) != 0)
1854 		barf_perror("Could not listen on sockets");
1855 
1856 
1857 }
1858 #endif
1859 
/* Print the command line help text to stderr. */
static void usage(void)
{
	static const char help_text[] =
"Usage:\n"
"\n"
"  xenstored <options>\n"
"\n"
"where options may include:\n"
"\n"
"  -D, --no-domain-init    to state that xenstored should not initialise dom0,\n"
"  -F, --pid-file <file>   giving a file for the daemon's pid to be written,\n"
"  -H, --help              to output this message,\n"
"  -N, --no-fork           to request that the daemon does not fork,\n"
"  -P, --output-pid        to request that the pid of the daemon is output,\n"
"  -T, --trace-file <file> giving the file for logging, and\n"
"  -E, --entry-nb <nb>     limit the number of entries per domain,\n"
"  -S, --entry-size <size> limit the size of entry per domain, and\n"
"  -W, --watch-nb <nb>     limit the number of watches per domain,\n"
"  -t, --transaction <nb>  limit the number of transaction allowed per domain,\n"
"  -R, --no-recovery       to request that no recovery should be attempted when\n"
"                          the store is corrupted (debug only),\n"
"  -I, --internal-db       store database in memory, not on disk\n"
"  -V, --verbose           to request verbose execution.\n";

	fputs(help_text, stderr);
}
1884 
1885 
/*
 * Long options understood by main().  Each one is reported to the option
 * loop as the character given in 'val'; the list ends with an all-zero
 * entry.
 */
static struct option options[] = {
	{ .name = "no-domain-init", .has_arg = no_argument,
	  .flag = NULL, .val = 'D' },
	{ .name = "entry-nb",       .has_arg = required_argument,
	  .flag = NULL, .val = 'E' },
	{ .name = "pid-file",       .has_arg = required_argument,
	  .flag = NULL, .val = 'F' },
	{ .name = "event",          .has_arg = required_argument,
	  .flag = NULL, .val = 'e' },
	{ .name = "master-domid",   .has_arg = required_argument,
	  .flag = NULL, .val = 'm' },
	{ .name = "help",           .has_arg = no_argument,
	  .flag = NULL, .val = 'H' },
	{ .name = "no-fork",        .has_arg = no_argument,
	  .flag = NULL, .val = 'N' },
	{ .name = "priv-domid",     .has_arg = required_argument,
	  .flag = NULL, .val = 'p' },
	{ .name = "output-pid",     .has_arg = no_argument,
	  .flag = NULL, .val = 'P' },
	{ .name = "entry-size",     .has_arg = required_argument,
	  .flag = NULL, .val = 'S' },
	{ .name = "trace-file",     .has_arg = required_argument,
	  .flag = NULL, .val = 'T' },
	{ .name = "transaction",    .has_arg = required_argument,
	  .flag = NULL, .val = 't' },
	{ .name = "no-recovery",    .has_arg = no_argument,
	  .flag = NULL, .val = 'R' },
	{ .name = "internal-db",    .has_arg = no_argument,
	  .flag = NULL, .val = 'I' },
	{ .name = "verbose",        .has_arg = no_argument,
	  .flag = NULL, .val = 'V' },
	{ .name = "watch-nb",       .has_arg = required_argument,
	  .flag = NULL, .val = 'W' },
	{ .name = NULL, .has_arg = 0, .flag = NULL, .val = 0 },
};
1904 
1905 extern void dump_conn(struct connection *conn);
1906 int dom0_domid = 0;
1907 int dom0_event = 0;
1908 int priv_domid = 0;
1909 
main(int argc,char * argv[])1910 int main(int argc, char *argv[])
1911 {
1912 	int opt, *sock = NULL, *ro_sock = NULL;
1913 	int sock_pollfd_idx = -1, ro_sock_pollfd_idx = -1;
1914 	bool dofork = true;
1915 	bool outputpid = false;
1916 	bool no_domain_init = false;
1917 	const char *pidfile = NULL;
1918 	int timeout;
1919 
1920 
1921 	while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:T:RVW:", options,
1922 				  NULL)) != -1) {
1923 		switch (opt) {
1924 		case 'D':
1925 			no_domain_init = true;
1926 			break;
1927 		case 'E':
1928 			quota_nb_entry_per_domain = strtol(optarg, NULL, 10);
1929 			break;
1930 		case 'F':
1931 			pidfile = optarg;
1932 			break;
1933 		case 'H':
1934 			usage();
1935 			return 0;
1936 		case 'N':
1937 			dofork = false;
1938 			break;
1939 		case 'P':
1940 			outputpid = true;
1941 			break;
1942 		case 'R':
1943 			recovery = false;
1944 			break;
1945 		case 'S':
1946 			quota_max_entry_size = strtol(optarg, NULL, 10);
1947 			break;
1948 		case 't':
1949 			quota_max_transaction = strtol(optarg, NULL, 10);
1950 			break;
1951 		case 'T':
1952 			tracefile = optarg;
1953 			break;
1954 		case 'I':
1955 			tdb_flags = TDB_INTERNAL|TDB_NOLOCK;
1956 			break;
1957 		case 'V':
1958 			verbose = true;
1959 			break;
1960 		case 'W':
1961 			quota_nb_watch_per_domain = strtol(optarg, NULL, 10);
1962 			break;
1963 		case 'e':
1964 			dom0_event = strtol(optarg, NULL, 10);
1965 			break;
1966 		case 'm':
1967 			dom0_domid = strtol(optarg, NULL, 10);
1968 			break;
1969 		case 'p':
1970 			priv_domid = strtol(optarg, NULL, 10);
1971 			break;
1972 		}
1973 	}
1974 	if (optind != argc)
1975 		barf("%s: No arguments desired", argv[0]);
1976 
1977 	reopen_log();
1978 
1979 	/* make sure xenstored directories exist */
1980 	/* Errors ignored here, will be reported when we open files */
1981 	mkdir(xs_daemon_rundir(), 0755);
1982 	mkdir(xs_daemon_rootdir(), 0755);
1983 
1984 	if (dofork) {
1985 		openlog("xenstored", 0, LOG_DAEMON);
1986 		daemonize();
1987 	}
1988 	if (pidfile)
1989 		write_pidfile(pidfile);
1990 
1991 	/* Talloc leak reports go to stderr, which is closed if we fork. */
1992 	if (!dofork)
1993 		talloc_enable_leak_report_full();
1994 
1995 	/* Don't kill us with SIGPIPE. */
1996 	signal(SIGPIPE, SIG_IGN);
1997 
1998 	talloc_enable_null_tracking();
1999 
2000 	init_sockets(&sock, &ro_sock);
2001 
2002 	init_pipe(reopen_log_pipe);
2003 
2004 	/* Setup the database */
2005 	setup_structure();
2006 
2007 	/* Listen to hypervisor. */
2008 	if (!no_domain_init)
2009 		domain_init();
2010 
2011 	/* Restore existing connections. */
2012 	restore_existing_connections();
2013 
2014 	if (outputpid) {
2015 		printf("%ld\n", (long)getpid());
2016 		fflush(stdout);
2017 	}
2018 
2019 	/* redirect to /dev/null now we're ready to accept connections */
2020 	if (dofork)
2021 		finish_daemonize();
2022 
2023 	signal(SIGHUP, trigger_reopen_log);
2024 	if (tracefile)
2025 		tracefile = talloc_strdup(NULL, tracefile);
2026 
2027 	/* Get ready to listen to the tools. */
2028 	initialize_fds(*sock, &sock_pollfd_idx, *ro_sock, &ro_sock_pollfd_idx,
2029 		       &timeout);
2030 
2031 	/* Tell the kernel we're up and running. */
2032 	xenbus_notify_running();
2033 
2034 #if defined(XEN_SYSTEMD_ENABLED)
2035 	sd_notify(1, "READY=1");
2036 	fprintf(stderr, SD_NOTICE "xenstored is ready\n");
2037 #endif
2038 
2039 	/* Main loop. */
2040 	for (;;) {
2041 		struct connection *conn, *next;
2042 
2043 		if (poll(fds, nr_fds, timeout) < 0) {
2044 			if (errno == EINTR)
2045 				continue;
2046 			barf_perror("Poll failed");
2047 		}
2048 
2049 		if (reopen_log_pipe0_pollfd_idx != -1) {
2050 			if (fds[reopen_log_pipe0_pollfd_idx].revents
2051 			    & ~POLLIN) {
2052 				close(reopen_log_pipe[0]);
2053 				close(reopen_log_pipe[1]);
2054 				init_pipe(reopen_log_pipe);
2055 			} else if (fds[reopen_log_pipe0_pollfd_idx].revents
2056 				   & POLLIN) {
2057 				char c;
2058 				if (read(reopen_log_pipe[0], &c, 1) != 1)
2059 					barf_perror("read failed");
2060 				reopen_log();
2061 			}
2062 			reopen_log_pipe0_pollfd_idx = -1;
2063 		}
2064 
2065 		if (sock_pollfd_idx != -1) {
2066 			if (fds[sock_pollfd_idx].revents & ~POLLIN) {
2067 				barf_perror("sock poll failed");
2068 				break;
2069 			} else if (fds[sock_pollfd_idx].revents & POLLIN) {
2070 				accept_connection(*sock, true);
2071 				sock_pollfd_idx = -1;
2072 			}
2073 		}
2074 
2075 		if (ro_sock_pollfd_idx != -1) {
2076 			if (fds[ro_sock_pollfd_idx].revents & ~POLLIN) {
2077 				barf_perror("ro sock poll failed");
2078 				break;
2079 			} else if (fds[ro_sock_pollfd_idx].revents & POLLIN) {
2080 				accept_connection(*ro_sock, false);
2081 				ro_sock_pollfd_idx = -1;
2082 			}
2083 		}
2084 
2085 		if (xce_pollfd_idx != -1) {
2086 			if (fds[xce_pollfd_idx].revents & ~POLLIN) {
2087 				barf_perror("xce_handle poll failed");
2088 				break;
2089 			} else if (fds[xce_pollfd_idx].revents & POLLIN) {
2090 				handle_event();
2091 				xce_pollfd_idx = -1;
2092 			}
2093 		}
2094 
2095 		next = list_entry(connections.next, typeof(*conn), list);
2096 		if (&next->list != &connections)
2097 			talloc_increase_ref_count(next);
2098 		while (&next->list != &connections) {
2099 			conn = next;
2100 
2101 			next = list_entry(conn->list.next,
2102 					  typeof(*conn), list);
2103 			if (&next->list != &connections)
2104 				talloc_increase_ref_count(next);
2105 
2106 			if (conn->domain) {
2107 				if (domain_can_read(conn))
2108 					handle_input(conn);
2109 				if (talloc_free(conn) == 0)
2110 					continue;
2111 
2112 				talloc_increase_ref_count(conn);
2113 				if (domain_can_write(conn) &&
2114 				    !list_empty(&conn->out_list))
2115 					handle_output(conn);
2116 				if (talloc_free(conn) == 0)
2117 					continue;
2118 			} else {
2119 				if (conn->pollfd_idx != -1) {
2120 					if (fds[conn->pollfd_idx].revents
2121 					    & ~(POLLIN|POLLOUT))
2122 						talloc_free(conn);
2123 					else if (fds[conn->pollfd_idx].revents
2124 						 & POLLIN)
2125 						handle_input(conn);
2126 				}
2127 				if (talloc_free(conn) == 0)
2128 					continue;
2129 
2130 				talloc_increase_ref_count(conn);
2131 
2132 				if (conn->pollfd_idx != -1) {
2133 					if (fds[conn->pollfd_idx].revents
2134 					    & ~(POLLIN|POLLOUT))
2135 						talloc_free(conn);
2136 					else if (fds[conn->pollfd_idx].revents
2137 						 & POLLOUT)
2138 						handle_output(conn);
2139 				}
2140 				if (talloc_free(conn) == 0)
2141 					continue;
2142 
2143 				conn->pollfd_idx = -1;
2144 			}
2145 		}
2146 
2147 		initialize_fds(*sock, &sock_pollfd_idx, *ro_sock,
2148 			       &ro_sock_pollfd_idx, &timeout);
2149 	}
2150 }
2151 
2152 /*
2153  * Local variables:
2154  *  c-file-style: "linux"
2155  *  indent-tabs-mode: t
2156  *  c-indent-level: 8
2157  *  c-basic-offset: 8
2158  *  tab-width: 8
2159  * End:
2160  */
2161