Support for socket-descriptor marshalling

This patch adds prinicipal support for transmitting socket descriptors
as RPC payload. Socket descriptors are handled by the linux-specific
implementation of the capability marshalling and unmarshalling functions
in 'ipc.h'. The 'Message' type in 'src/platform/linux_socket.h' has been
extended to carry multiple descriptors in a single message.

Unfortuately, we hit a problem (and potential show stopper) here:

  lx_sendmsg failed with -109 in lx_call()

The error code corresponds to ETOOMANYREFS. There is only one place in
the Linux kernel where this error code is used (net/unix/af_unix.c).
The code for 'unix_attach_fds()' suggests that there is a limit with
regard to the maximum number of references for a given Unix domain
socket. When the error occurs, core and init are running. The socket
of core's server entrypoint is present in the '/proc/pid/fd' of those
processes 8 times. The error occurs when core tries to perform an
RPC to the entrypoint to perform 'Ram_session::transfer_quota()'
(base/include/base/child.h at line 248).
This commit is contained in:
Norman Feske 2012-07-26 20:29:45 +02:00
parent 8b343d7e1a
commit ca4f574f4c
5 changed files with 270 additions and 60 deletions

View File

@ -0,0 +1,65 @@
/*
* \brief Linux-specific supplements to the IPC framework
* \author Norman Feske
* \date 2012-07-26
*/
/*
* Copyright (C) 2012 Genode Labs GmbH
*
* This file is part of the Genode OS framework, which is distributed
* under the terms of the GNU General Public License version 2.
*/
#ifndef _INCLUDE__BASE__IPC_H_
#define _INCLUDE__BASE__IPC_H_
#include <base/ipc_generic.h>
#include <base/snprintf.h>
extern "C" int raw_write_str(const char *str);
extern "C" void wait_for_continue(void);
#define PRAWW(fmt, ...) \
do { \
char str[128]; \
Genode::snprintf(str, sizeof(str), \
ESC_ERR fmt ESC_END "\n", ##__VA_ARGS__); \
raw_write_str(str); \
} while (0)
inline void Genode::Ipc_ostream::_marshal_capability(Genode::Native_capability const &cap)
{
PRAWW("_marshal_capability called: local_name=%ld, tid=%ld, socket=%d",
cap.local_name(), cap.dst().tid, cap.dst().socket);
_write_to_buf(cap.local_name());
_write_to_buf(cap.dst().tid);
if (cap.valid()) {
PRAWW("append_cap");
_snd_msg->append_cap(cap.dst().socket);
}
}
inline void Genode::Ipc_istream::_unmarshal_capability(Genode::Native_capability &cap)
{
long local_name = 0;
long tid = 0;
int socket = -1;
_read_from_buf(local_name);
_read_from_buf(tid);
bool const cap_valid = (tid != 0);
if (cap_valid)
socket = _rcv_msg->read_cap();
cap = Native_capability(Cap_dst_policy::Dst(tid, socket), local_name);
}
#endif /* _INCLUDE__BASE__IPC_H_ */

View File

@ -21,10 +21,30 @@ namespace Genode {
*/ */
class Msgbuf_base class Msgbuf_base
{ {
public:
enum { MAX_CAPS_PER_MSG = 8 };
protected: protected:
/*
* Capabilities (file descriptors) to be transferred
*/
int _caps[MAX_CAPS_PER_MSG];
Genode::size_t _used_caps;
Genode::size_t _read_cap_index;
/**
* Maximum size of plain-data message payload
*/
Genode::size_t _size; Genode::size_t _size;
char _msg_start[]; /* symbol marks start of message buffer data */
/**
* Actual size of plain-data message payload
*/
Genode::size_t _used_size;
char _msg_start[]; /* symbol marks start of message buffer data */
/* /*
* No member variables are allowed beyond this point! * No member variables are allowed beyond this point!
@ -34,6 +54,8 @@ namespace Genode {
char buf[]; char buf[];
Msgbuf_base() { reset_caps(); }
/** /**
* Return size of message buffer * Return size of message buffer
*/ */
@ -43,6 +65,36 @@ namespace Genode {
* Return address of message buffer * Return address of message buffer
*/ */
inline void *addr() { return &_msg_start[0]; }; inline void *addr() { return &_msg_start[0]; };
void reset_caps() { _used_caps = 0; _read_cap_index = 0; }
bool append_cap(int cap)
{
if (_used_caps == MAX_CAPS_PER_MSG)
return false;
_caps[_used_caps++] = cap;
return true;
}
int read_cap()
{
if (_read_cap_index == _used_caps)
return -1;
return _caps[_read_cap_index++];
}
size_t used_caps() const { return _used_caps; }
int cap(unsigned index) const
{
return index < _used_caps ? _caps[index] : -1;
}
size_t used_size() const { return _used_size; }
void used_size(size_t used_size) { _used_size = used_size; }
}; };

View File

@ -92,9 +92,10 @@ void Ipc_istream::_wait()
Ipc_istream::Ipc_istream(Msgbuf_base *rcv_msg) Ipc_istream::Ipc_istream(Msgbuf_base *rcv_msg)
: Ipc_unmarshaller(rcv_msg->buf, rcv_msg->size()), :
Native_capability(Dst(lx_gettid(), -1), 0), Ipc_unmarshaller(rcv_msg->buf, rcv_msg->size()),
_rcv_msg(rcv_msg) Native_capability(Dst(lx_gettid(), -1), 0),
_rcv_msg(rcv_msg)
{ } { }
@ -115,15 +116,16 @@ void Ipc_client::_prepare_next_call()
/* prepare response buffer */ /* prepare response buffer */
_read_offset = sizeof(long); _read_offset = sizeof(long);
_snd_msg->reset_caps();
} }
void Ipc_client::_call() void Ipc_client::_call()
{ {
if (Ipc_ostream::_dst.valid()) { if (Ipc_ostream::_dst.valid()) {
lx_call(Ipc_ostream::_dst.dst().tid, _snd_msg->used_size(_write_offset);
_snd_msg->buf, _write_offset, lx_call(Ipc_ostream::_dst.dst().tid, *_snd_msg, *_rcv_msg);
_rcv_msg->buf, _rcv_msg->size());
} }
_prepare_next_call(); _prepare_next_call();
} }
@ -153,6 +155,9 @@ void Ipc_server::_prepare_next_reply_wait()
/* leave space for exc code at the beginning of the msgbuf */ /* leave space for exc code at the beginning of the msgbuf */
_write_offset += align_natural(sizeof(int)); _write_offset += align_natural(sizeof(int));
/* reset capability slots of send message buffer */
_snd_msg->reset_caps();
} }
@ -161,7 +166,7 @@ void Ipc_server::_wait()
_reply_needed = true; _reply_needed = true;
try { try {
int const reply_socket = lx_wait(_rcv_cs, _rcv_msg->buf, _rcv_msg->size()); int const reply_socket = lx_wait(_rcv_cs, *_rcv_msg);
/* /*
* Remember reply capability * Remember reply capability
@ -185,7 +190,8 @@ void Ipc_server::_wait()
void Ipc_server::_reply() void Ipc_server::_reply()
{ {
try { try {
lx_reply(Ipc_ostream::_dst.dst().socket, _snd_msg->buf, _write_offset); _snd_msg->used_size(_write_offset);
lx_reply(Ipc_ostream::_dst.dst().socket, *_snd_msg);
} catch (Ipc_error) { } } catch (Ipc_error) { }
_prepare_next_reply_wait(); _prepare_next_reply_wait();
@ -195,8 +201,10 @@ void Ipc_server::_reply()
void Ipc_server::_reply_wait() void Ipc_server::_reply_wait()
{ {
/* when first called, there was no request yet */ /* when first called, there was no request yet */
if (_reply_needed) if (_reply_needed) {
lx_reply(Ipc_ostream::_dst.dst().socket, _snd_msg->buf, _write_offset); _snd_msg->used_size(_write_offset);
lx_reply(Ipc_ostream::_dst.dst().socket, *_snd_msg);
}
_wait(); _wait();
} }
@ -231,5 +239,9 @@ Ipc_server::Ipc_server(Msgbuf_base *snd_msg, Msgbuf_base *rcv_msg)
if (thread) if (thread)
thread->tid().is_ipc_server = true; thread->tid().is_ipc_server = true;
/* override capability initialization performed by 'Ipc_istream' */
*static_cast<Native_capability *>(this) =
Native_capability(Native_capability::Dst(lx_gettid(), _rcv_cs), 0);
_prepare_next_reply_wait(); _prepare_next_reply_wait();
} }

View File

@ -57,16 +57,22 @@ namespace {
*/ */
struct Message struct Message
{ {
public:
enum { MAX_SDS_PER_MSG = Genode::Msgbuf_base::MAX_CAPS_PER_MSG };
private: private:
msghdr _msg; msghdr _msg;
sockaddr_un _addr; sockaddr_un _addr;
iovec _iovec; iovec _iovec;
char _cmsg_buf[CMSG_SPACE(sizeof(int))]; char _cmsg_buf[CMSG_SPACE(MAX_SDS_PER_MSG*sizeof(int))];
unsigned _num_sds;
public: public:
Message(long server_thread_id = -1) Message(long server_thread_id = -1) : _num_sds(0)
{ {
memset(&_msg, 0, sizeof(_msg)); memset(&_msg, 0, sizeof(_msg));
@ -77,6 +83,18 @@ namespace {
_msg.msg_name = &_addr; _msg.msg_name = &_addr;
_msg.msg_namelen = sizeof(_addr); _msg.msg_namelen = sizeof(_addr);
} }
/* initialize control message */
struct cmsghdr *cmsg;
_msg.msg_control = _cmsg_buf;
_msg.msg_controllen = sizeof(_cmsg_buf); /* buffer space available */
_msg.msg_flags |= MSG_CMSG_CLOEXEC;
cmsg = CMSG_FIRSTHDR(&_msg);
cmsg->cmsg_len = CMSG_LEN(0);
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
_msg.msg_controllen = cmsg->cmsg_len; /* actual cmsg length */
} }
msghdr * msg() { return &_msg; } msghdr * msg() { return &_msg; }
@ -91,45 +109,47 @@ namespace {
_iovec.iov_len = buffer_len; _iovec.iov_len = buffer_len;
} }
/** void marshal_socket(int sd)
* Prepare slot for socket sending/reception
*
* Note, if this function is not called sockets are not accepted on
* 'recvmsg' and, therefore, do not occupy local file descriptors.
*/
void prepare_reply_socket_slot()
{ {
/* initialize control message */ *((int *)CMSG_DATA((cmsghdr *)_cmsg_buf) + _num_sds) = sd;
struct cmsghdr *cmsg;
_msg.msg_control = _cmsg_buf; _num_sds++;
_msg.msg_controllen = sizeof(_cmsg_buf); /* buffer space available */
_msg.msg_flags |= MSG_CMSG_CLOEXEC; struct cmsghdr *cmsg = CMSG_FIRSTHDR(&_msg);
cmsg = CMSG_FIRSTHDR(&_msg); cmsg->cmsg_len = CMSG_LEN(_num_sds*sizeof(int));
cmsg->cmsg_len = CMSG_LEN(sizeof(int));
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
_msg.msg_controllen = cmsg->cmsg_len; /* actual cmsg length */ _msg.msg_controllen = cmsg->cmsg_len; /* actual cmsg length */
} }
void reply_socket(int sd) void accept_sockets(int num_sds)
{ {
if (!_msg.msg_control) { struct cmsghdr *cmsg = CMSG_FIRSTHDR(&_msg);
PRAW("reply-socket slot not prepared"); cmsg->cmsg_len = CMSG_LEN(num_sds*sizeof(int));
throw Genode::Ipc_error(); _msg.msg_controllen = cmsg->cmsg_len; /* actual cmsg length */
}
*(int *)CMSG_DATA((cmsghdr *)_cmsg_buf) = sd;
} }
int reply_socket() const int unmarshal_socket()
{ {
if (!_msg.msg_control) { int ret = *((int *)CMSG_DATA((cmsghdr *)_cmsg_buf) + _num_sds);
PRAW("reply-socket slot not prepared");
throw Genode::Ipc_error();
}
return *(int *)CMSG_DATA((cmsghdr *)_cmsg_buf); _num_sds++;
return ret;
}
/* XXX only for debugging */
int socket_at_index(int index)
{
return *((int *)CMSG_DATA((cmsghdr *)_cmsg_buf) + index);
}
unsigned num_sockets() const
{
struct cmsghdr *cmsg = CMSG_FIRSTHDR(&_msg);
if (!cmsg)
return 0;
return (cmsg->cmsg_len - CMSG_ALIGN(sizeof(cmsghdr)))/sizeof(int);
} }
}; };
@ -170,8 +190,8 @@ static int lx_server_socket(Genode::Thread_base *thread)
* Utility: Send request to server and wait for reply * Utility: Send request to server and wait for reply
*/ */
static void lx_call(long thread_id, static void lx_call(long thread_id,
void *send_buf, Genode::size_t send_buf_len, Genode::Msgbuf_base &send_msgbuf,
void *recv_buf, Genode::size_t recv_buf_len) Genode::Msgbuf_base &recv_msgbuf)
{ {
int ret; int ret;
@ -184,29 +204,62 @@ static void lx_call(long thread_id,
ret = lx_socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0, reply_channel); ret = lx_socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0, reply_channel);
if (ret < 0) { if (ret < 0) {
PRAW("lx_socketpair failed with %d", ret); PRAW("lx_socketpair failed with %d", ret);
/* XXX */ wait_for_continue();
throw Genode::Ipc_error(); throw Genode::Ipc_error();
} }
send_msg.prepare_reply_socket_slot(); /* assemble message */
send_msg.reply_socket(reply_channel[REMOTE_SOCKET]);
send_msg.buffer(send_buf, send_buf_len); /* marshal reply capability */
send_msg.marshal_socket(reply_channel[REMOTE_SOCKET]);
/* marshal capabilities contained in 'send_msgbuf' */
if (send_msgbuf.used_caps() > 0)
PRAW("lx_call: marshal %d caps:", send_msgbuf.used_caps());
for (unsigned i = 0; i < send_msgbuf.used_caps(); i++) {
PRAW(" sd[%d]: %d", i, send_msgbuf.cap(i));
send_msg.marshal_socket(lx_dup(send_msgbuf.cap(i)));
}
send_msg.buffer(send_msgbuf.buf, send_msgbuf.used_size());
ret = lx_sendmsg(reply_channel[LOCAL_SOCKET], send_msg.msg(), 0); ret = lx_sendmsg(reply_channel[LOCAL_SOCKET], send_msg.msg(), 0);
if (ret < 0) { if (ret < 0) {
PRAW("lx_sendmsg failed with %d in lx_call()", ret); PRAW("lx_sendmsg failed with %d in lx_call()", ret);
while (1);
/* XXX */ wait_for_continue();
throw Genode::Ipc_error(); throw Genode::Ipc_error();
} }
Message recv_msg; Message recv_msg;
recv_msg.accept_sockets(Message::MAX_SDS_PER_MSG);
recv_msg.buffer(recv_buf, recv_buf_len); recv_msg.buffer(recv_msgbuf.buf, recv_msgbuf.size());
ret = lx_recvmsg(reply_channel[LOCAL_SOCKET], recv_msg.msg(), 0); ret = lx_recvmsg(reply_channel[LOCAL_SOCKET], recv_msg.msg(), 0);
if (ret < 0) { if (ret < 0) {
PRAW("lx_recvmsg failed with %d in lx_call()", ret); PRAW("lx_recvmsg failed with %d in lx_call()", ret);
/* XXX */ wait_for_continue();
throw Genode::Ipc_error(); throw Genode::Ipc_error();
} }
/*
* 'lx_recvmsg()' returns the number of bytes received. remember this value
* in 'Genode::Msgbuf_base'
*
* XXX revisit whether we really need this information
*/
recv_msgbuf.used_size(ret);
if (recv_msg.num_sockets() > 0) {
PRAW("lx_call: got %d sockets in reply", recv_msg.num_sockets());
for (unsigned i = 0; i < recv_msg.num_sockets(); i++) {
PRAW(" sd[%d]: %d", i, recv_msg.socket_at_index(i));
lx_close(recv_msg.socket_at_index(i));
}
}
/* destroy reply channel */ /* destroy reply channel */
lx_close(reply_channel[LOCAL_SOCKET]); lx_close(reply_channel[LOCAL_SOCKET]);
lx_close(reply_channel[REMOTE_SOCKET]); lx_close(reply_channel[REMOTE_SOCKET]);
@ -219,22 +272,46 @@ static void lx_call(long thread_id,
* \return socket descriptor of reply capability * \return socket descriptor of reply capability
*/ */
static int lx_wait(Genode::Native_connection_state &cs, static int lx_wait(Genode::Native_connection_state &cs,
void *buf, Genode::size_t buf_len) Genode::Msgbuf_base &recv_msgbuf)
{ {
int ret;
Message msg; Message msg;
msg.prepare_reply_socket_slot(); msg.accept_sockets(Message::MAX_SDS_PER_MSG);
msg.buffer(buf, buf_len); msg.buffer(recv_msgbuf.buf, recv_msgbuf.size());
ret = lx_recvmsg(cs, msg.msg(), 0); int ret = lx_recvmsg(cs, msg.msg(), 0);
if (ret < 0) { if (ret < 0) {
PRAW("lx_recvmsg failed with %d in lx_wait()", ret); PRAW("lx_recvmsg failed with %d in lx_wait()", ret);
/* XXX */ wait_for_continue();
throw Genode::Ipc_error(); throw Genode::Ipc_error();
} }
return msg.reply_socket(); /*
* 'lx_recvmsg()' returned message size, keep it in 'recv_msgbuf'.
*
* XXX revisit whether this information is actually needed.
*/
recv_msgbuf.used_size(ret);
if (msg.num_sockets() > 1) {
PRAW("lx_wait: got %d sockets from wait", msg.num_sockets());
for (unsigned i = 0; i < msg.num_sockets(); i++) {
PRAW(" sd[%d]: %d", i, msg.socket_at_index(i));
/* don't close reply channel */
if (i > 0)
lx_close(msg.socket_at_index(i));
}
}
int reply_socket = msg.unmarshal_socket();
/*
* Copy-out additional sds from msg to recv_msgbuf
*/
return reply_socket;
} }
@ -242,15 +319,13 @@ static int lx_wait(Genode::Native_connection_state &cs,
* Utility: Send reply to client * Utility: Send reply to client
*/ */
static void lx_reply(int reply_socket, static void lx_reply(int reply_socket,
void *buf, Genode::size_t buf_len) Genode::Msgbuf_base &send_msgbuf)
{ {
int ret;
Message msg; Message msg;
msg.buffer(buf, buf_len); msg.buffer(send_msgbuf.buf, send_msgbuf.used_size());
ret = lx_sendmsg(reply_socket, msg.msg(), 0); int ret = lx_sendmsg(reply_socket, msg.msg(), 0);
if (ret < 0) if (ret < 0)
PRAW("lx_sendmsg failed with %d in lx_reply()", ret); PRAW("lx_sendmsg failed with %d in lx_reply()", ret);

View File

@ -64,6 +64,12 @@ inline int lx_close(int fd)
} }
inline int lx_dup(int fd)
{
return lx_syscall(SYS_dup, fd);
}
inline int lx_unlink(const char *fname) inline int lx_unlink(const char *fname)
{ {
return lx_syscall(SYS_unlink, fname); return lx_syscall(SYS_unlink, fname);