Support for socket-descriptor marshalling

This patch adds prinicipal support for transmitting socket descriptors
as RPC payload. Socket descriptors are handled by the linux-specific
implementation of the capability marshalling and unmarshalling functions
in 'ipc.h'. The 'Message' type in 'src/platform/linux_socket.h' has been
extended to carry multiple descriptors in a single message.

Unfortuately, we hit a problem (and potential show stopper) here:

  lx_sendmsg failed with -109 in lx_call()

The error code corresponds to ETOOMANYREFS. There is only one place in
the Linux kernel where this error code is used (net/unix/af_unix.c).
The code for 'unix_attach_fds()' suggests that there is a limit with
regard to the maximum number of references for a given Unix domain
socket. When the error occurs, core and init are running. The socket
of core's server entrypoint is present in the '/proc/pid/fd' of those
processes 8 times. The error occurs when core tries to perform an
RPC to the entrypoint to perform 'Ram_session::transfer_quota()'
(base/include/base/child.h at line 248).
This commit is contained in:
Norman Feske 2012-07-26 20:29:45 +02:00
parent 8b343d7e1a
commit ca4f574f4c
5 changed files with 270 additions and 60 deletions

View File

@ -0,0 +1,65 @@
/*
* \brief Linux-specific supplements to the IPC framework
* \author Norman Feske
* \date 2012-07-26
*/
/*
* Copyright (C) 2012 Genode Labs GmbH
*
* This file is part of the Genode OS framework, which is distributed
* under the terms of the GNU General Public License version 2.
*/
#ifndef _INCLUDE__BASE__IPC_H_
#define _INCLUDE__BASE__IPC_H_
#include <base/ipc_generic.h>
#include <base/snprintf.h>
extern "C" int raw_write_str(const char *str);
extern "C" void wait_for_continue(void);
#define PRAWW(fmt, ...) \
do { \
char str[128]; \
Genode::snprintf(str, sizeof(str), \
ESC_ERR fmt ESC_END "\n", ##__VA_ARGS__); \
raw_write_str(str); \
} while (0)
inline void Genode::Ipc_ostream::_marshal_capability(Genode::Native_capability const &cap)
{
PRAWW("_marshal_capability called: local_name=%ld, tid=%ld, socket=%d",
cap.local_name(), cap.dst().tid, cap.dst().socket);
_write_to_buf(cap.local_name());
_write_to_buf(cap.dst().tid);
if (cap.valid()) {
PRAWW("append_cap");
_snd_msg->append_cap(cap.dst().socket);
}
}
inline void Genode::Ipc_istream::_unmarshal_capability(Genode::Native_capability &cap)
{
long local_name = 0;
long tid = 0;
int socket = -1;
_read_from_buf(local_name);
_read_from_buf(tid);
bool const cap_valid = (tid != 0);
if (cap_valid)
socket = _rcv_msg->read_cap();
cap = Native_capability(Cap_dst_policy::Dst(tid, socket), local_name);
}
#endif /* _INCLUDE__BASE__IPC_H_ */

View File

@ -21,10 +21,30 @@ namespace Genode {
*/
class Msgbuf_base
{
public:
enum { MAX_CAPS_PER_MSG = 8 };
protected:
/*
* Capabilities (file descriptors) to be transferred
*/
int _caps[MAX_CAPS_PER_MSG];
Genode::size_t _used_caps;
Genode::size_t _read_cap_index;
/**
* Maximum size of plain-data message payload
*/
Genode::size_t _size;
char _msg_start[]; /* symbol marks start of message buffer data */
/**
* Actual size of plain-data message payload
*/
Genode::size_t _used_size;
char _msg_start[]; /* symbol marks start of message buffer data */
/*
* No member variables are allowed beyond this point!
@ -34,6 +54,8 @@ namespace Genode {
char buf[];
Msgbuf_base() { reset_caps(); }
/**
* Return size of message buffer
*/
@ -43,6 +65,36 @@ namespace Genode {
* Return address of message buffer
*/
inline void *addr() { return &_msg_start[0]; };
void reset_caps() { _used_caps = 0; _read_cap_index = 0; }
bool append_cap(int cap)
{
if (_used_caps == MAX_CAPS_PER_MSG)
return false;
_caps[_used_caps++] = cap;
return true;
}
int read_cap()
{
if (_read_cap_index == _used_caps)
return -1;
return _caps[_read_cap_index++];
}
size_t used_caps() const { return _used_caps; }
int cap(unsigned index) const
{
return index < _used_caps ? _caps[index] : -1;
}
size_t used_size() const { return _used_size; }
void used_size(size_t used_size) { _used_size = used_size; }
};

View File

@ -92,9 +92,10 @@ void Ipc_istream::_wait()
Ipc_istream::Ipc_istream(Msgbuf_base *rcv_msg)
: Ipc_unmarshaller(rcv_msg->buf, rcv_msg->size()),
Native_capability(Dst(lx_gettid(), -1), 0),
_rcv_msg(rcv_msg)
:
Ipc_unmarshaller(rcv_msg->buf, rcv_msg->size()),
Native_capability(Dst(lx_gettid(), -1), 0),
_rcv_msg(rcv_msg)
{ }
@ -115,15 +116,16 @@ void Ipc_client::_prepare_next_call()
/* prepare response buffer */
_read_offset = sizeof(long);
_snd_msg->reset_caps();
}
void Ipc_client::_call()
{
if (Ipc_ostream::_dst.valid()) {
lx_call(Ipc_ostream::_dst.dst().tid,
_snd_msg->buf, _write_offset,
_rcv_msg->buf, _rcv_msg->size());
_snd_msg->used_size(_write_offset);
lx_call(Ipc_ostream::_dst.dst().tid, *_snd_msg, *_rcv_msg);
}
_prepare_next_call();
}
@ -153,6 +155,9 @@ void Ipc_server::_prepare_next_reply_wait()
/* leave space for exc code at the beginning of the msgbuf */
_write_offset += align_natural(sizeof(int));
/* reset capability slots of send message buffer */
_snd_msg->reset_caps();
}
@ -161,7 +166,7 @@ void Ipc_server::_wait()
_reply_needed = true;
try {
int const reply_socket = lx_wait(_rcv_cs, _rcv_msg->buf, _rcv_msg->size());
int const reply_socket = lx_wait(_rcv_cs, *_rcv_msg);
/*
* Remember reply capability
@ -185,7 +190,8 @@ void Ipc_server::_wait()
void Ipc_server::_reply()
{
try {
lx_reply(Ipc_ostream::_dst.dst().socket, _snd_msg->buf, _write_offset);
_snd_msg->used_size(_write_offset);
lx_reply(Ipc_ostream::_dst.dst().socket, *_snd_msg);
} catch (Ipc_error) { }
_prepare_next_reply_wait();
@ -195,8 +201,10 @@ void Ipc_server::_reply()
void Ipc_server::_reply_wait()
{
/* when first called, there was no request yet */
if (_reply_needed)
lx_reply(Ipc_ostream::_dst.dst().socket, _snd_msg->buf, _write_offset);
if (_reply_needed) {
_snd_msg->used_size(_write_offset);
lx_reply(Ipc_ostream::_dst.dst().socket, *_snd_msg);
}
_wait();
}
@ -231,5 +239,9 @@ Ipc_server::Ipc_server(Msgbuf_base *snd_msg, Msgbuf_base *rcv_msg)
if (thread)
thread->tid().is_ipc_server = true;
/* override capability initialization performed by 'Ipc_istream' */
*static_cast<Native_capability *>(this) =
Native_capability(Native_capability::Dst(lx_gettid(), _rcv_cs), 0);
_prepare_next_reply_wait();
}

View File

@ -57,16 +57,22 @@ namespace {
*/
struct Message
{
public:
enum { MAX_SDS_PER_MSG = Genode::Msgbuf_base::MAX_CAPS_PER_MSG };
private:
msghdr _msg;
sockaddr_un _addr;
iovec _iovec;
char _cmsg_buf[CMSG_SPACE(sizeof(int))];
char _cmsg_buf[CMSG_SPACE(MAX_SDS_PER_MSG*sizeof(int))];
unsigned _num_sds;
public:
Message(long server_thread_id = -1)
Message(long server_thread_id = -1) : _num_sds(0)
{
memset(&_msg, 0, sizeof(_msg));
@ -77,6 +83,18 @@ namespace {
_msg.msg_name = &_addr;
_msg.msg_namelen = sizeof(_addr);
}
/* initialize control message */
struct cmsghdr *cmsg;
_msg.msg_control = _cmsg_buf;
_msg.msg_controllen = sizeof(_cmsg_buf); /* buffer space available */
_msg.msg_flags |= MSG_CMSG_CLOEXEC;
cmsg = CMSG_FIRSTHDR(&_msg);
cmsg->cmsg_len = CMSG_LEN(0);
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
_msg.msg_controllen = cmsg->cmsg_len; /* actual cmsg length */
}
msghdr * msg() { return &_msg; }
@ -91,45 +109,47 @@ namespace {
_iovec.iov_len = buffer_len;
}
/**
* Prepare slot for socket sending/reception
*
* Note, if this function is not called sockets are not accepted on
* 'recvmsg' and, therefore, do not occupy local file descriptors.
*/
void prepare_reply_socket_slot()
void marshal_socket(int sd)
{
/* initialize control message */
struct cmsghdr *cmsg;
*((int *)CMSG_DATA((cmsghdr *)_cmsg_buf) + _num_sds) = sd;
_msg.msg_control = _cmsg_buf;
_msg.msg_controllen = sizeof(_cmsg_buf); /* buffer space available */
_msg.msg_flags |= MSG_CMSG_CLOEXEC;
cmsg = CMSG_FIRSTHDR(&_msg);
cmsg->cmsg_len = CMSG_LEN(sizeof(int));
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
_num_sds++;
struct cmsghdr *cmsg = CMSG_FIRSTHDR(&_msg);
cmsg->cmsg_len = CMSG_LEN(_num_sds*sizeof(int));
_msg.msg_controllen = cmsg->cmsg_len; /* actual cmsg length */
}
void reply_socket(int sd)
void accept_sockets(int num_sds)
{
if (!_msg.msg_control) {
PRAW("reply-socket slot not prepared");
throw Genode::Ipc_error();
}
*(int *)CMSG_DATA((cmsghdr *)_cmsg_buf) = sd;
struct cmsghdr *cmsg = CMSG_FIRSTHDR(&_msg);
cmsg->cmsg_len = CMSG_LEN(num_sds*sizeof(int));
_msg.msg_controllen = cmsg->cmsg_len; /* actual cmsg length */
}
int reply_socket() const
int unmarshal_socket()
{
if (!_msg.msg_control) {
PRAW("reply-socket slot not prepared");
throw Genode::Ipc_error();
}
int ret = *((int *)CMSG_DATA((cmsghdr *)_cmsg_buf) + _num_sds);
return *(int *)CMSG_DATA((cmsghdr *)_cmsg_buf);
_num_sds++;
return ret;
}
/* XXX only for debugging */
int socket_at_index(int index)
{
return *((int *)CMSG_DATA((cmsghdr *)_cmsg_buf) + index);
}
unsigned num_sockets() const
{
struct cmsghdr *cmsg = CMSG_FIRSTHDR(&_msg);
if (!cmsg)
return 0;
return (cmsg->cmsg_len - CMSG_ALIGN(sizeof(cmsghdr)))/sizeof(int);
}
};
@ -170,8 +190,8 @@ static int lx_server_socket(Genode::Thread_base *thread)
* Utility: Send request to server and wait for reply
*/
static void lx_call(long thread_id,
void *send_buf, Genode::size_t send_buf_len,
void *recv_buf, Genode::size_t recv_buf_len)
Genode::Msgbuf_base &send_msgbuf,
Genode::Msgbuf_base &recv_msgbuf)
{
int ret;
@ -184,29 +204,62 @@ static void lx_call(long thread_id,
ret = lx_socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0, reply_channel);
if (ret < 0) {
PRAW("lx_socketpair failed with %d", ret);
/* XXX */ wait_for_continue();
throw Genode::Ipc_error();
}
send_msg.prepare_reply_socket_slot();
send_msg.reply_socket(reply_channel[REMOTE_SOCKET]);
send_msg.buffer(send_buf, send_buf_len);
/* assemble message */
/* marshal reply capability */
send_msg.marshal_socket(reply_channel[REMOTE_SOCKET]);
/* marshal capabilities contained in 'send_msgbuf' */
if (send_msgbuf.used_caps() > 0)
PRAW("lx_call: marshal %d caps:", send_msgbuf.used_caps());
for (unsigned i = 0; i < send_msgbuf.used_caps(); i++) {
PRAW(" sd[%d]: %d", i, send_msgbuf.cap(i));
send_msg.marshal_socket(lx_dup(send_msgbuf.cap(i)));
}
send_msg.buffer(send_msgbuf.buf, send_msgbuf.used_size());
ret = lx_sendmsg(reply_channel[LOCAL_SOCKET], send_msg.msg(), 0);
if (ret < 0) {
PRAW("lx_sendmsg failed with %d in lx_call()", ret);
while (1);
/* XXX */ wait_for_continue();
throw Genode::Ipc_error();
}
Message recv_msg;
recv_msg.accept_sockets(Message::MAX_SDS_PER_MSG);
recv_msg.buffer(recv_buf, recv_buf_len);
recv_msg.buffer(recv_msgbuf.buf, recv_msgbuf.size());
ret = lx_recvmsg(reply_channel[LOCAL_SOCKET], recv_msg.msg(), 0);
if (ret < 0) {
PRAW("lx_recvmsg failed with %d in lx_call()", ret);
/* XXX */ wait_for_continue();
throw Genode::Ipc_error();
}
/*
* 'lx_recvmsg()' returns the number of bytes received. remember this value
* in 'Genode::Msgbuf_base'
*
* XXX revisit whether we really need this information
*/
recv_msgbuf.used_size(ret);
if (recv_msg.num_sockets() > 0) {
PRAW("lx_call: got %d sockets in reply", recv_msg.num_sockets());
for (unsigned i = 0; i < recv_msg.num_sockets(); i++) {
PRAW(" sd[%d]: %d", i, recv_msg.socket_at_index(i));
lx_close(recv_msg.socket_at_index(i));
}
}
/* destroy reply channel */
lx_close(reply_channel[LOCAL_SOCKET]);
lx_close(reply_channel[REMOTE_SOCKET]);
@ -219,22 +272,46 @@ static void lx_call(long thread_id,
* \return socket descriptor of reply capability
*/
static int lx_wait(Genode::Native_connection_state &cs,
void *buf, Genode::size_t buf_len)
Genode::Msgbuf_base &recv_msgbuf)
{
int ret;
Message msg;
msg.prepare_reply_socket_slot();
msg.buffer(buf, buf_len);
msg.accept_sockets(Message::MAX_SDS_PER_MSG);
msg.buffer(recv_msgbuf.buf, recv_msgbuf.size());
ret = lx_recvmsg(cs, msg.msg(), 0);
int ret = lx_recvmsg(cs, msg.msg(), 0);
if (ret < 0) {
PRAW("lx_recvmsg failed with %d in lx_wait()", ret);
/* XXX */ wait_for_continue();
throw Genode::Ipc_error();
}
return msg.reply_socket();
/*
* 'lx_recvmsg()' returned message size, keep it in 'recv_msgbuf'.
*
* XXX revisit whether this information is actually needed.
*/
recv_msgbuf.used_size(ret);
if (msg.num_sockets() > 1) {
PRAW("lx_wait: got %d sockets from wait", msg.num_sockets());
for (unsigned i = 0; i < msg.num_sockets(); i++) {
PRAW(" sd[%d]: %d", i, msg.socket_at_index(i));
/* don't close reply channel */
if (i > 0)
lx_close(msg.socket_at_index(i));
}
}
int reply_socket = msg.unmarshal_socket();
/*
* Copy-out additional sds from msg to recv_msgbuf
*/
return reply_socket;
}
@ -242,15 +319,13 @@ static int lx_wait(Genode::Native_connection_state &cs,
* Utility: Send reply to client
*/
static void lx_reply(int reply_socket,
void *buf, Genode::size_t buf_len)
Genode::Msgbuf_base &send_msgbuf)
{
int ret;
Message msg;
msg.buffer(buf, buf_len);
msg.buffer(send_msgbuf.buf, send_msgbuf.used_size());
ret = lx_sendmsg(reply_socket, msg.msg(), 0);
int ret = lx_sendmsg(reply_socket, msg.msg(), 0);
if (ret < 0)
PRAW("lx_sendmsg failed with %d in lx_reply()", ret);

View File

@ -64,6 +64,12 @@ inline int lx_close(int fd)
}
inline int lx_dup(int fd)
{
return lx_syscall(SYS_dup, fd);
}
inline int lx_unlink(const char *fname)
{
return lx_syscall(SYS_unlink, fname);