Skip to content

Commit 10500b8

Browse files
dipinhoraSeanTAllen
authored andcommitted
Fix ASIO one shot lost notifications problem (#2897)
Prior to this commit, the way the ASIO notifications worked on linux epoll included an edge case/race condition that allowed for a loss of ASIO events. As part of testing wallaroo scalability, we encountered an issue (WallarooLabs/wally#2547) where some of our TCP actors would stop reading from the socket for no reason even though there was data waiting on the socket. After much fighting, @SeanTAllen inferred that the ASIO subsystem was losing notifications and the TCP actor was never notified about the data waiting on the socket. This commit changes things so that going forward: * ASIO one shot subscriptions do *not* use edge triggered mode, but instead use level triggered mode * Always tries to resubscribe to both read and write depending on the state of the `ev->readable` and `ev->writeable` booleans * Only send exactly one notification to one shot subscribers for either reads or writes, until they reset the `ev->readable` and/or `ev->writeable` booleans and resubscribe for notifications again in case of spurious notifications from epoll due to the use of level triggered mode (even though one shot should ensure that we only get one notification). These changes ensure that if there's data on the socket at the time we (re)subscribe to reads, we will be told immediately by epoll that there is a read notification. Similarly, if the socket is writeable at the time we (re)subscribe to writes, we will be told immediately by epoll that there is a write notification. Both of these ensure that the ASIO subsystem will always be told by epoll when the socket is readable or writeable and avoid any race conditions due to edge triggering and the state of the socket at the time the subscription occurs.
1 parent 210dc8a commit 10500b8

File tree

2 files changed

+78
-52
lines changed

2 files changed

+78
-52
lines changed

src/libponyrt/asio/epoll.c

+78-43
Original file line numberDiff line numberDiff line change
@@ -144,11 +144,14 @@ void ponyint_asio_backend_final(asio_backend_t* b)
144144
eventfd_write(b->wakeup, 1);
145145
}
146146

147-
PONY_API void pony_asio_event_resubscribe_write(asio_event_t* ev)
147+
// Single function for resubscribing to both reads and writes for an event
148+
PONY_API void pony_asio_event_resubscribe(asio_event_t* ev)
148149
{
150+
// needs to be a valid event that is one shot enabled
149151
if((ev == NULL) ||
150152
(ev->flags == ASIO_DISPOSABLE) ||
151-
(ev->flags == ASIO_DESTROYED))
153+
(ev->flags == ASIO_DESTROYED) ||
154+
!(ev->flags & ASIO_ONESHOT))
152155
{
153156
pony_assert(0);
154157
return;
@@ -160,44 +163,44 @@ PONY_API void pony_asio_event_resubscribe_write(asio_event_t* ev)
160163
struct epoll_event ep;
161164
ep.data.ptr = ev;
162165
ep.events = 0;
166+
bool something_to_resub = false;
163167

164-
if(ev->flags & ASIO_ONESHOT)
165-
ep.events |= EPOLLONESHOT;
166-
168+
// if the event is supposed to be listening for write notifications
169+
// and it is currently not writeable
167170
if((ev->flags & ASIO_WRITE) && !ev->writeable)
171+
{
172+
something_to_resub = true;
168173
ep.events |= EPOLLOUT;
169-
else
170-
return;
171-
172-
epoll_ctl(b->epfd, EPOLL_CTL_MOD, ev->fd, &ep);
173-
}
174+
}
174175

175-
PONY_API void pony_asio_event_resubscribe_read(asio_event_t* ev)
176-
{
177-
if((ev == NULL) ||
178-
(ev->flags == ASIO_DISPOSABLE) ||
179-
(ev->flags == ASIO_DESTROYED))
176+
// if the event is supposed to be listening for read notifications
177+
// and it is currently not readable
178+
if((ev->flags & ASIO_READ) && !ev->readable)
180179
{
181-
pony_assert(0);
182-
return;
180+
something_to_resub = true;
181+
ep.events |= EPOLLRDHUP;
182+
ep.events |= EPOLLIN;
183183
}
184184

185-
asio_backend_t* b = ponyint_asio_get_backend();
186-
pony_assert(b != NULL);
187-
188-
struct epoll_event ep;
189-
ep.data.ptr = ev;
190-
ep.events = EPOLLRDHUP | EPOLLET;
191-
192-
if(ev->flags & ASIO_ONESHOT)
193-
ep.events |= EPOLLONESHOT;
185+
// only resubscribe if there is something to resubscribe to
186+
if (something_to_resub)
187+
epoll_ctl(b->epfd, EPOLL_CTL_MOD, ev->fd, &ep);
188+
}
194189

195-
if((ev->flags & ASIO_READ) && !ev->readable)
196-
ep.events |= EPOLLIN;
197-
else
198-
return;
190+
// Kept to maintain backwards compatibility so folks don't
191+
// have to change their code to use `pony_asio_event_resubscribe`
192+
// immediately
193+
PONY_API void pony_asio_event_resubscribe_write(asio_event_t* ev)
194+
{
195+
pony_asio_event_resubscribe(ev);
196+
}
199197

200-
epoll_ctl(b->epfd, EPOLL_CTL_MOD, ev->fd, &ep);
198+
// Kept to maintain backwards compatibility so folks don't
199+
// have to change their code to use `pony_asio_event_resubscribe`
200+
// immediately
201+
PONY_API void pony_asio_event_resubscribe_read(asio_event_t* ev)
202+
{
203+
pony_asio_event_resubscribe(ev);
201204
}
202205

203206
DECLARE_THREAD_FN(ponyint_asio_backend_dispatch)
@@ -235,17 +238,35 @@ DECLARE_THREAD_FN(ponyint_asio_backend_dispatch)
235238
{
236239
if(ep->events & (EPOLLIN | EPOLLRDHUP | EPOLLHUP | EPOLLERR))
237240
{
238-
flags |= ASIO_READ;
239-
ev->readable = true;
241+
// Send read notification to an actor if either
242+
// * the event is not a one shot event
243+
// * the event is a one shot event and we haven't already sent a notification
244+
// if the event is a one shot event and we have already sent a notification
245+
// don't send another one until we are asked for it again (i.e. the actor
246+
// gets a 0 byte read and sets `readable` to false and resubscribes to reads
247+
if(((ev->flags & ASIO_ONESHOT) && !ev->readable) || !(ev->flags & ASIO_ONESHOT))
248+
{
249+
ev->readable = true;
250+
flags |= ASIO_READ;
251+
}
240252
}
241253
}
242254

243255
if(ev->flags & ASIO_WRITE)
244256
{
245257
if(ep->events & EPOLLOUT)
246258
{
247-
flags |= ASIO_WRITE;
248-
ev->writeable = true;
259+
// Send write notification to an actor if either
260+
// * the event is not a one shot event
261+
// * the event is a one shot event and we haven't already sent a notification
262+
// if the event is a one shot event and we have already sent a notification
263+
// don't send another one until we are asked for it again (i.e. the actor
264+
// gets partial write and sets `writeable` to false and resubscribes to writes
265+
if(((ev->flags & ASIO_ONESHOT) && !ev->writeable) || !(ev->flags & ASIO_ONESHOT))
266+
{
267+
flags |= ASIO_WRITE;
268+
ev->writeable = true;
269+
}
249270
}
250271
}
251272

@@ -272,15 +293,20 @@ DECLARE_THREAD_FN(ponyint_asio_backend_dispatch)
272293
}
273294
}
274295

296+
// if we had a valid event of some type that needs to be sent
297+
// to an actor
275298
if(flags != 0)
276299
{
277-
if (!(flags & ASIO_DESTROYED) && !(flags & ASIO_DISPOSABLE))
278-
{
279-
if(ev->auto_resub && !(flags & ASIO_WRITE))
280-
pony_asio_event_resubscribe_write(ev);
281-
if(ev->auto_resub && !(flags & ASIO_READ))
282-
pony_asio_event_resubscribe_read(ev);
283-
}
300+
// if this event hasn't been destroyed or disposed.
301+
// to avoid a race condition if destroyed or dispoed events
302+
// are resubscribed
303+
if((ev->flags != ASIO_DISPOSABLE) && (ev->flags != ASIO_DESTROYED))
304+
// if this event is using one shot and should auto resubscribe and
305+
// then resubscribe
306+
if(ev->flags & ASIO_ONESHOT)
307+
pony_asio_event_resubscribe(ev);
308+
309+
// send the event to the actor
284310
pony_asio_event_send(ev, flags, count);
285311
}
286312
}
@@ -377,9 +403,18 @@ PONY_API void pony_asio_event_subscribe(asio_event_t* ev)
377403

378404
if(ev->flags & ASIO_ONESHOT) {
379405
ep.events |= EPOLLONESHOT;
380-
ev->auto_resub = true;
406+
// disable edge triggering if one shot is enabled
407+
// this is because of how the runtime gets notifications
408+
// from epoll in this ASIO thread and then notifies the
409+
// appropriate actor to read/write as necessary.
410+
// specifically, it seems there's an edge case/race condition
411+
// with edge triggering where if there is already data waiting
412+
// on the socket, then epoll might not be triggering immediately
413+
// when an edge triggered epoll request is made.
414+
ep.events &= ~EPOLLET;
381415
}
382416

417+
383418
epoll_ctl(b->epfd, EPOLL_CTL_ADD, ev->fd, &ep);
384419
}
385420

src/libponyrt/asio/event.h

-9
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,6 @@ typedef struct asio_event_t
2222
bool noisy; /* prevents termination? */
2323
uint64_t nsec; /* nanoseconds for timers */
2424

25-
#ifdef PLATFORM_IS_LINUX
26-
// automagically resubscribe to an event on an FD when another event is
27-
// on the same FD is triggered
28-
// This is only needed on linux where an event firing on an FD that has
29-
// ONESHOT enabled will clear all events for the FD and not only the
30-
// event that was fired.
31-
bool auto_resub; /* automagically resubscribe? */
32-
#endif
33-
3425
bool readable; /* is fd readable? */
3526
bool writeable; /* is fd writeable? */
3627
#ifdef PLATFORM_IS_WINDOWS

0 commit comments

Comments
 (0)