The unified diff between revisions [14a5ae7c..] and [c025ed65..] is displayed below. It can also be downloaded as a raw diff.

#
#
# patch "src/comm_kqueue.c"
#  from [f513f3b3d07e675ac06e4380826484fe9f061535]
#    to [85ce34a77bc1bc8507541e04cf830e98f1aebf48]
#
# patch "src/comm_select.c"
#  from [4fdf649d1a0cb837d69227cf82d7a45bd0f86ba3]
#    to [5c6578f4d4b7473343347d251b23135717bfd543]
#
# patch "src/protos.h"
#  from [ecfd0bcf3229e6ad3df49aaf178a1ee35692ec0b]
#    to [9fb6496cd73419721ba2a0127ca527ce5ab96be4]
#
# patch "src/structs.h"
#  from [54f1dc039ccb7f431a72bbf25e11e74a5964b3de]
#    to [14d20fdde081243ab2ca579adb65e6e21c864d02]
#
============================================================
--- src/comm_kqueue.c	f513f3b3d07e675ac06e4380826484fe9f061535
+++ src/comm_kqueue.c	85ce34a77bc1bc8507541e04cf830e98f1aebf48
@@ -7,47 +7,330 @@
  * support into separate files.
  */

-static void checkTimeouts(void);

+#include "squid.h"
+#include <sys/event.h>

-/* Defer reads from this fd */
+#define KE_LENGTH        128
+
+/* jlemon goofed up and didn't add EV_SET until fbsd 4.3 */
+
+#ifndef EV_SET
+#define EV_SET(kevp, a, b, c, d, e, f) do {     \
+        (kevp)->ident = (a);                    \
+        (kevp)->filter = (b);                   \
+        (kevp)->flags = (c);                    \
+        (kevp)->fflags = (d);                   \
+        (kevp)->data = (e);                     \
+        (kevp)->udata = (f);                    \
+} while(0)
+#endif
+
+static void kq_update_events(int, short, PF *);
+static int kq;
+
+static struct timespec zero_timespec;
+
+static struct kevent *kqlst;        /* kevent buffer */
+static int kqmax;                /* max structs to buffer */
+static int kqoff;                /* offset into the buffer */
+static int max_poll_time = 1000;
+
+
+void checkTimeouts(void);
+int commDeferRead(int fd);
+
+
+/* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */
+/* Private functions */
+
 void
-commDeferFD(int fd)
+kq_update_events(int fd, short filter, PF * handler)
 {
+    PF *cur_handler;
+    int kep_flags;
+
+#if 0
+
+    int retval;
+#endif
+
+    switch (filter) {
+
+    case EVFILT_READ:
+        cur_handler = fd_table[fd].read_handler;
+        break;
+
+    case EVFILT_WRITE:
+        cur_handler = fd_table[fd].write_handler;
+        break;
+
+    default:
+        /* XXX bad! -- adrian */
+        return;
+        break;
+    }
+
+    if ((cur_handler == NULL && handler != NULL)
+            || (cur_handler != NULL && handler == NULL)) {
+
+        struct kevent *kep;
+
+        kep = kqlst + kqoff;
+
+        if (handler != NULL) {
+            kep_flags = (EV_ADD | EV_ONESHOT);
+        } else {
+            kep_flags = EV_DELETE;
+        }
+
+        EV_SET(kep, (uintptr_t) fd, filter, kep_flags, 0, 0, 0);
+
+        if (kqoff == kqmax) {
+            int ret;
+
+            ret = kevent(kq, kqlst, kqoff, NULL, 0, &zero_timespec);
+            /* jdc -- someone needs to do error checking... */
+
+            if (ret == -1) {
+                perror("kq_update_events(): kevent()");
+                return;
+            }
+
+            kqoff = 0;
+        } else {
+            kqoff++;
+        }
+
+#if 0
+        if (retval < 0) {
+            /* Error! */
+
+            if (ke.flags & EV_ERROR) {
+                errno = ke.data;
+            }
+        }
+
+#endif
+
+    }
 }


-/* Resume reading from the given fd */
+
+/* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */
+/* Public functions */
+
+
+/*
+ * comm_select_init
+ *
+ * This is a needed exported function which will be called to initialise
+ * the network loop code.
+ */
 void
-commResumeFD(int fd)
+comm_select_init(void)
 {
+    kq = kqueue();
+
+    if (kq < 0) {
+        fatal("comm_select_init: Couldn't open kqueue fd!\n");
+    }
+
+    kqmax = getdtablesize();
+
+    kqlst = (struct kevent *)xmalloc(sizeof(*kqlst) * kqmax);
+    zero_timespec.tv_sec = 0;
+    zero_timespec.tv_nsec = 0;
+    bzero(backoff_fds, sizeof(backoff_fds));
 }

+/*
+ * comm_setselect
+ *
+ * This is a needed exported function which will be called to register
+ * and deregister interest in a pending IO state for a given FD.
+ */
 void
-comm_select_init()
+commSetSelect(int fd, unsigned int type, PF * handler,
+              void *client_data, time_t timeout)
 {
+    fde *F = &fd_table[fd];
+    assert(fd >= 0);
+    assert(F->flags.open);
+
+    if ((! F->comm_backoff) && type & COMM_SELECT_READ) {
+        /* Don't add the read handler if the FD is backed off */
+        kq_update_events(fd, EVFILT_READ, handler);
+        F->read_handler = handler;
+        F->read_data = client_data;
+    }
+
+    if (type & COMM_SELECT_WRITE) {
+        kq_update_events(fd, EVFILT_WRITE, handler);
+        F->write_handler = handler;
+        F->write_data = client_data;
+    }
+
+    if (timeout)
+        F->timeout = squid_curtime + timeout;
+
 }

+/*
+ * Check all connections for new connections and input data that is to be
+ * processed. Also check for connections with data queued and whether we can
+ * write it out.
+ */
+
+/*
+ * comm_select
+ *
+ * Called to do the new-style IO, courtesy of of squid (like most of this
+ * new IO code). This routine handles the stuff we've hidden in
+ * comm_setselect and fd_table[] and calls callbacks for IO ready
+ * events.
+ */
+
+int
+comm_kqueue(int msec)
+{
+    int num, i;
+    static time_t last_timeout = 0;
+
+    static struct kevent ke[KE_LENGTH];
+
+    struct timespec poll_time;
+
+    /*
+     * remember we are doing NANOseconds here, not micro/milli. God knows
+     * why jlemon used a timespec, but hey, he wrote the interface, not I
+     *   -- Adrian
+     */
+
+    if (msec > max_poll_time)
+        msec = max_poll_time;
+
+    poll_time.tv_sec = msec / 1000;
+
+    poll_time.tv_nsec = (msec % 1000) * 1000000;
+
+    for (;;) {
+        num = kevent(kq, kqlst, kqoff, ke, KE_LENGTH, &poll_time);
+        statCounter.select_loops++;
+        kqoff = 0;
+
+        if (num >= 0)
+            break;
+
+        if (ignoreErrno(errno))
+            break;
+
+        getCurrentTime();
+
+        return COMM_ERROR;
+
+        /* NOTREACHED */
+    }
+    getCurrentTime();
+    if (squid_curtime > last_timeout) {
+        last_timeout = squid_curtime;
+        checkTimeouts();
+    }
+    if (num == 0)
+        return COMM_OK;		/* No error.. */
+
+    for (i = 0; i < num; i++) {
+        int fd = (int) ke[i].ident;
+        PF *hdl = NULL;
+        fde *F = &fd_table[fd];
+
+        if (ke[i].flags & EV_ERROR) {
+            errno = ke[i].data;
+            /* XXX error == bad! -- adrian */
+            continue;        /* XXX! */
+        }
+
+        switch (ke[i].filter) {
+
+        case EVFILT_READ:
+
+            if ((hdl = F->read_handler) != NULL) {
+                F->read_handler = NULL;
+                hdl(fd, F->read_data);
+            }
+
+            break;
+
+        case EVFILT_WRITE:
+
+            if ((hdl = F->write_handler) != NULL) {
+                F->write_handler = NULL;
+                hdl(fd, F->write_data);
+            }
+
+            break;
+
+        default:
+            /* Bad! -- adrian */
+            debug(5, 1) ("comm_select: kevent returned %d!\n", ke[i].filter);
+            break;
+        }
+    }
+
+    return COMM_OK;
+}
+
 void
-commUpdateReadBits(int fd, PF *handler)
+commDeferFD(int fd)
 {
-    /* Not implemented */
+	int i;
+	fde *F;
+
+	assert(fd >= 0);
+	F = &fd_table[fd];
+
+	/* Find a free slot in the backoff_fds list */
+	for (i = 0; i < FD_SETSIZE; i++) {
+		if (!(backoff_fds[i]) || (backoff_fds[i] == fd)) {
+			break;
+		}
+	}
+	/* Make sure we can back off */
+	assert(i <= FD_SETSIZE);
+	assert(F->flags.open);
+
+	/* Ok, back off */
+	F->comm_backoff = 1;
+	backoff_fds[i] = fd;
+        kq_update_events(fd, EVFILT_READ, NULL);
 }
-
-void
-commUpdateWriteBits(int fd, PF *handler)
+
+void
+commResumeFD(int fd)
 {
-    /* Not implemented */
+	fde *F = &fd_table[fd];
+
+	/* If the FD has been modified then do nothing */
+	if(!(F->read_handler) || !(F->comm_backoff)) {
+		debug(5, 2) ("commResumeFD: fd=%d ignoring read_handler=%p, comm_backoff=%d\n",fd,F->read_handler,F->comm_backoff);
+		F->comm_backoff=0;
+		return;
+	}
+	/* Rightio - mark it as resuming */
+	F->comm_backoff = 0;
+        kq_update_events(fd, EVFILT_READ, F->read_handler);
 }
-
+
 void
-commSetSelect(int fd, unsigned int type, PF * handler, void *client_data, time_t timeout)
+commUpdateReadBits(int fd, PF *handler)
 {
+    /* Not imlpemented */
 }

-int
-comm_kqueue(int msec)
+void
+commUpdateWriteBits(int fd, PF *handler)
 {
-	checkTimeouts();
-	return 0;
+    /* Not imlpemented */
 }
+
============================================================
--- src/comm_select.c	4fdf649d1a0cb837d69227cf82d7a45bd0f86ba3
+++ src/comm_select.c	5c6578f4d4b7473343347d251b23135717bfd543
@@ -32,13 +32,11 @@
  *
  */

-#define	HAVE_SELECT_OR_POLL	(HAVE_POLL && (!HAVE_KQUEUE) && (!HAVE_EPOLL))
-#define	HAVE_SELECT		(!HAVE_POLL && !HAVE_EPOLL && !HAVE_KQUEUE)
-#define	HAVE_EVENTNETIO		(HAVE_KQUEUE || HAVE_EPOLL)
-
 #include "squid.h"

 static int MAX_POLL_TIME = 1000;	/* see also comm_quick_poll_required() */
+/* Array to keep track of backed off filedescriptors */
+static int backoff_fds[FD_SETSIZE];

 #if HAVE_SELECT_OR_POLL

@@ -950,6 +948,7 @@ comm_select_init(void)
     FD_ZERO(&global_readfds);
     FD_ZERO(&global_writefds);
     nreadfds = nwritefds = 0;
+    bzero(backoff_fds, sizeof(backoff_fds));
 }

 #if HAVE_SELECT
@@ -1081,8 +1080,6 @@ static struct epoll_event *pevents;
 static int kdpfd;
 static struct epoll_event *pevents;

-/* Array to keep track of backed off filedescriptors */
-static int backoff_fds[FD_SETSIZE];

 static void checkTimeouts(void);
 static int commDeferRead(int fd);
@@ -1106,53 +1103,7 @@ epolltype_atoi(int x)
     }
 }

-/* Bring all fds back online */
-void
-commEpollBackon() {
-    fde *F;
-    int i;
-    int fd;
-    int j=0;

-    for(i=0;i<FD_SETSIZE;i++) {
-	if(backoff_fds[i]) {
-	    /* record the fd and zero the descriptor */
-	    fd=backoff_fds[i];
-	    F=&fd_table[fd];
-	    backoff_fds[i]=0;
-
-	    /* If the fd is no longer backed off, ignore */
-	    if(!(F->epoll_backoff)) {
-		continue;
-	    }
-
-	    /* If the fd is still meant to be backed off, add it to the start of
-	       the list and continue */
-	    if(commDeferRead(fd) == 1) {
-		backoff_fds[j++]=fd;
-		continue;
-	    }
-
-	    debug(5, 4) ("commEpollBackon: fd=%d\n",fd);
-
-	    /* Resume operations for this fd */
-	    commResumeFD(fd);
-	}
-	else
-	{
-	    /* Once we hit a non-backed off FD, we can break */
-	    break;
-	}
-    }
-}
-
-
-/* Back off on the next epoll for the given fd */
-void
-commEpollBackoff(int fd) {
-    commDeferFD(fd);
-}
-
 /* Defer reads from this fd */
 void
 commDeferFD(int fd) {
@@ -1162,7 +1113,7 @@ commDeferFD(int fd) {
     int i;

     /* Return if the fd is already backed off */
-    if(F->epoll_backoff) {
+    if(F->comm_backoff) {
 	return;
     }

@@ -1202,7 +1153,7 @@ commDeferFD(int fd) {
     else
     {
 	backoff_fds[i]=fd;
-	F->epoll_backoff=1;
+	F->comm_backoff=1;
 	F->epoll_state=ev.events;
     }
 }
@@ -1217,9 +1168,9 @@ commResumeFD(int fd) {
     F=&fd_table[fd];

     /* If the fd has been modified, do nothing and remove the flag */
-    if(!(F->read_handler) || !(F->epoll_backoff)) {
-	debug(5, 2) ("commResumeFD: fd=%d ignoring read_handler=%p, epoll_backoff=%d\n",fd,F->read_handler,F->epoll_backoff);
-	F->epoll_backoff=0;
+    if(!(F->read_handler) || !(F->comm_backoff)) {
+	debug(5, 2) ("commResumeFD: fd=%d ignoring read_handler=%p, comm_backoff=%d\n",fd,F->read_handler,F->comm_backoff);
+	F->comm_backoff=0;
 	return;
     }

@@ -1245,7 +1196,7 @@ commResumeFD(int fd) {
     }
     else
     {
-	F->epoll_backoff=0;
+	F->comm_backoff=0;
 	F->epoll_state=ev.events;
     }
 }
@@ -1263,10 +1214,7 @@ comm_select_init()
     if (kdpfd < 0) {
 	fatalf("comm_select_init: epoll_create(): %s\n",xstrerror());
     }
-
-    for (i = 0; i < FD_SETSIZE; i++) {
-	backoff_fds[i]=0;
-    }
+    bzero(backoff_fds, sizeof(backoff_fds);
 }

 void
@@ -1297,7 +1245,7 @@ commSetSelect(int fd, unsigned int type,

     if (type & COMM_SELECT_READ) {
 	/* Only add the epoll event if the fd is not backed off */
-	if (handler && !(F->epoll_backoff)) {
+	if (handler && !(F->comm_backoff)) {
 	    ev.events |= EPOLLIN;
 	}

@@ -1305,7 +1253,7 @@ commSetSelect(int fd, unsigned int type,
 	F->read_data = client_data;

 	// Otherwise, use previously stored value if the fd is not backed off
-    } else if ((F->epoll_state & EPOLLIN) && (F->read_handler) && !(F->epoll_backoff)) {
+    } else if ((F->epoll_state & EPOLLIN) && (F->read_handler) && !(F->comm_backoff)) {
 	ev.events |= EPOLLIN;
     }

@@ -1386,7 +1334,7 @@ int comm_epoll(int msec)
 	    checkTimeouts();

 	    /* bring backed off connections back online */
-	    commEpollBackon();
+	    commBackon();
 	}

 	/* Check for disk io callbacks */
@@ -1422,7 +1370,7 @@ int comm_epoll(int msec)
 		if((hdl = F->read_handler) != NULL) {
 		    // If the descriptor is meant to be deferred, don't handle
 		    if(commDeferRead(fd) == 1) {
-			if(!(F->epoll_backoff)) {
+			if(!(F->comm_backoff)) {
 			    debug(5, 1) ("comm_epoll(): WARNING defer handler for fd=%d (desc=%s) does not call commDeferFD() - backing off manually\n",fd,F->desc);
 			    commEpollBackoff(fd);
 			}
@@ -1480,7 +1428,7 @@ WRITE_EVENT:
 #error HAVE_KQUEUE, HAVE_EPOLL, HAVE_POLL (and no select!) wasn't defined!
 #endif

-static int
+int
 commDeferRead(int fd)
 {
     fde *F = &fd_table[fd];
@@ -1489,7 +1437,7 @@ commDeferRead(int fd)
     return F->defer_check(fd, F->defer_data);
 }

-static void
+void
 checkTimeouts(void)
 {
     int fd;
@@ -1516,7 +1464,53 @@ checkTimeouts(void)
     }
 }

+/* Bring all fds back online */
+void
+commBackon() {
+    fde *F;
+    int i;
+    int fd;
+    int j=0;

+    for(i=0;i<FD_SETSIZE;i++) {
+	if(backoff_fds[i]) {
+	    /* record the fd and zero the descriptor */
+	    fd=backoff_fds[i];
+	    F=&fd_table[fd];
+	    backoff_fds[i]=0;
+
+	    /* If the fd is no longer backed off, ignore */
+	    if(!(F->comm_backoff)) {
+		continue;
+	    }
+
+	    /* If the fd is still meant to be backed off, add it to the start of
+	       the list and continue */
+	    if(commDeferRead(fd) == 1) {
+		backoff_fds[j++]=fd;
+		continue;
+	    }
+
+	    debug(5, 4) ("commEpollBackon: fd=%d\n",fd);
+
+	    /* Resume operations for this fd */
+	    commResumeFD(fd);
+	}
+	else
+	{
+	    /* Once we hit a non-backed off FD, we can break */
+	    break;
+	}
+    }
+}
+
+
+/* Back off on the next epoll for the given fd */
+void
+commBackoff(int fd) {
+    commDeferFD(fd);
+}
+
 /* Called by async-io or diskd to speed up the polling */
 void
 comm_quick_poll_required(void)
============================================================
--- src/protos.h	ecfd0bcf3229e6ad3df49aaf178a1ee35692ec0b
+++ src/protos.h	9fb6496cd73419721ba2a0127ca527ce5ab96be4
@@ -166,6 +166,8 @@ extern void commResumeFD(int fd);
 extern void commDeferFD(int fd);
 extern void commResumeFD(int fd);
 #endif
+extern void commBackoff(int fd);
+extern void commBackon(void);
 extern void commSetSelect(int, unsigned int, PF *, void *, time_t);
 extern void comm_add_close_handler(int fd, PF *, void *);
 extern void comm_remove_close_handler(int fd, PF *, void *);
============================================================
--- src/structs.h	54f1dc039ccb7f431a72bbf25e11e74a5964b3de
+++ src/structs.h	14d20fdde081243ab2ca579adb65e6e21c864d02
@@ -815,8 +815,8 @@ struct _fde {
     int uses;			/* ie # req's over persistent conn */
 #if HAVE_EPOLL
     unsigned epoll_state;	/* keep track of the epoll state */
-    unsigned epoll_backoff;	/* keep track of whether the fd is backed off */
 #endif
+    unsigned comm_backoff;	/* keep track of whether the fd is backed off */
     struct _fde_disk {
 	DWCB *wrt_handle;
 	void *wrt_handle_data;