diff --git a/doc/configuration.txt b/doc/configuration.txt index 2ce96167e..b34946485 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -1294,6 +1294,25 @@ wurfl-useragent-priority { plain | sideloaded_browser } 3.2. Performance tuning ----------------------- +busy-polling + In some situations, especially when dealing with low latency on processors + supporting a variable frequency or when running inside virtual machines, each + time the process waits for an I/O using the poller, the processor goes back + to sleep or is offered to another VM for a long time, which causes + excessively high latencies. This option provides a solution preventing the + processor from sleeping by always using a null timeout on the pollers. This + results in a significant latency reduction (30 to 100 microseconds observed) + at the expense of a risk of overheating the processor. It may even be used + with threads, in which case improperly bound threads may heavily conflict, + resulting in worse performance and high values for the CPU stolen fields + in "show info" output, indicating which threads are misconfigured. It is + important not to let the process run on the same processor as the network + interrupts when this option is used. It is also better to avoid using it on + multiple CPU threads sharing the same core. This option is disabled by + default. If it has been enabled, it may still be forcibly disabled by + prefixing it with the "no" keyword. It is ignored by the "select" and + "poll" pollers. + max-spread-checks By default, haproxy tries to spread the start of health checks across the smallest health check interval of all the servers in a farm. 
The principle is diff --git a/include/types/global.h b/include/types/global.h index 5a3f338a4..24eeb0ce7 100644 --- a/include/types/global.h +++ b/include/types/global.h @@ -68,6 +68,8 @@ #define GTUNE_NOEXIT_ONFAILURE (1<<9) #define GTUNE_USE_SYSTEMD (1<<10) +#define GTUNE_BUSY_POLLING (1<<11) + /* Access level for a stats socket */ #define ACCESS_LVL_NONE 0 #define ACCESS_LVL_USER 1 diff --git a/include/types/stats.h b/include/types/stats.h index 85cc906da..a188667e8 100644 --- a/include/types/stats.h +++ b/include/types/stats.h @@ -295,6 +295,7 @@ enum info_field { INF_ACTIVE_PEERS, INF_CONNECTED_PEERS, INF_DROPPED_LOGS, + INF_BUSY_POLLING, /* must always be the last one */ INF_TOTAL_FIELDS diff --git a/src/cfgparse-global.c b/src/cfgparse-global.c index a00d96255..4303ef987 100644 --- a/src/cfgparse-global.c +++ b/src/cfgparse-global.c @@ -73,6 +73,14 @@ int cfg_parse_global(const char *file, int linenum, char **args, int kwm) goto out; global.tune.options &= ~GTUNE_USE_POLL; } + else if (!strcmp(args[0], "busy-polling")) { /* "no busy-polling" or "busy-polling" */ + if (alertif_too_many_args(0, file, linenum, args, &err_code)) + goto out; + if (kwm == KWM_NO) + global.tune.options &= ~GTUNE_BUSY_POLLING; + else + global.tune.options |= GTUNE_BUSY_POLLING; + } else if (!strcmp(args[0], "nosplice")) { if (alertif_too_many_args(0, file, linenum, args, &err_code)) goto out; diff --git a/src/cfgparse.c b/src/cfgparse.c index 2c660ab0d..1e6566856 100644 --- a/src/cfgparse.c +++ b/src/cfgparse.c @@ -1871,8 +1871,8 @@ next_line: } if (kwm != KWM_STD && strcmp(args[0], "option") != 0 && \ - strcmp(args[0], "log") != 0) { - ha_alert("parsing [%s:%d]: negation/default currently supported only for options and log.\n", file, linenum); + strcmp(args[0], "log") != 0 && strcmp(args[0], "busy-polling")) { + ha_alert("parsing [%s:%d]: negation/default currently supported only for options, log, and busy-polling.\n", file, linenum); err_code |= ERR_ALERT | ERR_FATAL; } diff --git 
a/src/ev_epoll.c b/src/ev_epoll.c index 272ded2b4..0bb8c9f89 100644 --- a/src/ev_epoll.c +++ b/src/ev_epoll.c @@ -27,6 +27,7 @@ #include #include +#include /* private data */ @@ -149,8 +150,22 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) wait_time = compute_poll_timeout(exp); tv_entering_poll(); activity_count_runtime(); - status = epoll_wait(epoll_fd[tid], epoll_events, global.tune.maxpollevents, wait_time); - tv_update_date(wait_time, status); + do { + int timeout = (global.tune.options & GTUNE_BUSY_POLLING) ? 0 : wait_time; + + status = epoll_wait(epoll_fd[tid], epoll_events, global.tune.maxpollevents, timeout); + tv_update_date(timeout, status); + + if (status) + break; + if (timeout || !wait_time) + break; + if (signal_queue_len) + break; + if (tick_isset(exp) && tick_is_expired(exp, now_ms)) + break; + } while (1); + tv_leaving_poll(wait_time, status); thread_harmless_end(); diff --git a/src/ev_kqueue.c b/src/ev_kqueue.c index a894f6687..d09784770 100644 --- a/src/ev_kqueue.c +++ b/src/ev_kqueue.c @@ -28,6 +28,7 @@ #include #include +#include /* private data */ @@ -76,14 +77,14 @@ static int _update_fd(int fd, int start) REGPRM2 static void _do_poll(struct poller *p, int exp) { int status; - int count, fd, delta_ms; - struct timespec timeout; + int count, fd, wait_time; + struct timespec timeout_ts; int updt_idx; int changes = 0; int old_fd; - timeout.tv_sec = 0; - timeout.tv_nsec = 0; + timeout_ts.tv_sec = 0; + timeout_ts.tv_nsec = 0; /* first, scan the update list to find changes */ for (updt_idx = 0; updt_idx < fd_nbupdt; updt_idx++) { fd = fd_updt[updt_idx]; @@ -126,25 +127,41 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) */ EV_SET(&kev[changes++], -1, EVFILT_WRITE, EV_DELETE, 0, 0, NULL); #endif - kevent(kqueue_fd[tid], kev, changes, kev_out, changes, &timeout); + kevent(kqueue_fd[tid], kev, changes, kev_out, changes, &timeout_ts); } fd_nbupdt = 0; /* now let's wait for events */ - delta_ms = compute_poll_timeout(exp); - 
timeout.tv_sec = (delta_ms / 1000); - timeout.tv_nsec = (delta_ms % 1000) * 1000000; + wait_time = compute_poll_timeout(exp); fd = global.tune.maxpollevents; tv_entering_poll(); activity_count_runtime(); - status = kevent(kqueue_fd[tid], // int kq - NULL, // const struct kevent *changelist - 0, // int nchanges - kev, // struct kevent *eventlist - fd, // int nevents - &timeout); // const struct timespec *timeout - tv_update_date(delta_ms, status); - tv_leaving_poll(delta_ms, status); + + do { + int timeout = (global.tune.options & GTUNE_BUSY_POLLING) ? 0 : wait_time; + + timeout_ts.tv_sec = (timeout / 1000); + timeout_ts.tv_nsec = (timeout % 1000) * 1000000; + + status = kevent(kqueue_fd[tid], // int kq + NULL, // const struct kevent *changelist + 0, // int nchanges + kev, // struct kevent *eventlist + fd, // int nevents + &timeout_ts); // const struct timespec *timeout + tv_update_date(timeout, status); + + if (status) + break; + if (timeout || !wait_time) + break; + if (signal_queue_len) + break; + if (tick_isset(exp) && tick_is_expired(exp, now_ms)) + break; + } while (1); + + tv_leaving_poll(wait_time, status); thread_harmless_end(); diff --git a/src/stats.c b/src/stats.c index b082d810f..9b0800edd 100644 --- a/src/stats.c +++ b/src/stats.c @@ -139,6 +139,7 @@ const char *info_field_names[INF_TOTAL_FIELDS] = { [INF_ACTIVE_PEERS] = "ActivePeers", [INF_CONNECTED_PEERS] = "ConnectedPeers", [INF_DROPPED_LOGS] = "DroppedLogs", + [INF_BUSY_POLLING] = "BusyPolling", }; const char *stat_field_names[ST_F_TOTAL_FIELDS] = { @@ -3562,6 +3563,7 @@ int stats_fill_info(struct field *info, int len) info[INF_ACTIVE_PEERS] = mkf_u32(0, active_peers); info[INF_CONNECTED_PEERS] = mkf_u32(0, connected_peers); info[INF_DROPPED_LOGS] = mkf_u32(0, dropped_logs); + info[INF_BUSY_POLLING] = mkf_u32(0, !!(global.tune.options & GTUNE_BUSY_POLLING)); return 1; }