opnsense-src/sys/sys/sched.h

237 lines
7.9 KiB
C
Raw Normal View History

/*-
* Copyright (c) 1996, 1997
* HD Associates, Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by HD Associates, Inc
* and Jukka Antero Ukkonen.
* 4. Neither the name of the author nor the names of any co-contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY HD ASSOCIATES AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL HD ASSOCIATES OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*-
* Copyright (c) 2002-2008, Jeffrey Roberson <jeff@freebsd.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SCHED_H_
#define _SCHED_H_
#ifdef _KERNEL
/*
* General scheduling info.
*
* sched_load:
* Total runnable non-ithread threads in the system.
*
* sched_runnable:
* Runnable threads for this processor.
*/
int sched_load(void);
int sched_rr_interval(void);
int sched_runnable(void);
/*
* Proc related scheduling hooks.
*/
void sched_exit(struct proc *p, struct thread *childtd);
Refactor a bunch of scheduler code to give basically the same behaviour but with slightly cleaned up interfaces. The KSE structure has become the same as the "per thread scheduler private data" structure. In order to not make the diffs too great one is #defined as the other at this time. The KSE (or td_sched) structure is now allocated per thread and has no allocation code of its own. Concurrency for a KSEGRP is now kept track of via a simple pair of counters rather than using KSE structures as tokens. Since the KSE structure is different in each scheduler, kern_switch.c is now included at the end of each scheduler. Nothing outside the scheduler knows the contents of the KSE (aka td_sched) structure. The fields in the ksegrp structure that are to do with the scheduler's queueing mechanisms are now moved to the kg_sched structure. (per ksegrp scheduler private data structure). In other words how the scheduler queues and keeps track of threads is no-one's business except the scheduler's. This should allow people to write experimental schedulers with completely different internal structuring. A scheduler call sched_set_concurrency(kg, N) has been added that notifies teh scheduler that no more than N threads from that ksegrp should be allowed to be on concurrently scheduled. This is also used to enforce 'fainess' at this time so that a ksegrp with 10000 threads can not swamp a the run queue and force out a process with 1 thread, since the current code will not set the concurrency above NCPU, and both schedulers will not allow more than that many onto the system run queue at a time. Each scheduler should eventualy develop their own methods to do this now that they are effectively separated. Rejig libthr's kernel interface to follow the same code paths as linkse for scope system threads. This has slightly hurt libthr's performance but I will work to recover as much of it as I can. Thread exit code has been cleaned up greatly. exit and exec code now transitions a process back to 'standard non-threaded mode' before taking the next step. Reviewed by: scottl, peter MFC after: 1 week
2004-09-04 22:09:54 -04:00
void sched_fork(struct thread *td, struct thread *childtd);
void sched_fork_exit(struct thread *td);
void sched_class(struct thread *td, int class);
void sched_nice(struct proc *p, int nice);
/*
* Threads are switched in and out, block on resources, have temporary
* priorities inherited from their procs, and use up cpu time.
*/
void sched_exit_thread(struct thread *td, struct thread *child);
void sched_fork_thread(struct thread *td, struct thread *child);
void sched_lend_prio(struct thread *td, u_char prio);
void sched_lend_user_prio(struct thread *td, u_char pri);
fixpt_t sched_pctcpu(struct thread *td);
void sched_prio(struct thread *td, u_char prio);
void sched_sleep(struct thread *td, int prio);
void sched_switch(struct thread *td, struct thread *newtd, int flags);
void sched_throw(struct thread *td);
Rework the interface between priority propagation (lending) and the schedulers a bit to ensure more correct handling of priorities and fewer priority inversions: - Add two functions to the sched(9) API to handle priority lending: sched_lend_prio() and sched_unlend_prio(). The turnstile code uses these functions to ask the scheduler to lend a thread a set priority and to tell the scheduler when it thinks it is ok for a thread to stop borrowing priority. The unlend case is slightly complex in that the turnstile code tells the scheduler what the minimum priority of the thread needs to be to satisfy the requirements of any other threads blocked on locks owned by the thread in question. The scheduler then decides where the thread can go back to normal mode (if it's normal priority is high enough to satisfy the pending lock requests) or it it should continue to use the priority specified to the sched_unlend_prio() call. This involves adding a new per-thread flag TDF_BORROWING that replaces the ULE-only kse flag for priority elevation. - Schedulers now refuse to lower the priority of a thread that is currently borrowing another therad's priority. - If a scheduler changes the priority of a thread that is currently sitting on a turnstile, it will call a new function turnstile_adjust() to inform the turnstile code of the change. This function resorts the thread on the priority list of the turnstile if needed, and if the thread ends up at the head of the list (due to having the highest priority) and its priority was raised, then it will propagate that new priority to the owner of the lock it is blocked on. Some additional fixes specific to the 4BSD scheduler include: - Common code for updating the priority of a thread when the user priority of its associated kse group has been consolidated in a new static function resetpriority_thread(). One change to this function is that it will now only adjust the priority of a thread if it already has a time sharing priority, thus preserving any boosts from a tsleep() until the thread returns to userland. Also, resetpriority() no longer calls maybe_resched() on each thread in the group. Instead, the code calling resetpriority() is responsible for calling resetpriority_thread() on any threads that need to be updated. - schedcpu() now uses resetpriority_thread() instead of just calling sched_prio() directly after it updates a kse group's user priority. - sched_clock() now uses resetpriority_thread() rather than writing directly to td_priority. - sched_nice() now updates all the priorities of the threads after the group priority has been adjusted. Discussed with: bde Reviewed by: ups, jeffr Tested on: 4bsd, ule Tested on: i386, alpha, sparc64
2004-12-30 15:52:44 -05:00
void sched_unlend_prio(struct thread *td, u_char prio);
void sched_user_prio(struct thread *td, u_char prio);
void sched_userret(struct thread *td);
void sched_wakeup(struct thread *td);
void sched_preempt(struct thread *td);
/*
* Threads are moved on and off of run queues
*/
void sched_add(struct thread *td, int flags);
void sched_clock(struct thread *td);
void sched_rem(struct thread *td);
Refactor timer management code with priority to one-shot operation mode. The main goal of this is to generate timer interrupts only when there is some work to do. When CPU is busy interrupts are generating at full rate of hz + stathz to fullfill scheduler and timekeeping requirements. But when CPU is idle, only minimum set of interrupts (down to 8 interrupts per second per CPU now), needed to handle scheduled callouts is executed. This allows significantly increase idle CPU sleep time, increasing effect of static power-saving technologies. Also it should reduce host CPU load on virtualized systems, when guest system is idle. There is set of tunables, also available as writable sysctls, allowing to control wanted event timer subsystem behavior: kern.eventtimer.timer - allows to choose event timer hardware to use. On x86 there is up to 4 different kinds of timers. Depending on whether chosen timer is per-CPU, behavior of other options slightly differs. kern.eventtimer.periodic - allows to choose periodic and one-shot operation mode. In periodic mode, current timer hardware taken as the only source of time for time events. This mode is quite alike to previous kernel behavior. One-shot mode instead uses currently selected time counter hardware to schedule all needed events one by one and program timer to generate interrupt exactly in specified time. Default value depends of chosen timer capabilities, but one-shot mode is preferred, until other is forced by user or hardware. kern.eventtimer.singlemul - in periodic mode specifies how much times higher timer frequency should be, to not strictly alias hardclock() and statclock() events. Default values are 2 and 4, but could be reduced to 1 if extra interrupts are unwanted. kern.eventtimer.idletick - makes each CPU to receive every timer interrupt independently of whether they busy or not. By default this options is disabled. If chosen timer is per-CPU and runs in periodic mode, this option has no effect - all interrupts are generating. As soon as this patch modifies cpu_idle() on some platforms, I have also refactored one on x86. Now it makes use of MONITOR/MWAIT instrunctions (if supported) under high sleep/wakeup rate, as fast alternative to other methods. It allows SMP scheduler to wake up sleeping CPUs much faster without using IPI, significantly increasing performance on some highly task-switching loads. Tested by: many (on i386, amd64, sparc64 and powerc) H/W donated by: Gheorghe Ardelean Sponsored by: iXsystems, Inc.
2010-09-13 03:25:35 -04:00
void sched_tick(int cnt);
void sched_relinquish(struct thread *td);
struct thread *sched_choose(void);
void sched_idletd(void *);
/*
* Binding makes cpu affinity permanent while pinning is used to temporarily
* hold a thread on a particular CPU.
*/
void sched_bind(struct thread *td, int cpu);
static __inline void sched_pin(void);
void sched_unbind(struct thread *td);
static __inline void sched_unpin(void);
int sched_is_bound(struct thread *td);
void sched_affinity(struct thread *td);
/*
* These procedures tell the process data structure allocation code how
* many bytes to actually allocate.
*/
int sched_sizeof_proc(void);
int sched_sizeof_thread(void);
/*
* This routine provides a consistent thread name for use with KTR graphing
* functions.
*/
char *sched_tdname(struct thread *td);
static __inline void
sched_pin(void)
{
curthread->td_pinned++;
}
static __inline void
sched_unpin(void)
{
curthread->td_pinned--;
}
/* sched_add arguments (formerly setrunqueue) */
#define SRQ_BORING 0x0000 /* No special circumstances. */
#define SRQ_YIELDING 0x0001 /* We are yielding (from mi_switch). */
#define SRQ_OURSELF 0x0002 /* It is ourself (from mi_switch). */
#define SRQ_INTR 0x0004 /* It is probably urgent. */
#define SRQ_PREEMPTED 0x0008 /* has been preempted.. be kind */
#define SRQ_BORROWING 0x0010 /* Priority updated due to prio_lend */
/* Scheduler stats. */
#ifdef SCHED_STATS
DPCPU_DECLARE(long, sched_switch_stats[SWT_COUNT]);
#define SCHED_STAT_DEFINE_VAR(name, ptr, descr) \
static void name ## _add_proc(void *dummy __unused) \
{ \
\
SYSCTL_ADD_PROC(NULL, \
SYSCTL_STATIC_CHILDREN(_kern_sched_stats), OID_AUTO, \
#name, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE, \
ptr, 0, sysctl_dpcpu_long, "LU", descr); \
} \
SYSINIT(name, SI_SUB_RUN_SCHEDULER, SI_ORDER_MIDDLE, name ## _add_proc, NULL);
#define SCHED_STAT_DEFINE(name, descr) \
DPCPU_DEFINE(unsigned long, name); \
SCHED_STAT_DEFINE_VAR(name, &DPCPU_NAME(name), descr)
/*
* Sched stats are always incremented in critical sections so no atomic
* is necesssary to increment them.
*/
#define SCHED_STAT_INC(var) DPCPU_GET(var)++;
#else
#define SCHED_STAT_DEFINE_VAR(name, descr, ptr)
#define SCHED_STAT_DEFINE(name, descr)
#define SCHED_STAT_INC(var) (void)0
#endif
/*
* Fixup scheduler state for proc0 and thread0
*/
Refactor a bunch of scheduler code to give basically the same behaviour but with slightly cleaned up interfaces. The KSE structure has become the same as the "per thread scheduler private data" structure. In order to not make the diffs too great one is #defined as the other at this time. The KSE (or td_sched) structure is now allocated per thread and has no allocation code of its own. Concurrency for a KSEGRP is now kept track of via a simple pair of counters rather than using KSE structures as tokens. Since the KSE structure is different in each scheduler, kern_switch.c is now included at the end of each scheduler. Nothing outside the scheduler knows the contents of the KSE (aka td_sched) structure. The fields in the ksegrp structure that are to do with the scheduler's queueing mechanisms are now moved to the kg_sched structure. (per ksegrp scheduler private data structure). In other words how the scheduler queues and keeps track of threads is no-one's business except the scheduler's. This should allow people to write experimental schedulers with completely different internal structuring. A scheduler call sched_set_concurrency(kg, N) has been added that notifies teh scheduler that no more than N threads from that ksegrp should be allowed to be on concurrently scheduled. This is also used to enforce 'fainess' at this time so that a ksegrp with 10000 threads can not swamp a the run queue and force out a process with 1 thread, since the current code will not set the concurrency above NCPU, and both schedulers will not allow more than that many onto the system run queue at a time. Each scheduler should eventualy develop their own methods to do this now that they are effectively separated. Rejig libthr's kernel interface to follow the same code paths as linkse for scope system threads. This has slightly hurt libthr's performance but I will work to recover as much of it as I can. Thread exit code has been cleaned up greatly. exit and exec code now transitions a process back to 'standard non-threaded mode' before taking the next step. Reviewed by: scottl, peter MFC after: 1 week
2004-09-04 22:09:54 -04:00
void schedinit(void);
#endif /* _KERNEL */
/* POSIX 1003.1b Process Scheduling */
/*
* POSIX scheduling policies
*/
#define SCHED_FIFO 1
#define SCHED_OTHER 2
#define SCHED_RR 3
struct sched_param {
int sched_priority;
};
/*
* POSIX scheduling declarations for userland.
*/
#ifndef _KERNEL
#include <sys/cdefs.h>
#include <sys/_types.h>
#ifndef _PID_T_DECLARED
typedef __pid_t pid_t;
#define _PID_T_DECLARED
#endif
struct timespec;
__BEGIN_DECLS
int sched_get_priority_max(int);
int sched_get_priority_min(int);
int sched_getparam(pid_t, struct sched_param *);
int sched_getscheduler(pid_t);
int sched_rr_get_interval(pid_t, struct timespec *);
int sched_setparam(pid_t, const struct sched_param *);
int sched_setscheduler(pid_t, int, const struct sched_param *);
int sched_yield(void);
__END_DECLS
#endif
#endif /* !_SCHED_H_ */