Changes in kernel/generic/src/proc/scheduler.c [583c2a3:d23712e] in mainline
File: 1 edited
kernel/generic/src/proc/scheduler.c
r583c2a3 rd23712e 1 1 /* 2 2 * Copyright (c) 2010 Jakub Jermar 3 * Copyright (c) 2023 Jiří Zárevúcky 3 4 * All rights reserved. 4 5 * … … 50 51 #include <time/delay.h> 51 52 #include <arch/asm.h> 52 #include <arch/faddr.h>53 53 #include <arch/cycle.h> 54 54 #include <atomic.h> … … 66 66 #include <stacktrace.h> 67 67 68 static void scheduler_separated_stack(void); 69 70 atomic_t nrdy; /**< Number of ready threads in the system. */ 71 72 /** Carry out actions before new task runs. */ 73 static void before_task_runs(void) 74 { 75 before_task_runs_arch(); 76 } 77 78 /** Take actions before new thread runs. 79 * 80 * Perform actions that need to be 81 * taken before the newly selected 82 * thread is passed control. 83 * 84 * THREAD->lock is locked on entry 85 * 86 */ 87 static void before_thread_runs(void) 88 { 89 before_thread_runs_arch(); 90 91 #ifdef CONFIG_FPU_LAZY 92 if (THREAD == CPU->fpu_owner) 93 fpu_enable(); 94 else 95 fpu_disable(); 96 #elif defined CONFIG_FPU 97 fpu_enable(); 98 if (THREAD->fpu_context_exists) 99 fpu_context_restore(THREAD->saved_fpu_context); 100 else { 101 fpu_init(); 102 THREAD->fpu_context_exists = true; 103 } 104 #endif 105 106 #ifdef CONFIG_UDEBUG 107 if (THREAD->btrace) { 108 istate_t *istate = THREAD->udebug.uspace_state; 109 if (istate != NULL) { 110 printf("Thread %" PRIu64 " stack trace:\n", THREAD->tid); 111 stack_trace_istate(istate); 112 } 113 114 THREAD->btrace = false; 115 } 116 #endif 117 } 118 119 /** Take actions after THREAD had run. 120 * 121 * Perform actions that need to be 122 * taken after the running thread 123 * had been preempted by the scheduler. 124 * 125 * THREAD->lock is locked on entry 126 * 127 */ 128 static void after_thread_ran(void) 129 { 130 after_thread_ran_arch(); 131 } 68 atomic_size_t nrdy; /**< Number of ready threads in the system. */ 132 69 133 70 #ifdef CONFIG_FPU_LAZY … … 135 72 { 136 73 fpu_enable(); 137 irq_spinlock_lock(&CPU->lock, false); 74 75 /* We need this lock to ensure synchronization with thread destructor. 
*/ 76 irq_spinlock_lock(&CPU->fpu_lock, false); 138 77 139 78 /* Save old context */ 140 if (CPU->fpu_owner != NULL) { 141 irq_spinlock_lock(&CPU->fpu_owner->lock, false); 142 fpu_context_save(CPU->fpu_owner->saved_fpu_context); 143 144 /* Don't prevent migration */ 145 CPU->fpu_owner->fpu_context_engaged = false; 146 irq_spinlock_unlock(&CPU->fpu_owner->lock, false); 147 CPU->fpu_owner = NULL; 148 } 149 150 irq_spinlock_lock(&THREAD->lock, false); 79 thread_t *owner = atomic_load_explicit(&CPU->fpu_owner, memory_order_relaxed); 80 if (owner != NULL) { 81 fpu_context_save(&owner->fpu_context); 82 atomic_store_explicit(&CPU->fpu_owner, NULL, memory_order_relaxed); 83 } 84 85 irq_spinlock_unlock(&CPU->fpu_lock, false); 86 151 87 if (THREAD->fpu_context_exists) { 152 fpu_context_restore( THREAD->saved_fpu_context);88 fpu_context_restore(&THREAD->fpu_context); 153 89 } else { 154 90 fpu_init(); … … 156 92 } 157 93 158 CPU->fpu_owner = THREAD; 159 THREAD->fpu_context_engaged = true; 160 irq_spinlock_unlock(&THREAD->lock, false); 161 162 irq_spinlock_unlock(&CPU->lock, false); 94 atomic_store_explicit(&CPU->fpu_owner, THREAD, memory_order_relaxed); 163 95 } 164 96 #endif /* CONFIG_FPU_LAZY */ … … 182 114 * 183 115 */ 184 static thread_t *find_best_thread(void) 185 { 116 static thread_t *try_find_thread(int *rq_index) 117 { 118 assert(interrupts_disabled()); 186 119 assert(CPU != NULL); 187 120 188 loop: 189 190 if (atomic_load(&CPU->nrdy) == 0) { 191 /* 192 * For there was nothing to run, the CPU goes to sleep 193 * until a hardware interrupt or an IPI comes. 194 * This improves energy saving and hyperthreading. 195 */ 196 irq_spinlock_lock(&CPU->lock, false); 197 CPU->idle = true; 198 irq_spinlock_unlock(&CPU->lock, false); 199 interrupts_enable(); 200 201 /* 202 * An interrupt might occur right now and wake up a thread. 203 * In such case, the CPU will continue to go to sleep 204 * even though there is a runnable thread. 205 */ 206 cpu_sleep(); 207 interrupts_disable(); 208 goto loop; 209 } 210 211 assert(!CPU->idle); 212 213 unsigned int i; 214 for (i = 0; i < RQ_COUNT; i++) { 121 if (atomic_load(&CPU->nrdy) == 0) 122 return NULL; 123 124 for (int i = 0; i < RQ_COUNT; i++) { 215 125 irq_spinlock_lock(&(CPU->rq[i].lock), false); 216 126 if (CPU->rq[i].n == 0) { … … 233 143 list_remove(&thread->rq_link); 234 144 235 irq_spinlock_pass(&(CPU->rq[i].lock), &thread->lock); 236 237 thread->cpu = CPU; 238 thread->ticks = us2ticks((i + 1) * 10000); 239 thread->priority = i; /* Correct rq index */ 240 241 /* 242 * Clear the stolen flag so that it can be migrated 243 * when load balancing needs emerge. 244 */ 245 thread->stolen = false; 246 irq_spinlock_unlock(&thread->lock, false); 247 145 irq_spinlock_unlock(&(CPU->rq[i].lock), false); 146 147 *rq_index = i; 248 148 return thread; 249 149 } 250 150 251 goto loop; 151 return NULL; 152 } 153 154 /** Get thread to be scheduled 155 * 156 * Get the optimal thread to be scheduled 157 * according to thread accounting and scheduler 158 * policy. 159 * 160 * @return Thread to be scheduled. 161 * 162 */ 163 static thread_t *find_best_thread(int *rq_index) 164 { 165 assert(interrupts_disabled()); 166 assert(CPU != NULL); 167 168 while (true) { 169 thread_t *thread = try_find_thread(rq_index); 170 171 if (thread != NULL) 172 return thread; 173 174 /* 175 * For there was nothing to run, the CPU goes to sleep 176 * until a hardware interrupt or an IPI comes. 177 * This improves energy saving and hyperthreading. 
178 */ 179 CPU_LOCAL->idle = true; 180 181 /* 182 * Go to sleep with interrupts enabled. 183 * Ideally, this should be atomic, but this is not guaranteed on 184 * all platforms yet, so it is possible we will go sleep when 185 * a thread has just become available. 186 */ 187 cpu_interruptible_sleep(); 188 } 189 } 190 191 static void switch_task(task_t *task) 192 { 193 /* If the task stays the same, a lot of work is avoided. */ 194 if (TASK == task) 195 return; 196 197 as_t *old_as = AS; 198 as_t *new_as = task->as; 199 200 /* It is possible for two tasks to share one address space. */ 201 if (old_as != new_as) 202 as_switch(old_as, new_as); 203 204 if (TASK) 205 task_release(TASK); 206 207 TASK = task; 208 209 task_hold(TASK); 210 211 before_task_runs_arch(); 252 212 } 253 213 … … 265 225 static void relink_rq(int start) 266 226 { 227 assert(interrupts_disabled()); 228 229 if (CPU_LOCAL->current_clock_tick < CPU_LOCAL->relink_deadline) 230 return; 231 232 CPU_LOCAL->relink_deadline = CPU_LOCAL->current_clock_tick + NEEDS_RELINK_MAX; 233 234 /* Temporary cache for lists we are moving. */ 267 235 list_t list; 268 269 236 list_initialize(&list); 270 irq_spinlock_lock(&CPU->lock, false); 271 272 if (CPU->needs_relink > NEEDS_RELINK_MAX) { 273 int i; 274 for (i = start; i < RQ_COUNT - 1; i++) { 275 /* Remember and empty rq[i + 1] */ 276 277 irq_spinlock_lock(&CPU->rq[i + 1].lock, false); 278 list_concat(&list, &CPU->rq[i + 1].rq); 279 size_t n = CPU->rq[i + 1].n; 280 CPU->rq[i + 1].n = 0; 281 irq_spinlock_unlock(&CPU->rq[i + 1].lock, false); 282 283 /* Append rq[i + 1] to rq[i] */ 284 285 irq_spinlock_lock(&CPU->rq[i].lock, false); 286 list_concat(&CPU->rq[i].rq, &list); 287 CPU->rq[i].n += n; 288 irq_spinlock_unlock(&CPU->rq[i].lock, false); 289 } 290 291 CPU->needs_relink = 0; 292 } 293 294 irq_spinlock_unlock(&CPU->lock, false); 295 } 296 297 /** The scheduler 298 * 299 * The thread scheduling procedure. 300 * Passes control directly to 301 * scheduler_separated_stack(). 302 * 303 */ 304 void scheduler(void) 305 { 306 volatile ipl_t ipl; 307 308 assert(CPU != NULL); 309 310 ipl = interrupts_disable(); 311 312 if (atomic_load(&haltstate)) 313 halt(); 314 315 if (THREAD) { 316 irq_spinlock_lock(&THREAD->lock, false); 317 318 /* Update thread kernel accounting */ 319 THREAD->kcycles += get_cycle() - THREAD->last_cycle; 320 237 238 size_t n = 0; 239 240 /* Move every list (except the one with highest priority) one level up. */ 241 for (int i = RQ_COUNT - 1; i > start; i--) { 242 irq_spinlock_lock(&CPU->rq[i].lock, false); 243 244 /* Swap lists. */ 245 list_swap(&CPU->rq[i].rq, &list); 246 247 /* Swap number of items. */ 248 size_t tmpn = CPU->rq[i].n; 249 CPU->rq[i].n = n; 250 n = tmpn; 251 252 irq_spinlock_unlock(&CPU->rq[i].lock, false); 253 } 254 255 /* Append the contents of rq[start + 1] to rq[start]. */ 256 if (n != 0) { 257 irq_spinlock_lock(&CPU->rq[start].lock, false); 258 list_concat(&CPU->rq[start].rq, &list); 259 CPU->rq[start].n += n; 260 irq_spinlock_unlock(&CPU->rq[start].lock, false); 261 } 262 } 263 264 /** 265 * Do whatever needs to be done with current FPU state before we switch to 266 * another thread. 
267 */ 268 static void fpu_cleanup(void) 269 { 321 270 #if (defined CONFIG_FPU) && (!defined CONFIG_FPU_LAZY) 322 fpu_context_save(THREAD->saved_fpu_context);271 fpu_context_save(&THREAD->fpu_context); 323 272 #endif 324 if (!context_save(&THREAD->saved_context)) { 325 /* 326 * This is the place where threads leave scheduler(); 327 */ 328 329 /* Save current CPU cycle */ 330 THREAD->last_cycle = get_cycle(); 331 332 irq_spinlock_unlock(&THREAD->lock, false); 333 interrupts_restore(THREAD->saved_context.ipl); 334 335 return; 336 } 337 338 /* 339 * Interrupt priority level of preempted thread is recorded 340 * here to facilitate scheduler() invocations from 341 * interrupts_disable()'d code (e.g. waitq_sleep_timeout()). 342 * 343 */ 344 THREAD->saved_context.ipl = ipl; 345 } 346 273 } 274 275 /** 276 * Set correct FPU state for this thread after switch from another thread. 277 */ 278 static void fpu_restore(void) 279 { 280 #ifdef CONFIG_FPU_LAZY 347 281 /* 348 * Through the 'CURRENT' structure, we keep track of THREAD, TASK, CPU, AS 349 * and preemption counter. At this point CURRENT could be coming either 350 * from THREAD's or CPU's stack. 351 * 282 * The only concurrent modification possible for fpu_owner here is 283 * another thread changing it from itself to NULL in its destructor. 352 284 */ 353 current_copy(CURRENT, (current_t *) CPU->stack); 285 thread_t *owner = atomic_load_explicit(&CPU->fpu_owner, 286 memory_order_relaxed); 287 288 if (THREAD == owner) 289 fpu_enable(); 290 else 291 fpu_disable(); 292 293 #elif defined CONFIG_FPU 294 fpu_enable(); 295 if (THREAD->fpu_context_exists) 296 fpu_context_restore(&THREAD->fpu_context); 297 else { 298 fpu_init(); 299 THREAD->fpu_context_exists = true; 300 } 301 #endif 302 } 303 304 /** Things to do before we switch to THREAD context. 305 */ 306 static void prepare_to_run_thread(int rq_index) 307 { 308 relink_rq(rq_index); 309 310 switch_task(THREAD->task); 311 312 assert(atomic_get_unordered(&THREAD->cpu) == CPU); 313 314 atomic_set_unordered(&THREAD->state, Running); 315 atomic_set_unordered(&THREAD->priority, rq_index); /* Correct rq index */ 354 316 355 317 /* 356 * We may not keep the old stack. 357 * Reason: If we kept the old stack and got blocked, for instance, in 358 * find_best_thread(), the old thread could get rescheduled by another 359 * CPU and overwrite the part of its own stack that was also used by 360 * the scheduler on this CPU. 361 * 362 * Moreover, we have to bypass the compiler-generated POP sequence 363 * which is fooled by SP being set to the very top of the stack. 364 * Therefore the scheduler() function continues in 365 * scheduler_separated_stack(). 366 * 318 * Clear the stolen flag so that it can be migrated 319 * when load balancing needs emerge. 367 320 */ 368 context_save(&CPU->saved_context); 369 context_set(&CPU->saved_context, FADDR(scheduler_separated_stack), 370 (uintptr_t) CPU->stack, STACK_SIZE); 371 context_restore(&CPU->saved_context); 372 373 /* Not reached */ 374 } 375 376 /** Scheduler stack switch wrapper 377 * 378 * Second part of the scheduler() function 379 * using new stack. Handling the actual context 380 * switch to a new thread. 
381 * 382 */ 383 void scheduler_separated_stack(void) 384 { 385 DEADLOCK_PROBE_INIT(p_joinwq); 386 task_t *old_task = TASK; 387 as_t *old_as = AS; 388 389 assert((!THREAD) || (irq_spinlock_locked(&THREAD->lock))); 390 assert(CPU != NULL); 391 assert(interrupts_disabled()); 392 393 /* 394 * Hold the current task and the address space to prevent their 395 * possible destruction should thread_destroy() be called on this or any 396 * other processor while the scheduler is still using them. 397 */ 398 if (old_task) 399 task_hold(old_task); 400 401 if (old_as) 402 as_hold(old_as); 403 404 if (THREAD) { 405 /* Must be run after the switch to scheduler stack */ 406 after_thread_ran(); 407 408 switch (THREAD->state) { 409 case Running: 410 irq_spinlock_unlock(&THREAD->lock, false); 411 thread_ready(THREAD); 412 break; 413 414 case Exiting: 415 repeat: 416 if (THREAD->detached) { 417 thread_destroy(THREAD, false); 418 } else { 419 /* 420 * The thread structure is kept allocated until 421 * somebody calls thread_detach() on it. 422 */ 423 if (!irq_spinlock_trylock(&THREAD->join_wq.lock)) { 424 /* 425 * Avoid deadlock. 426 */ 427 irq_spinlock_unlock(&THREAD->lock, false); 428 delay(HZ); 429 irq_spinlock_lock(&THREAD->lock, false); 430 DEADLOCK_PROBE(p_joinwq, 431 DEADLOCK_THRESHOLD); 432 goto repeat; 433 } 434 _waitq_wakeup_unsafe(&THREAD->join_wq, 435 WAKEUP_FIRST); 436 irq_spinlock_unlock(&THREAD->join_wq.lock, false); 437 438 THREAD->state = Lingering; 439 irq_spinlock_unlock(&THREAD->lock, false); 440 } 441 break; 442 443 case Sleeping: 444 /* 445 * Prefer the thread after it's woken up. 446 */ 447 THREAD->priority = -1; 448 449 /* 450 * We need to release wq->lock which we locked in 451 * waitq_sleep(). Address of wq->lock is kept in 452 * THREAD->sleep_queue. 453 */ 454 irq_spinlock_unlock(&THREAD->sleep_queue->lock, false); 455 456 irq_spinlock_unlock(&THREAD->lock, false); 457 break; 458 459 default: 460 /* 461 * Entering state is unexpected. 462 */ 463 panic("tid%" PRIu64 ": unexpected state %s.", 464 THREAD->tid, thread_states[THREAD->state]); 465 break; 466 } 467 468 THREAD = NULL; 469 } 470 471 THREAD = find_best_thread(); 472 473 irq_spinlock_lock(&THREAD->lock, false); 474 int priority = THREAD->priority; 475 irq_spinlock_unlock(&THREAD->lock, false); 476 477 relink_rq(priority); 478 479 /* 480 * If both the old and the new task are the same, 481 * lots of work is avoided. 482 */ 483 if (TASK != THREAD->task) { 484 as_t *new_as = THREAD->task->as; 485 486 /* 487 * Note that it is possible for two tasks 488 * to share one address space. 489 */ 490 if (old_as != new_as) { 491 /* 492 * Both tasks and address spaces are different. 493 * Replace the old one with the new one. 494 */ 495 as_switch(old_as, new_as); 496 } 497 498 TASK = THREAD->task; 499 before_task_runs(); 500 } 501 502 if (old_task) 503 task_release(old_task); 504 505 if (old_as) 506 as_release(old_as); 507 508 irq_spinlock_lock(&THREAD->lock, false); 509 THREAD->state = Running; 321 THREAD->stolen = false; 510 322 511 323 #ifdef SCHEDULER_VERBOSE 512 324 log(LF_OTHER, LVL_DEBUG, 513 325 "cpu%u: tid %" PRIu64 " (priority=%d, ticks=%" PRIu64 514 ", nrdy=%zu)", CPU->id, THREAD->tid, THREAD->priority,326 ", nrdy=%zu)", CPU->id, THREAD->tid, rq_index, 515 327 THREAD->ticks, atomic_load(&CPU->nrdy)); 516 328 #endif … … 524 336 * function must be executed before the switch to the new stack. 
525 337 */ 526 before_thread_runs(); 338 before_thread_runs_arch(); 339 340 #ifdef CONFIG_UDEBUG 341 if (atomic_get_unordered(&THREAD->btrace)) { 342 istate_t *istate = THREAD->udebug.uspace_state; 343 if (istate != NULL) { 344 printf("Thread %" PRIu64 " stack trace:\n", THREAD->tid); 345 stack_trace_istate(istate); 346 } else { 347 printf("Thread %" PRIu64 " interrupt state not available\n", THREAD->tid); 348 } 349 350 atomic_set_unordered(&THREAD->btrace, false); 351 } 352 #endif 353 354 fpu_restore(); 355 356 /* Time allocation in microseconds. */ 357 uint64_t time_to_run = (rq_index + 1) * 10000; 358 359 /* Set the time of next preemption. */ 360 CPU_LOCAL->preempt_deadline = 361 CPU_LOCAL->current_clock_tick + us2ticks(time_to_run); 362 363 /* Save current CPU cycle */ 364 THREAD->last_cycle = get_cycle(); 365 } 366 367 static void add_to_rq(thread_t *thread, cpu_t *cpu, int i) 368 { 369 /* Add to the appropriate runqueue. */ 370 runq_t *rq = &cpu->rq[i]; 371 372 irq_spinlock_lock(&rq->lock, false); 373 list_append(&thread->rq_link, &rq->rq); 374 rq->n++; 375 irq_spinlock_unlock(&rq->lock, false); 376 377 atomic_inc(&nrdy); 378 atomic_inc(&cpu->nrdy); 379 } 380 381 /** Requeue a thread that was just preempted on this CPU. 382 */ 383 static void thread_requeue_preempted(thread_t *thread) 384 { 385 assert(interrupts_disabled()); 386 assert(atomic_get_unordered(&thread->state) == Running); 387 assert(atomic_get_unordered(&thread->cpu) == CPU); 388 389 int prio = atomic_get_unordered(&thread->priority); 390 391 if (prio < RQ_COUNT - 1) { 392 prio++; 393 atomic_set_unordered(&thread->priority, prio); 394 } 395 396 atomic_set_unordered(&thread->state, Ready); 397 398 add_to_rq(thread, CPU, prio); 399 } 400 401 void thread_requeue_sleeping(thread_t *thread) 402 { 403 ipl_t ipl = interrupts_disable(); 404 405 assert(atomic_get_unordered(&thread->state) == Sleeping || atomic_get_unordered(&thread->state) == Entering); 406 407 atomic_set_unordered(&thread->priority, 0); 408 atomic_set_unordered(&thread->state, Ready); 409 410 /* Prefer the CPU on which the thread ran last */ 411 cpu_t *cpu = atomic_get_unordered(&thread->cpu); 412 413 if (!cpu) { 414 cpu = CPU; 415 atomic_set_unordered(&thread->cpu, CPU); 416 } 417 418 add_to_rq(thread, cpu, 0); 419 420 interrupts_restore(ipl); 421 } 422 423 static void cleanup_after_thread(thread_t *thread) 424 { 425 assert(CURRENT->mutex_locks == 0); 426 assert(interrupts_disabled()); 427 428 int expected; 429 430 switch (atomic_get_unordered(&thread->state)) { 431 case Running: 432 thread_requeue_preempted(thread); 433 break; 434 435 case Exiting: 436 waitq_close(&thread->join_wq); 437 438 /* 439 * Release the reference CPU has for the thread. 440 * If there are no other references (e.g. threads calling join), 441 * the thread structure is deallocated. 442 */ 443 thread_put(thread); 444 break; 445 446 case Sleeping: 447 expected = SLEEP_INITIAL; 448 449 /* Only set SLEEP_ASLEEP in sleep pad if it's still in initial state */ 450 if (!atomic_compare_exchange_strong_explicit(&thread->sleep_state, 451 &expected, SLEEP_ASLEEP, 452 memory_order_acq_rel, memory_order_acquire)) { 453 454 assert(expected == SLEEP_WOKE); 455 /* The thread has already been woken up, requeue immediately. */ 456 thread_requeue_sleeping(thread); 457 } 458 break; 459 460 default: 461 /* 462 * Entering state is unexpected. 
463 */ 464 panic("tid%" PRIu64 ": unexpected state %s.", 465 thread->tid, thread_states[atomic_get_unordered(&thread->state)]); 466 break; 467 } 468 } 469 470 /** Switch to scheduler context to let other threads run. */ 471 void scheduler_enter(state_t new_state) 472 { 473 ipl_t ipl = interrupts_disable(); 474 475 assert(CPU != NULL); 476 assert(THREAD != NULL); 477 478 if (atomic_load(&haltstate)) 479 halt(); 480 481 /* Check if we have a thread to switch to. */ 482 483 int rq_index; 484 thread_t *new_thread = try_find_thread(&rq_index); 485 486 if (new_thread == NULL && new_state == Running) { 487 /* No other thread to run, but we still have work to do here. */ 488 interrupts_restore(ipl); 489 return; 490 } 491 492 atomic_set_unordered(&THREAD->state, new_state); 493 494 /* Update thread kernel accounting */ 495 atomic_time_increment(&THREAD->kcycles, get_cycle() - THREAD->last_cycle); 496 497 fpu_cleanup(); 527 498 528 499 /* 529 * Copy the knowledge of CPU, TASK, THREAD and preemption counter to530 * thread's stack.500 * On Sparc, this saves some extra userspace state that's not 501 * covered by context_save()/context_restore(). 531 502 */ 532 current_copy(CURRENT, (current_t *) THREAD->kstack); 533 534 context_restore(&THREAD->saved_context); 503 after_thread_ran_arch(); 504 505 if (new_thread) { 506 thread_t *old_thread = THREAD; 507 CPU_LOCAL->prev_thread = old_thread; 508 THREAD = new_thread; 509 /* No waiting necessary, we can switch to the new thread directly. */ 510 prepare_to_run_thread(rq_index); 511 512 current_copy(CURRENT, (current_t *) new_thread->kstack); 513 context_swap(&old_thread->saved_context, &new_thread->saved_context); 514 } else { 515 /* 516 * A new thread isn't immediately available, switch to a separate 517 * stack to sleep or do other idle stuff. 518 */ 519 current_copy(CURRENT, (current_t *) CPU_LOCAL->stack); 520 context_swap(&THREAD->saved_context, &CPU_LOCAL->scheduler_context); 521 } 522 523 assert(CURRENT->mutex_locks == 0); 524 assert(interrupts_disabled()); 525 526 /* Check if we need to clean up after another thread. */ 527 if (CPU_LOCAL->prev_thread) { 528 cleanup_after_thread(CPU_LOCAL->prev_thread); 529 CPU_LOCAL->prev_thread = NULL; 530 } 531 532 interrupts_restore(ipl); 533 } 534 535 /** Enter main scheduler loop. Never returns. 536 * 537 * This function switches to a runnable thread as soon as one is available, 538 * after which it is only switched back to if a thread is stopping and there is 539 * no other thread to run in its place. We need a separate context for that 540 * because we're going to block the CPU, which means we need another context 541 * to clean up after the previous thread. 542 */ 543 void scheduler_run(void) 544 { 545 assert(interrupts_disabled()); 546 547 assert(CPU != NULL); 548 assert(TASK == NULL); 549 assert(THREAD == NULL); 550 assert(interrupts_disabled()); 551 552 while (!atomic_load(&haltstate)) { 553 assert(CURRENT->mutex_locks == 0); 554 555 int rq_index; 556 THREAD = find_best_thread(&rq_index); 557 prepare_to_run_thread(rq_index); 558 559 /* 560 * Copy the knowledge of CPU, TASK, THREAD and preemption counter to 561 * thread's stack. 562 */ 563 current_copy(CURRENT, (current_t *) THREAD->kstack); 564 565 /* Switch to thread context. */ 566 context_swap(&CPU_LOCAL->scheduler_context, &THREAD->saved_context); 567 568 /* Back from another thread. 
*/ 569 assert(CPU != NULL); 570 assert(THREAD != NULL); 571 assert(CURRENT->mutex_locks == 0); 572 assert(interrupts_disabled()); 573 574 cleanup_after_thread(THREAD); 575 576 /* 577 * Necessary because we're allowing interrupts in find_best_thread(), 578 * so we need to avoid other code referencing the thread we left. 579 */ 580 THREAD = NULL; 581 } 582 583 halt(); 584 } 585 586 /** Thread wrapper. 587 * 588 * This wrapper is provided to ensure that a starting thread properly handles 589 * everything it needs to do when first scheduled, and when it exits. 590 */ 591 void thread_main_func(void) 592 { 593 assert(interrupts_disabled()); 594 595 void (*f)(void *) = THREAD->thread_code; 596 void *arg = THREAD->thread_arg; 597 598 /* This is where each thread wakes up after its creation */ 599 600 /* Check if we need to clean up after another thread. */ 601 if (CPU_LOCAL->prev_thread) { 602 cleanup_after_thread(CPU_LOCAL->prev_thread); 603 CPU_LOCAL->prev_thread = NULL; 604 } 605 606 interrupts_enable(); 607 608 f(arg); 609 610 thread_exit(); 535 611 536 612 /* Not reached */ … … 538 614 539 615 #ifdef CONFIG_SMP 616 617 static thread_t *steal_thread_from(cpu_t *old_cpu, int i) 618 { 619 runq_t *old_rq = &old_cpu->rq[i]; 620 runq_t *new_rq = &CPU->rq[i]; 621 622 ipl_t ipl = interrupts_disable(); 623 624 irq_spinlock_lock(&old_rq->lock, false); 625 626 /* 627 * If fpu_owner is any thread in the list, its store is seen here thanks to 628 * the runqueue lock. 629 */ 630 thread_t *fpu_owner = atomic_load_explicit(&old_cpu->fpu_owner, 631 memory_order_relaxed); 632 633 /* Search rq from the back */ 634 list_foreach_rev(old_rq->rq, rq_link, thread_t, thread) { 635 636 /* 637 * Do not steal CPU-wired threads, threads 638 * already stolen, threads for which migration 639 * was temporarily disabled or threads whose 640 * FPU context is still in the CPU. 641 */ 642 if (thread->stolen || thread->nomigrate || thread == fpu_owner) { 643 continue; 644 } 645 646 thread->stolen = true; 647 atomic_set_unordered(&thread->cpu, CPU); 648 649 /* 650 * Ready thread on local CPU 651 */ 652 653 #ifdef KCPULB_VERBOSE 654 log(LF_OTHER, LVL_DEBUG, 655 "kcpulb%u: TID %" PRIu64 " -> cpu%u, " 656 "nrdy=%ld, avg=%ld", CPU->id, thread->tid, 657 CPU->id, atomic_load(&CPU->nrdy), 658 atomic_load(&nrdy) / config.cpu_active); 659 #endif 660 661 /* Remove thread from ready queue. */ 662 old_rq->n--; 663 list_remove(&thread->rq_link); 664 irq_spinlock_unlock(&old_rq->lock, false); 665 666 /* Append thread to local queue. 
*/ 667 irq_spinlock_lock(&new_rq->lock, false); 668 list_append(&thread->rq_link, &new_rq->rq); 669 new_rq->n++; 670 irq_spinlock_unlock(&new_rq->lock, false); 671 672 atomic_dec(&old_cpu->nrdy); 673 atomic_inc(&CPU->nrdy); 674 interrupts_restore(ipl); 675 return thread; 676 } 677 678 irq_spinlock_unlock(&old_rq->lock, false); 679 interrupts_restore(ipl); 680 return NULL; 681 } 682 540 683 /** Load balancing thread 541 684 * … … 550 693 size_t average; 551 694 size_t rdy; 552 553 /*554 * Detach kcpulb as nobody will call thread_join_timeout() on it.555 */556 thread_detach(THREAD);557 695 558 696 loop: … … 582 720 */ 583 721 size_t acpu; 584 size_t acpu_bias = 0;585 722 int rq; 586 723 587 724 for (rq = RQ_COUNT - 1; rq >= 0; rq--) { 588 725 for (acpu = 0; acpu < config.cpu_active; acpu++) { 589 cpu_t *cpu = &cpus[ (acpu + acpu_bias) % config.cpu_active];726 cpu_t *cpu = &cpus[acpu]; 590 727 591 728 /* … … 601 738 continue; 602 739 603 irq_spinlock_lock(&(cpu->rq[rq].lock), true); 604 if (cpu->rq[rq].n == 0) { 605 irq_spinlock_unlock(&(cpu->rq[rq].lock), true); 606 continue; 607 } 608 609 thread_t *thread = NULL; 610 611 /* Search rq from the back */ 612 link_t *link = list_last(&cpu->rq[rq].rq); 613 614 while (link != NULL) { 615 thread = (thread_t *) list_get_instance(link, 616 thread_t, rq_link); 617 618 /* 619 * Do not steal CPU-wired threads, threads 620 * already stolen, threads for which migration 621 * was temporarily disabled or threads whose 622 * FPU context is still in the CPU. 623 */ 624 irq_spinlock_lock(&thread->lock, false); 625 626 if ((!thread->wired) && (!thread->stolen) && 627 (!thread->nomigrate) && 628 (!thread->fpu_context_engaged)) { 629 /* 630 * Remove thread from ready queue. 631 */ 632 irq_spinlock_unlock(&thread->lock, 633 false); 634 635 atomic_dec(&cpu->nrdy); 636 atomic_dec(&nrdy); 637 638 cpu->rq[rq].n--; 639 list_remove(&thread->rq_link); 640 641 break; 642 } 643 644 irq_spinlock_unlock(&thread->lock, false); 645 646 link = list_prev(link, &cpu->rq[rq].rq); 647 thread = NULL; 648 } 649 650 if (thread) { 651 /* 652 * Ready thread on local CPU 653 */ 654 655 irq_spinlock_pass(&(cpu->rq[rq].lock), 656 &thread->lock); 657 658 #ifdef KCPULB_VERBOSE 659 log(LF_OTHER, LVL_DEBUG, 660 "kcpulb%u: TID %" PRIu64 " -> cpu%u, " 661 "nrdy=%ld, avg=%ld", CPU->id, thread->tid, 662 CPU->id, atomic_load(&CPU->nrdy), 663 atomic_load(&nrdy) / config.cpu_active); 664 #endif 665 666 thread->stolen = true; 667 thread->state = Entering; 668 669 irq_spinlock_unlock(&thread->lock, true); 670 thread_ready(thread); 671 672 if (--count == 0) 673 goto satisfied; 674 675 /* 676 * We are not satisfied yet, focus on another 677 * CPU next time. 
678 * 679 */ 680 acpu_bias++; 681 682 continue; 683 } else 684 irq_spinlock_unlock(&(cpu->rq[rq].lock), true); 685 740 if (steal_thread_from(cpu, rq) && --count == 0) 741 goto satisfied; 686 742 } 687 743 } … … 692 748 * 693 749 */ 694 scheduler();750 thread_yield(); 695 751 } else { 696 752 /* … … 719 775 continue; 720 776 721 irq_spinlock_lock(&cpus[cpu].lock, true); 722 723 printf("cpu%u: address=%p, nrdy=%zu, needs_relink=%zu\n", 724 cpus[cpu].id, &cpus[cpu], atomic_load(&cpus[cpu].nrdy), 725 cpus[cpu].needs_relink); 777 printf("cpu%u: address=%p, nrdy=%zu\n", 778 cpus[cpu].id, &cpus[cpu], atomic_load(&cpus[cpu].nrdy)); 726 779 727 780 unsigned int i; … … 737 790 thread) { 738 791 printf("%" PRIu64 "(%s) ", thread->tid, 739 thread_states[ thread->state]);792 thread_states[atomic_get_unordered(&thread->state)]); 740 793 } 741 794 printf("\n"); … … 743 796 irq_spinlock_unlock(&(cpus[cpu].rq[i].lock), false); 744 797 } 745 746 irq_spinlock_unlock(&cpus[cpu].lock, true);747 798 } 748 799 }
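The central structural change in this changeset is the replacement of the old single scheduler() entry point (with its FADDR/context_set stack trickery) by a pair of functions: scheduler_enter(), called on a running thread's stack, and scheduler_run(), a per-CPU loop that lives on its own scheduler context and cleans up after the previous thread once it is safely off the CPU. Threads and the scheduler hand control to each other with context_swap(). The sketch below is not HelenOS code; it is a minimal userspace illustration of that two-context pattern, assuming POSIX ucontext in place of the kernel's context_swap(), and all names here (sched_ctx, worker, yield_to_scheduler, the fixed two-entry "run queue") are invented for the example.

/*
 * Illustrative sketch only: mimics the scheduler_run()/scheduler_enter()
 * structure of this changeset using POSIX ucontext in userspace.
 * swapcontext() stands in for the kernel's context_swap(); the thread
 * bookkeeping is deliberately simplistic.
 */
#include <stdio.h>
#include <stdlib.h>
#include <ucontext.h>

#define STACK_SIZE (64 * 1024)
#define NTHREADS 2

static ucontext_t sched_ctx;            /* per-"CPU" scheduler context */
static ucontext_t thread_ctx[NTHREADS]; /* saved thread contexts */
static int current = -1;                /* index of the running thread */
static int remaining = NTHREADS;        /* threads that have not exited */
static int finished[NTHREADS];

/* Analogue of scheduler_enter(Running): voluntarily give up the CPU. */
static void yield_to_scheduler(void)
{
	swapcontext(&thread_ctx[current], &sched_ctx);
}

/* Thread body: do a little work, yielding between steps. */
static void worker(void)
{
	for (int step = 0; step < 3; step++) {
		printf("thread %d: step %d\n", current, step);
		yield_to_scheduler();
	}

	finished[current] = 1;
	remaining--;

	/* Analogue of thread_exit(): return to the scheduler for good. */
	swapcontext(&thread_ctx[current], &sched_ctx);
}

int main(void)
{
	/* Create the worker contexts (roughly what thread creation sets up). */
	for (int i = 0; i < NTHREADS; i++) {
		getcontext(&thread_ctx[i]);
		thread_ctx[i].uc_stack.ss_sp = malloc(STACK_SIZE);
		thread_ctx[i].uc_stack.ss_size = STACK_SIZE;
		thread_ctx[i].uc_link = &sched_ctx;
		makecontext(&thread_ctx[i], worker, 0);
	}

	/* Analogue of scheduler_run(): pick a runnable thread, switch to it,
	 * and clean up when control comes back to the scheduler context. */
	while (remaining > 0) {
		for (int i = 0; i < NTHREADS; i++) {
			if (finished[i])
				continue;
			current = i;
			swapcontext(&sched_ctx, &thread_ctx[i]);
			current = -1;
		}
	}

	for (int i = 0; i < NTHREADS; i++)
		free(thread_ctx[i].uc_stack.ss_sp);
	printf("all threads done\n");
	return 0;
}

As the doc comment on scheduler_run() in the diff notes, the point of keeping a dedicated scheduler context per CPU is that a stopping or sleeping thread's stack can be handed off (and its cleanup deferred to cleanup_after_thread()) while the CPU keeps executing on a stack that is guaranteed to outlive the thread; the userspace sketch above only mirrors the control-flow shape, not that ownership handover.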