Index: apps/codecs.h
===================================================================
--- apps/codecs.h	(revision 16785)
+++ apps/codecs.h	(working copy)
@@ -228,7 +228,7 @@
 #if NUM_CORES > 1
     struct thread_entry *
         (*create_thread)(void (*function)(void), void* stack,
-                         int stack_size, unsigned flags, const char *name
+                         size_t stack_size, unsigned flags, const char *name
                          IF_PRIO(, int priority)
                          IF_COP(, unsigned int core));
 
Index: apps/pcmbuf.c
===================================================================
--- apps/pcmbuf.c	(revision 16785)
+++ apps/pcmbuf.c	(working copy)
@@ -116,7 +116,7 @@
 static bool pcmbuf_flush;
 
 #ifdef HAVE_PRIORITY_SCHEDULING
-static int codec_thread_priority = 0;
+static int codec_thread_priority = PRIORITY_PLAYBACK;
 #endif
 
 extern struct thread_entry *codec_thread_p;
@@ -256,18 +256,21 @@
      * will starve if the codec thread's priority is boosted. */
     if (boost)
     {
-        if (codec_thread_priority == 0)
+        int priority = (PRIORITY_PLAYBACK - PRIORITY_PLAYBACK_MAX)*pcmbuf_unplayed_bytes
+                          / (2*NATIVE_FREQUENCY) + PRIORITY_PLAYBACK_MAX;
+
+        if (priority != codec_thread_priority)
         {
-            codec_thread_priority = thread_set_priority(
-                codec_thread_p, PRIORITY_REALTIME);
-            voice_thread_set_priority(PRIORITY_REALTIME);
+            codec_thread_priority = priority;
+            thread_set_priority(codec_thread_p, priority);
+            voice_thread_set_priority(priority);
         }
     }
-    else if (codec_thread_priority != 0)
+    else if (codec_thread_priority != PRIORITY_PLAYBACK)
     {
-        thread_set_priority(codec_thread_p, codec_thread_priority);
-        voice_thread_set_priority(codec_thread_priority);
-        codec_thread_priority = 0;
+        thread_set_priority(codec_thread_p, PRIORITY_PLAYBACK);
+        voice_thread_set_priority(PRIORITY_PLAYBACK);
+        codec_thread_priority = PRIORITY_PLAYBACK;
     }
 }
 #endif /* HAVE_PRIORITY_SCHEDULING */
@@ -818,7 +821,7 @@
     if (low_latency_mode)
     {
-        /* 1/4s latency. */
+        /* 1/2s latency. */
-        if (pcmbuf_unplayed_bytes > NATIVE_FREQUENCY * 4 / 4
+        if (pcmbuf_unplayed_bytes > NATIVE_FREQUENCY * 4 / 2
             && pcm_is_playing())
             return false;
     }
Index: apps/plugins/mpegplayer/stream_mgr.c
===================================================================
--- apps/plugins/mpegplayer/stream_mgr.c	(revision 16785)
+++ apps/plugins/mpegplayer/stream_mgr.c	(working copy)
@@ -987,7 +987,6 @@
 
     stream_mgr.q = &stream_mgr_queue;
     rb->queue_init(stream_mgr.q, false);
-    rb->queue_enable_queue_send(stream_mgr.q, &stream_mgr_queue_send);
 
     /* sets audiosize and returns buffer pointer */
     mem = rb->plugin_get_audio_buffer(&memsize);
@@ -1028,6 +1027,9 @@
         stream_mgr_thread_stack, sizeof(stream_mgr_thread_stack),
         0, "mpgstream_mgr" IF_PRIO(, PRIORITY_SYSTEM) IF_COP(, CPU));
 
+    rb->queue_enable_queue_send(stream_mgr.q, &stream_mgr_queue_send,
+                                stream_mgr.thread);
+
     if (stream_mgr.thread == NULL)
     {
         rb->splash(HZ, "Could not create stream manager thread!");
Index: apps/plugins/mpegplayer/audio_thread.c
===================================================================
--- apps/plugins/mpegplayer/audio_thread.c	(revision 16785)
+++ apps/plugins/mpegplayer/audio_thread.c	(working copy)
@@ -714,13 +714,15 @@
     /* Start the audio thread */
     audio_str.hdr.q = &audio_str_queue;
     rb->queue_init(audio_str.hdr.q, false);
-    rb->queue_enable_queue_send(audio_str.hdr.q, &audio_str_queue_send);
 
-    /* One-up on the priority since the core DSP over-yields internally */
+    /* Run above playback priority since the core DSP over-yields internally */
     audio_str.thread = rb->create_thread(
         audio_thread, audio_stack, audio_stack_size, 0,
-        "mpgaudio" IF_PRIO(,PRIORITY_PLAYBACK-1) IF_COP(, CPU));
+        "mpgaudio" IF_PRIO(,PRIORITY_PLAYBACK-4) IF_COP(, CPU));
 
+    rb->queue_enable_queue_send(audio_str.hdr.q, &audio_str_queue_send,
+                                audio_str.thread);
+
     if (audio_str.thread == NULL)
         return false;
 
Index: apps/plugins/mpegplayer/disk_buf.c
===================================================================
--- apps/plugins/mpegplayer/disk_buf.c	(revision 16785)
+++ apps/plugins/mpegplayer/disk_buf.c	(working copy)
@@ -837,7 +837,6 @@
 
     disk_buf.q = &disk_buf_queue;
     rb->queue_init(disk_buf.q, false);
-    rb->queue_enable_queue_send(disk_buf.q, &disk_buf_queue_send);
 
     disk_buf.state  = TSTATE_EOS;
     disk_buf.status = STREAM_STOPPED;
@@ -886,6 +885,9 @@
         disk_buf_thread, disk_buf_stack, sizeof(disk_buf_stack), 0,
         "mpgbuffer" IF_PRIO(, PRIORITY_BUFFERING) IF_COP(, CPU));
 
+    rb->queue_enable_queue_send(disk_buf.q, &disk_buf_queue_send,
+                                disk_buf.thread);
+
     if (disk_buf.thread == NULL)
         return false;
 
Index: apps/plugins/mpegplayer/video_thread.c
===================================================================
--- apps/plugins/mpegplayer/video_thread.c	(revision 16785)
+++ apps/plugins/mpegplayer/video_thread.c	(working copy)
@@ -955,7 +955,7 @@
                 else
                 {
                     /* Just a little left - spin and be accurate */
-                    rb->priority_yield();
+                    rb->yield();
                     if (str_have_msg(&video_str))
                         goto message_wait;
                 }
@@ -998,13 +998,15 @@
 
     video_str.hdr.q = &video_str_queue;
     rb->queue_init(video_str.hdr.q, false);
-    rb->queue_enable_queue_send(video_str.hdr.q, &video_str_queue_send);
 
     /* We put the video thread on another processor for multi-core targets. */
     video_str.thread = rb->create_thread(
         video_thread, video_stack, VIDEO_STACKSIZE, 0,
         "mpgvideo" IF_PRIO(,PRIORITY_PLAYBACK) IF_COP(, COP));
 
+    rb->queue_enable_queue_send(video_str.hdr.q, &video_str_queue_send,
+                                video_str.thread);
+
     if (video_str.thread == NULL)
         return false;
 
Index: apps/playback.c
===================================================================
--- apps/playback.c	(revision 16785)
+++ apps/playback.c	(working copy)
@@ -2549,9 +2549,7 @@
        to send messages. Thread creation will be delayed however so nothing
        starts running until ready if something yields such as talk_init. */
     queue_init(&audio_queue, true);
-    queue_enable_queue_send(&audio_queue, &audio_queue_sender_list);
     queue_init(&codec_queue, false);
-    queue_enable_queue_send(&codec_queue, &codec_queue_sender_list);
     queue_init(&pcmbuf_queue, false);
 
     pcm_init();
@@ -2587,11 +2585,17 @@
             codec_thread_name IF_PRIO(, PRIORITY_PLAYBACK)
             IF_COP(, CPU));
 
+    queue_enable_queue_send(&codec_queue, &codec_queue_sender_list,
+                            codec_thread_p);
+
     audio_thread_p = create_thread(audio_thread, audio_stack,
                   sizeof(audio_stack), CREATE_THREAD_FROZEN,
-                  audio_thread_name IF_PRIO(, PRIORITY_SYSTEM)
+                  audio_thread_name IF_PRIO(, PRIORITY_USER_INTERFACE)
                   IF_COP(, CPU));
 
+    queue_enable_queue_send(&audio_queue, &audio_queue_sender_list,
+                            audio_thread_p);
+
 #ifdef PLAYBACK_VOICE
     voice_thread_init();
 #endif
Index: apps/voice_thread.c
===================================================================
--- apps/voice_thread.c	(revision 16785)
+++ apps/voice_thread.c	(working copy)
@@ -424,12 +424,14 @@
 {
     logf("Starting voice thread");
     queue_init(&voice_queue, false);
-    queue_enable_queue_send(&voice_queue, &voice_queue_sender_list);
     mutex_init(&voice_mutex);
     event_init(&voice_event, STATE_SIGNALED | EVENT_MANUAL);
     voice_thread_p = create_thread(voice_thread, voice_stack,
             sizeof(voice_stack), CREATE_THREAD_FROZEN,
             voice_thread_name IF_PRIO(, PRIORITY_PLAYBACK) IF_COP(, CPU));
+
+    queue_enable_queue_send(&voice_queue, &voice_queue_sender_list,
+                            voice_thread_p);
 } /* voice_thread_init */
 
 /* Unfreeze the voice thread */
Index: apps/plugin.c
===================================================================
--- apps/plugin.c	(revision 16785)
+++ apps/plugin.c	(working copy)
@@ -253,15 +253,12 @@
     /* kernel/ system */
     PREFIX(sleep),
     yield,
-#ifdef HAVE_PRIORITY_SCHEDULING
-    priority_yield,
-#endif
     &current_tick,
     default_event_handler,
     default_event_handler_ex,
     threads,
     create_thread,
-    remove_thread,
+    thread_exit,
     thread_wait,
 #if (CONFIG_CODEC == SWCODEC)
     mutex_init,
Index: apps/plugin.h
===================================================================
--- apps/plugin.h	(revision 16785)
+++ apps/plugin.h	(working copy)
@@ -119,12 +119,12 @@
 #define PLUGIN_MAGIC 0x526F634B /* RocK */
 
 /* increase this every time the api struct changes */
-#define PLUGIN_API_VERSION 100
+#define PLUGIN_API_VERSION 101
 
 /* update this to latest version if a change to the api struct breaks
    backwards compatibility (and please take the opportunity to sort in any
    new function which are "waiting" at the end of the function table) */
-#define PLUGIN_MIN_API_VERSION 100
+#define PLUGIN_MIN_API_VERSION 101
 
 /* plugin return codes */
 enum plugin_status {
@@ -351,19 +351,16 @@
     /* kernel/ system */
     void (*PREFIX(sleep))(int ticks);
     void (*yield)(void);
-#ifdef HAVE_PRIORITY_SCHEDULING
-    void (*priority_yield)(void);
-#endif
     volatile long* current_tick;
     long (*default_event_handler)(long event);
     long (*default_event_handler_ex)(long event, void (*callback)(void *), void *parameter);
     struct thread_entry* threads;
     struct thread_entry* (*create_thread)(void (*function)(void), void* stack,
-                                          int stack_size, unsigned flags,
+                                          size_t stack_size, unsigned flags,
                                           const char *name
                                           IF_PRIO(, int priority)
 					                      IF_COP(, unsigned int core));
-    void (*remove_thread)(struct thread_entry *thread);
+    void (*thread_exit)(void);
     void (*thread_wait)(struct thread_entry *thread);
 #if CONFIG_CODEC == SWCODEC
     void (*mutex_init)(struct mutex *m);
@@ -405,7 +402,8 @@
             int ticks);
 #if CONFIG_CODEC == SWCODEC
     void (*queue_enable_queue_send)(struct event_queue *q,
-                                    struct queue_sender_list *send);
+                                    struct queue_sender_list *send,
+                                    struct thread_entry *owner);
     bool (*queue_empty)(const struct event_queue *q);
     void (*queue_wait)(struct event_queue *q, struct queue_event *ev);
     intptr_t (*queue_send)(struct event_queue *q, long id,
Index: apps/buffering.c
===================================================================
--- apps/buffering.c	(revision 16785)
+++ apps/buffering.c	(working copy)
@@ -1446,16 +1446,21 @@
 
 void buffering_init(void) {
     mutex_init(&llist_mutex);
+#ifdef HAVE_PRIORITY_SCHEDULING
+    /* Preempting a thread on unlock is not safe here at the moment */
+    mutex_set_preempt(&llist_mutex, false);
+#endif
 
     conf_watermark = BUFFERING_DEFAULT_WATERMARK;
 
     queue_init(&buffering_queue, true);
-    queue_enable_queue_send(&buffering_queue, &buffering_queue_sender_list);
-
     buffering_thread_p = create_thread( buffering_thread, buffering_stack,
             sizeof(buffering_stack), CREATE_THREAD_FROZEN,
             buffering_thread_name IF_PRIO(, PRIORITY_BUFFERING)
             IF_COP(, CPU));
+
+    queue_enable_queue_send(&buffering_queue, &buffering_queue_sender_list,
+                            buffering_thread_p);
 }
 
 /* Initialise the buffering subsystem */
Index: apps/debug_menu.c
===================================================================
--- apps/debug_menu.c	(revision 16785)
+++ apps/debug_menu.c	(working copy)
@@ -127,11 +127,6 @@
         [STATE_KILLED]            = 'K',
     };
 
-#if NUM_CORES > 1
-    if (status == STATE_BUSY) /* Not a state index */
-        return '.';
-#endif
-
     if (status > THREAD_NUM_STATES)
         status = THREAD_NUM_STATES;
 
@@ -166,15 +161,15 @@
     thread_get_name(name, 32, thread);
 
     snprintf(buffer, MAX_PATH,
-             "%2d: " IF_COP("(%d) ") "%c%c " IF_PRIO("%d ") "%2d%% %s",
+             "%2d: " IF_COP("(%d) ") "%c%c " IF_PRIO("%d %d ") "%2d%% %s",
              selected_item,
              IF_COP(thread->core,)
 #ifdef HAVE_SCHEDULER_BOOSTCTRL
-             (thread->boosted) ? '+' :
+             (thread->cpu_boost) ? '+' :
 #endif
                  ((thread->state == STATE_RUNNING) ? '*' : ' '),
              thread_status_char(thread->state),
-             IF_PRIO(thread->priority,)
+             IF_PRIO(thread->base_priority, thread->priority, )
              thread_stack_usage(thread), name);
 
     return buffer;
Index: apps/main.c
===================================================================
--- apps/main.c	(revision 16785)
+++ apps/main.c	(working copy)
@@ -270,7 +270,7 @@
 
 static void init(void)
 {
-    init_threads();
+    kernel_init();
     buffer_init();
     set_irq_level(0);
     lcd_init();
Index: firmware/export/kernel.h
===================================================================
--- firmware/export/kernel.h	(revision 16785)
+++ firmware/export/kernel.h	(working copy)
@@ -76,6 +76,8 @@
 #define SYS_SCREENDUMP            MAKE_SYS_EVENT(SYS_EVENT_CLS_MISC, 0)
 #define SYS_CAR_ADAPTER_RESUME    MAKE_SYS_EVENT(SYS_EVENT_CLS_MISC, 1)
 
+#define IS_SYSEVENT(ev)           (((ev) & SYS_EVENT) == SYS_EVENT)
+
 struct queue_event
 {
     long     id;
@@ -87,68 +89,92 @@
 {
     /* If non-NULL, there is a thread waiting for the corresponding event */
     /* Must be statically allocated to put in non-cached ram. */
-    struct thread_entry *senders[QUEUE_LENGTH];
+    struct thread_entry *senders[QUEUE_LENGTH]; /* message->thread map */
+    struct thread_entry *list;                  /* list of senders in map */
     /* Send info for last message dequeued or NULL if replied or not sent */
     struct thread_entry *curr_sender;
+#ifdef HAVE_PRIORITY_SCHEDULING
+    struct blocker blocker;
+#endif
 };
 #endif /* HAVE_EXTENDED_MESSAGING_AND_NAME */
 
+#ifdef HAVE_PRIORITY_SCHEDULING
+#define QUEUE_GET_THREAD(q) \
+    (((q)->send == NULL) ? NULL : (q)->send->blocker.thread)
+#else
+/* Queues without priority enabled have no owner provision _at this time_ */
+#define QUEUE_GET_THREAD(q) \
+    (NULL)
+#endif
+
 struct event_queue
 {
-    struct thread_queue queue;         /* Waiter list */
+    struct thread_entry *queue;         /* waiter list */
     struct queue_event events[QUEUE_LENGTH]; /* list of events */
-    unsigned int read;                 /* head of queue */
-    unsigned int write;                /* tail of queue */
+    unsigned int read;                  /* head of queue */
+    unsigned int write;                 /* tail of queue */
 #ifdef HAVE_EXTENDED_MESSAGING_AND_NAME
-    struct queue_sender_list *send;    /* list of threads waiting for
-                                          reply to an event */
+    struct queue_sender_list *send;     /* list of threads waiting for
+                                           reply to an event */
+#ifdef HAVE_PRIORITY_SCHEDULING
+    struct blocker *blocker_p;          /* priority inheritance info
+                                           for sync message senders */
 #endif
-#if NUM_CORES > 1
-    struct corelock cl;                /* inter-core sync */
 #endif
+    IF_COP( struct corelock cl; )       /* multiprocessor sync */
 };
 
+#ifdef HAVE_PRIORITY_SCHEDULING
+#define MUTEX_SET_THREAD(m, t) ((m)->blocker.thread = (t))
+#define MUTEX_GET_THREAD(m)    ((m)->blocker.thread)
+#else
+#define MUTEX_SET_THREAD(m, t) ((m)->thread = (t))
+#define MUTEX_GET_THREAD(m)    ((m)->thread)
+#endif
+
 struct mutex
 {
-    struct thread_entry *queue;  /* Waiter list */
-#if CONFIG_CORELOCK == SW_CORELOCK
-    struct corelock cl;          /* inter-core sync */
+    struct thread_entry *queue;         /* waiter list */
+    int count;                          /* lock owner recursion count */
+#ifdef HAVE_PRIORITY_SCHEDULING
+    struct blocker blocker;             /* priority inheritance info
+                                           for waiters */
+    bool no_preempt;                    /* don't allow higher-priority thread
+                                           to be scheduled even if woken */
+#else
+    struct thread_entry *thread;
 #endif
-    struct thread_entry *thread; /* thread that owns lock */
-    int count;                   /* lock owner recursion count */
-    unsigned char locked;        /* locked semaphore */
+    IF_COP( struct corelock cl; )       /* multiprocessor sync */
+    unsigned char locked;               /* locked semaphore */
 };
 
 #if NUM_CORES > 1
 struct spinlock
 {
-    struct corelock cl;          /* inter-core sync */
-    struct thread_entry *thread; /* lock owner */
-    int count;                   /* lock owner recursion count */
+    struct thread_entry *thread;        /* lock owner */
+    int count;                          /* lock owner recursion count */
+    struct corelock cl;                 /* multiprocessor sync */
 };
 #endif
 
 #ifdef HAVE_SEMAPHORE_OBJECTS
 struct semaphore
 {
-    struct thread_entry *queue;  /* Waiter list */
-#if CONFIG_CORELOCK == SW_CORELOCK
-    struct corelock cl;          /* inter-core sync */
-#endif
-    int count;                   /* # of waits remaining before unsignaled */
-    int max;                     /* maximum # of waits to remain signaled */
+    struct thread_entry *queue;         /* Waiter list */
+    int count;                          /* # of waits remaining before unsignaled */
+    int max;                            /* maximum # of waits to remain signaled */
+    IF_COP( struct corelock cl; )       /* multiprocessor sync */
 };
 #endif
 
 #ifdef HAVE_EVENT_OBJECTS
 struct event
 {
-    struct thread_entry *queues[2]; /* waiters for each state */
-#if CONFIG_CORELOCK == SW_CORELOCK
-    struct corelock cl;             /* inter-core sync */
-#endif
-    unsigned char automatic;        /* event performs auto-reset */
-    unsigned char state;            /* state: 1 = signaled */
+    struct thread_entry *queues[2];     /* waiters for each state */
+    unsigned char automatic;            /* event performs auto-reset */
+    unsigned char state;                /* state: 1 = signaled */
+    IF_COP( struct corelock cl; )       /* multiprocessor sync */
 };
 #endif
 
@@ -208,7 +234,9 @@
                              int ticks);
 extern void queue_post(struct event_queue *q, long id, intptr_t data);
 #ifdef HAVE_EXTENDED_MESSAGING_AND_NAME
-extern void queue_enable_queue_send(struct event_queue *q, struct queue_sender_list *send);
+extern void queue_enable_queue_send(struct event_queue *q,
+                                    struct queue_sender_list *send,
+                                    struct thread_entry *owner);
 extern intptr_t queue_send(struct event_queue *q, long id, intptr_t data);
 extern void queue_reply(struct event_queue *q, intptr_t retval);
 extern bool queue_in_queue_send(struct event_queue *q);
@@ -223,6 +251,11 @@
 extern void mutex_init(struct mutex *m);
 extern void mutex_lock(struct mutex *m);
 extern void mutex_unlock(struct mutex *m);
+#ifdef HAVE_PRIORITY_SCHEDULING
+/* Temporary function to disable mutex preempting a thread on unlock */
+static inline void mutex_set_preempt(struct mutex *m, bool preempt)
+    { m->no_preempt = !preempt; }
+#endif
 #if NUM_CORES > 1
 extern void spinlock_init(struct spinlock *l);
 extern void spinlock_lock(struct spinlock *l);
@@ -240,6 +273,5 @@
 extern void event_wait(struct event *e, unsigned int for_state);
 extern void event_set_state(struct event *e, unsigned int state);
 #endif /* HAVE_EVENT_OBJECTS */
-#define IS_SYSEVENT(ev) ((ev & SYS_EVENT) == SYS_EVENT)
 
 #endif /* _KERNEL_H_ */
Index: firmware/export/system.h
===================================================================
--- firmware/export/system.h	(revision 16785)
+++ firmware/export/system.h	(working copy)
@@ -159,6 +159,20 @@
 #define H_TO_BE32(x) (x)
 #endif
 
+/* Get the byte offset of a type's member */
+#define OFFSETOF(type, membername) ((off_t)&((type *)0)->membername)
+
+/* Get the type pointer from one of its members */
+#define TYPE_FROM_MEMBER(type, memberptr, membername) \
+    ((type *)((intptr_t)(memberptr) - OFFSETOF(type, membername)))
+
+/* returns index of first set bit + 1 or 0 if no bits are set */
+int find_first_set_bit(uint32_t val);
+
+static inline __attribute__((always_inline))
+uint32_t isolate_first_bit(uint32_t val)
+    { return val & -val; }
+
 /* gcc 3.4 changed the format of the constraints */
 #if (__GNUC__ >= 3) && (__GNUC_MINOR__ > 3) || (__GNUC__ >= 4)
 #define I_CONSTRAINT "I08"
Index: firmware/export/thread.h
===================================================================
--- firmware/export/thread.h	(revision 16785)
+++ firmware/export/thread.h	(working copy)
@@ -26,21 +26,35 @@
 
 /* Priority scheduling (when enabled with HAVE_PRIORITY_SCHEDULING) works
  * by giving high priority threads more CPU time than less priority threads
- * when they need it.
- * 
+ * when they need it. Priority is differential such that the priority
+ * difference between a lower priority runnable thread and the highest priority
+ * runnable thread determines the amount of aging necessary for the lower
+ * priority thread to be scheduled in order to prevent starvation.
+ *
  * If software playback codec pcm buffer is going down to critical, codec
- * can change it own priority to REALTIME to override user interface and
+ * can gradually raise its own priority to override user interface and
  * prevent playback skipping.
  */
+#define PRIORITY_RESERVED_HIGH   0   /* Reserved */
+#define PRIORITY_RESERVED_LOW    32  /* Reserved */
 #define HIGHEST_PRIORITY         1   /* The highest possible thread priority */
-#define LOWEST_PRIORITY          100 /* The lowest possible thread priority */
-#define PRIORITY_REALTIME        1
-#define PRIORITY_USER_INTERFACE  4   /* The main thread */
-#define PRIORITY_RECORDING       4   /* Recording thread */
-#define PRIORITY_PLAYBACK        4   /* or REALTIME when needed */
-#define PRIORITY_BUFFERING       4   /* Codec buffering thread */
-#define PRIORITY_SYSTEM          6   /* All other firmware threads */
-#define PRIORITY_BACKGROUND      8   /* Normal application threads */
+#define LOWEST_PRIORITY          31  /* The lowest possible thread priority */
+/* Realtime range reserved for threads that will not allow threads of lower
+ * priority to age and run (future expansion) */
+#define PRIORITY_REALTIME_1      1
+#define PRIORITY_REALTIME_2      2
+#define PRIORITY_REALTIME_3      3
+#define PRIORITY_REALTIME_4      4
+#define PRIORITY_REALTIME        4   /* Lowest realtime range */
+#define PRIORITY_USER_INTERFACE  16  /* The main thread */
+#define PRIORITY_RECORDING       16  /* Recording thread */
+#define PRIORITY_PLAYBACK        16  /* Variable between this and MAX */
+#define PRIORITY_PLAYBACK_MAX    5   /* Maximum allowable playback priority */
+#define PRIORITY_BUFFERING       16  /* Codec buffering thread */
+#define PRIORITY_SYSTEM          18  /* All other firmware threads */
+#define PRIORITY_BACKGROUND      20  /* Normal application threads */
+#define NUM_PRIORITIES           32
+#define PRIORITY_IDLE            32  /* Priority representative of no tasks */
 
 /* TODO: Only a minor tweak to create_thread would be needed to let
  * thread slots be caller allocated - no essential threading functionality
@@ -59,80 +73,40 @@
 
 #define DEFAULT_STACK_SIZE 0x400 /* Bytes */
 
-/**
- * "Busy" values that can be swapped into a variable to indicate
- * that the variable or object pointed to is in use by another processor
- * core. When accessed, the busy value is swapped-in while the current
- * value is atomically returned. If the swap returns the busy value,
- * the processor should retry the operation until some other value is
- * returned. When modification is finished, the new value should be
- * written which unlocks it and updates it atomically.
- *
- * Procedure:
- * while ((curr_value = swap(&variable, BUSY_VALUE)) == BUSY_VALUE);
- *
- * Modify/examine object at mem location or variable. Create "new_value"
- * as suitable.
- *
- * variable = new_value or curr_value;
- *
- * To check a value for busy and perform an operation if not:
- * curr_value = swap(&variable, BUSY_VALUE);
- *
- * if (curr_value != BUSY_VALUE)
- * {
- *     Modify/examine object at mem location or variable. Create "new_value"
- *     as suitable.
- *     variable = new_value or curr_value;
- * }
- * else
- * {
- *     Do nothing - already busy
- * }
- *
- * Only ever restore when an actual value is returned or else it could leave
- * the variable locked permanently if another processor unlocked in the
- * meantime. The next access attempt would deadlock for all processors since
- * an abandoned busy status would be left behind.
- */
-#define STATE_BUSYuptr    ((void*)UINTPTR_MAX)
-#define STATE_BUSYu8      UINT8_MAX
-#define STATE_BUSYi       INT_MIN
-
 #ifndef SIMULATOR
 /* Need to keep structures inside the header file because debug_menu
  * needs them. */
 #ifdef CPU_COLDFIRE
 struct regs
 {
-    unsigned int macsr;  /*     0 - EMAC status register */
-    unsigned int d[6];   /*  4-24 - d2-d7 */
-    unsigned int a[5];   /* 28-44 - a2-a6 */
-    void         *sp;    /*    48 - Stack pointer (a7) */
-    void         *start; /*    52 - Thread start address, or NULL when started */
+    uint32_t macsr; /*     0 - EMAC status register */
+    uint32_t d[6];  /*  4-24 - d2-d7 */
+    uint32_t a[5];  /* 28-44 - a2-a6 */
+    uint32_t sp;    /*    48 - Stack pointer (a7) */
+    uint32_t start; /*    52 - Thread start address, or NULL when started */
 };
 #elif CONFIG_CPU == SH7034
 struct regs
 {
-    unsigned int r[7];   /*  0-24 - Registers r8 thru r14 */
-    void         *sp;    /*    28 - Stack pointer (r15) */
-    void         *pr;    /*    32 - Procedure register */
-    void         *start; /*    36 - Thread start address, or NULL when started */
+    uint32_t r[7];  /*  0-24 - Registers r8 thru r14 */
+    uint32_t sp;    /*    28 - Stack pointer (r15) */
+    uint32_t pr;    /*    32 - Procedure register */
+    uint32_t start; /*    36 - Thread start address, or NULL when started */
 };
 #elif defined(CPU_ARM)
 struct regs
 {
-    unsigned int r[8];   /*  0-28 - Registers r4-r11 */
-    void         *sp;    /*    32 - Stack pointer (r13) */
-    unsigned int lr;     /*    36 - r14 (lr) */
-    void         *start; /*    40 - Thread start address, or NULL when started */
+    uint32_t r[8];  /*  0-28 - Registers r4-r11 */
+    uint32_t sp;    /*    32 - Stack pointer (r13) */
+    uint32_t lr;    /*    36 - r14 (lr) */
+    uint32_t start; /*    40 - Thread start address, or NULL when started */
 };
 #endif /* CONFIG_CPU */
 #else
 struct regs
 {
     void *t;             /* Simulator OS thread */
-    void *c;             /* Condition for blocking and sync */
+    void *s;             /* Semaphore for blocking and wakeup */
     void (*start)(void); /* Start function */
 };
 #endif /* !SIMULATOR */
@@ -154,13 +128,13 @@
                             thread_thaw is called with its ID */
     THREAD_NUM_STATES,
     TIMEOUT_STATE_FIRST = STATE_SLEEPING,
-#if NUM_CORES > 1
-    STATE_BUSY = STATE_BUSYu8, /* Thread slot is being examined */
-#endif
 };
 
 #if NUM_CORES > 1
-#define THREAD_DESTRUCT ((const char *)0x84905617)
+/* Pointer value for name field to indicate thread is being killed. Using
+ * an alternate STATE_* won't work since that would interfere with operation
+ * while the thread is still running. */
+#define THREAD_DESTRUCT ((const char *)~(intptr_t)0)
 #endif
 
 /* Link information for lists thread is in */
@@ -188,7 +162,7 @@
 /* Use native atomic swap/exchange instruction */
 struct corelock
 {
-    unsigned char locked;
+    volatile unsigned char locked;
 } __attribute__((packed));
 
 #define corelock_init(cl) \
@@ -207,15 +181,36 @@
 #define corelock_unlock(cl)
 #endif /* core locking selection */
 
-struct thread_queue
+#ifdef HAVE_PRIORITY_SCHEDULING
+struct blocker
 {
-    struct thread_entry *queue; /* list of threads waiting -
-                                   _must_ be first member */
-#if CONFIG_CORELOCK == SW_CORELOCK
-    struct corelock cl;         /* lock for atomic list operations */
-#endif
+    struct thread_entry *thread;   /* thread blocking other threads
+                                      (aka. object owner) */
+    int priority;                  /* highest priority waiter */
+    struct thread_entry * (*wakeup_protocol)(struct thread_entry *thread);
 };
 
+/* Choices of wakeup protocol */
+
+/* For transfer of object ownership by one thread to another thread by
+ * the owning thread itself (mutexes) */
+struct thread_entry *
+    wakeup_priority_protocol_transfer(struct thread_entry *thread);
+
+/* For release by owner where ownership doesn't change - other threads,
+ * interrupts, timeouts, etc. (mutex timeout, queues) */
+struct thread_entry *
+    wakeup_priority_protocol_release(struct thread_entry *thread);
+
+
+struct priority_distribution
+{
+    uint8_t  hist[NUM_PRIORITIES]; /* Histogram: Frequency for each priority */
+    uint32_t mask;                 /* Bitmask of hist entries that are not zero */
+};
+
+#endif /* HAVE_PRIORITY_SCHEDULING */
+
 /* Information kept in each thread slot
  * members are arranged according to size - largest first - in order
  * to ensure both alignment and packing at the same time.
@@ -224,88 +219,83 @@
 {
     struct regs context;       /* Register context at switch -
                                   _must_ be first member */
-    void *stack;               /* Pointer to top of stack */
+    uintptr_t *stack;          /* Pointer to top of stack */
     const char *name;          /* Thread name */
     long tmo_tick;             /* Tick when thread should be woken from
-                                  timeout */
+                                  timeout -
+                                  states: STATE_SLEEPING/STATE_BLOCKED_W_TMO */
     struct thread_list l;      /* Links for blocked/waking/running -
                                   circular linkage in both directions */
     struct thread_list tmo;    /* Links for timeout list -
-                                  Self-pointer-terminated in reverse direction,
-                                  NULL-terminated in forward direction */
-    struct thread_queue *bqp;  /* Pointer to list variable in kernel
+                                  Circular in reverse direction, NULL-terminated in
+                                  forward direction -
+                                  states: STATE_SLEEPING/STATE_BLOCKED_W_TMO */
+    struct thread_entry **bqp; /* Pointer to list variable in kernel
                                   object where thread is blocked - used
-                                  for implicit unblock and explicit wake */
-#if CONFIG_CORELOCK == SW_CORELOCK
-    struct thread_entry **bqnlp; /* Pointer to list variable in kernel
-                                    object where thread is blocked - non-locked
-                                    operations will be used */
+                                  for implicit unblock and explicit wake
+                                  states: STATE_BLOCKED/STATE_BLOCKED_W_TMO  */
+#if NUM_CORES > 1
+    struct corelock *obj_cl;   /* Object corelock where thread is blocked -
+                                  states: STATE_BLOCKED/STATE_BLOCKED_W_TMO */
 #endif
     struct thread_entry *queue; /* List of threads waiting for thread to be
                                   removed */
 #ifdef HAVE_EXTENDED_MESSAGING_AND_NAME
-    intptr_t retval;           /* Return value from a blocked operation */
+    #define HAVE_WAKEUP_EXT_CB
+    void (*wakeup_ext_cb)(struct thread_entry *thread); /* Callback that
+                                  performs special steps needed when being
+                                  forced off of an object's wait queue that
+                                  go beyond the standard wait queue removal
+                                  and priority disinheritance */
+    /* Only enabled when using queue_send for now */
 #endif
+#if defined(HAVE_EXTENDED_MESSAGING_AND_NAME) || NUM_CORES > 1
+    intptr_t retval;           /* Return value from a blocked operation/
+                                  misc. use */
+#endif
 #ifdef HAVE_PRIORITY_SCHEDULING
-    long last_run;             /* Last tick when started */
+    /* Priority summary of owned objects that support inheritance */
+    struct blocker *blocker;   /* Pointer to blocker when this thread is blocked
+                                  on an object that supports PIP -
+                                  states: STATE_BLOCKED/STATE_BLOCKED_W_TMO  */
+    struct priority_distribution pdist; /* Priority summary of owned objects
+                                  that have blocked threads and thread's own
+                                  base priority */
+    int skip_count;            /* Number of times skipped if higher priority
+                                  thread was running */
 #endif
     unsigned short stack_size; /* Size of stack in bytes */
 #ifdef HAVE_PRIORITY_SCHEDULING
-    unsigned char priority;    /* Current priority */
-    unsigned char priority_x;  /* Inherited priority - right now just a
-                                  runtime guarantee flag */
+    unsigned char base_priority; /* Base priority (set explicitly during
+                                  creation or thread_set_priority) */
+    unsigned char priority;    /* Scheduled priority (higher of base or
+                                  all threads blocked by this one) */
 #endif
     unsigned char state;       /* Thread slot state (STATE_*) */
+#ifdef HAVE_SCHEDULER_BOOSTCTRL
+    unsigned char cpu_boost;   /* CPU frequency boost flag */
+#endif
 #if NUM_CORES > 1
     unsigned char core;        /* The core to which thread belongs */
+    struct corelock waiter_cl; /* Corelock for thread_wait */
+    struct corelock slot_cl;   /* Corelock to lock thread slot */
 #endif
-#ifdef HAVE_SCHEDULER_BOOSTCTRL
-    unsigned char boosted;     /* CPU frequency boost flag */
-#endif
-#if CONFIG_CORELOCK == SW_CORELOCK
-    struct corelock cl;        /* Corelock to lock thread slot */
-#endif
 };
 
 #if NUM_CORES > 1
 /* Operations to be performed just before stopping a thread and starting
    a new one if specified before calling switch_thread */
-#define TBOP_UNLOCK_LIST     0x01 /* Set a pointer variable address var_ptrp */
-#if CONFIG_CORELOCK == CORELOCK_SWAP
-#define TBOP_SET_VARi        0x02 /* Set an int at address var_ip */
-#define TBOP_SET_VARu8       0x03 /* Set an unsigned char at address var_u8p */
-#define TBOP_VAR_TYPE_MASK   0x03 /* Mask for variable type*/
-#endif /* CONFIG_CORELOCK */
-#define TBOP_UNLOCK_CORELOCK 0x04
-#define TBOP_UNLOCK_THREAD   0x08 /* Unlock a thread's slot */
-#define TBOP_UNLOCK_CURRENT  0x10 /* Unlock the current thread's slot */
-#define TBOP_SWITCH_CORE     0x20 /* Call the core switch preparation routine */
+enum
+{
+    TBOP_CLEAR = 0,       /* No operation to do */
+    TBOP_UNLOCK_CORELOCK, /* Unlock a corelock variable */
+    TBOP_SWITCH_CORE,     /* Call the core switch preparation routine */
+};
 
 struct thread_blk_ops
 {
-#if CONFIG_CORELOCK != SW_CORELOCK
-    union
-    {
-        int var_iv;                   /* int variable value to set */
-        uint8_t var_u8v;              /* unsigned char valur to set */
-        struct thread_entry *list_v;  /* list pointer queue value to set */
-    };
-#endif
-    union
-    {
-#if CONFIG_CORELOCK != SW_CORELOCK
-        int *var_ip;                  /* pointer to int variable */
-        uint8_t *var_u8p;             /* pointer to unsigned char varuable */
-#endif
-        struct thread_queue *list_p;  /* pointer to list variable */
-    };
-#if CONFIG_CORELOCK == SW_CORELOCK
-    struct corelock *cl_p;            /* corelock to unlock */
-    struct thread_entry *thread;      /* thread to unlock */
-#elif CONFIG_CORELOCK == CORELOCK_SWAP
-    unsigned char state;              /* new thread state (performs unlock) */
-#endif /* SOFTWARE_CORELOCK */
-    unsigned char flags;    /* TBOP_* flags */
+    struct corelock *cl_p;    /* pointer to corelock */
+    unsigned char    flags;   /* TBOP_* flags */
 };
 #endif /* NUM_CORES > 1 */
 
@@ -316,28 +306,30 @@
 {
     /* "Active" lists - core is constantly active on these and are never
        locked and interrupts do not access them */
-    struct thread_entry *running;  /* threads that are running */
+    struct thread_entry *running;  /* threads that are running (RTR) */
     struct thread_entry *timeout;  /* threads that are on a timeout before
                                       running again */
-    /* "Shared" lists - cores interact in a synchronized manner - access
-       is locked between cores and interrupts */
-    struct thread_queue  waking;   /* intermediate locked list that
-                                      hold threads other core should wake up
-                                      on next task switch */
+    struct thread_entry *block_task; /* Task going off running list */
+#ifdef HAVE_PRIORITY_SCHEDULING
+    struct priority_distribution rtr; /* Summary of running and ready-to-run
+                                         threads */
+#endif
     long next_tmo_check;           /* soonest time to check tmo threads */
 #if NUM_CORES > 1
     struct thread_blk_ops blk_ops; /* operations to perform when
                                       blocking a thread */
-#endif /* NUM_CORES */
 #ifdef HAVE_PRIORITY_SCHEDULING
-    unsigned char highest_priority;
+    struct corelock rtr_cl;        /* Lock for rtr list */
 #endif
+#endif /* NUM_CORES */
 };
 
 #ifdef HAVE_PRIORITY_SCHEDULING
 #define IF_PRIO(...)    __VA_ARGS__
+#define IFN_PRIO(...)
 #else
 #define IF_PRIO(...)
+#define IFN_PRIO(...)   __VA_ARGS__
 #endif
 
 /* Macros generate better code than an inline function is this case */
@@ -464,13 +456,18 @@
 void core_idle(void);
 void core_wake(IF_COP_VOID(unsigned int core));
 
+/* Initialize the scheduler */
+void init_threads(void);
+
+/* Allocate a thread in the scheduler */
 #define CREATE_THREAD_FROZEN   0x00000001 /* Thread is frozen at create time */
 struct thread_entry*
-    create_thread(void (*function)(void), void* stack, int stack_size,
+    create_thread(void (*function)(void), void* stack, size_t stack_size,
                   unsigned flags, const char *name
                   IF_PRIO(, int priority)
 		          IF_COP(, unsigned int core));
 
+/* Set and clear the CPU frequency boost flag for the calling thread */
 #ifdef HAVE_SCHEDULER_BOOSTCTRL
 void trigger_cpu_boost(void);
 void cancel_cpu_boost(void);
@@ -478,86 +475,52 @@
 #define trigger_cpu_boost()
 #define cancel_cpu_boost()
 #endif
+/* Make a frozen thread runnable (when started with CREATE_THREAD_FROZEN).
+ * Has no effect on a thread not frozen. */
 void thread_thaw(struct thread_entry *thread);
+/* Wait for a thread to exit */
 void thread_wait(struct thread_entry *thread);
+/* Exit the current thread */
+void thread_exit(void);
+#if defined(DEBUG) || defined(ROCKBOX_HAS_LOGF)
+#define ALLOW_REMOVE_THREAD
+/* Remove a thread from the scheduler */
 void remove_thread(struct thread_entry *thread);
-void switch_thread(struct thread_entry *old);
+#endif
+
+/* Switch to next runnable thread */
+void switch_thread(void);
+/* Blocks a thread for at least the specified number of ticks (0 = wait until
+ * next tick) */
 void sleep_thread(int ticks);
+/* Indefinitely blocks the current thread on a thread queue */
+void block_thread(struct thread_entry *current);
+/* Blocks the current thread on a thread queue until explicitly woken or
+ * the timeout is reached */
+void block_thread_w_tmo(struct thread_entry *current, int timeout);
 
-/**
- * Setup to allow using thread queues as locked or non-locked without speed
- * sacrifices in both core locking types.
- *
- * The blocking/waking function inline two different version of the real
- * function into the stubs when a software or other separate core locking
- * mechanism is employed.
- *
- * When a simple test-and-set or similar instruction is available, locking
- * has no cost and so one version is used and the internal worker is called
- * directly.
- *
- * CORELOCK_NONE is treated the same as when an atomic instruction can be
- * used.
- */
+/* Return bit flags for thread wakeup */
+#define THREAD_NONE     0x0 /* No thread woken up (exclusive) */
+#define THREAD_OK       0x1 /* A thread was woken up */
+#define THREAD_SWITCH   0x2 /* Task switch recommended (one or more of
+                               higher priority than current were woken) */
 
-/* Blocks the current thread on a thread queue */
-#if CONFIG_CORELOCK == SW_CORELOCK
-void block_thread(struct thread_queue *tq);
-void block_thread_no_listlock(struct thread_entry **list);
-#else
-void _block_thread(struct thread_queue *tq);
-static inline void block_thread(struct thread_queue *tq)
-    { _block_thread(tq); }
-static inline void block_thread_no_listlock(struct thread_entry **list)
-    { _block_thread((struct thread_queue *)list); }
-#endif /* CONFIG_CORELOCK */
-
-/* Blocks the current thread on a thread queue for a max amount of time
- * There is no "_no_listlock" version because timeout blocks without sync on
- * the blocking queues is not permitted since either core could access the
- * list at any time to do an implicit wake. In other words, objects with
- * timeout support require lockable queues. */
-void block_thread_w_tmo(struct thread_queue *tq, int timeout);
-
-/* Wakes up the thread at the head of the queue */
-#define THREAD_WAKEUP_NONE    ((struct thread_entry *)NULL)
-#define THREAD_WAKEUP_MISSING ((struct thread_entry *)(NULL+1))
-#if CONFIG_CORELOCK == SW_CORELOCK
-struct thread_entry * wakeup_thread(struct thread_queue *tq);
-struct thread_entry * wakeup_thread_no_listlock(struct thread_entry **list);
-#else
-struct thread_entry * _wakeup_thread(struct thread_queue *list);
-static inline struct thread_entry * wakeup_thread(struct thread_queue *tq)
-    { return _wakeup_thread(tq); }
-static inline struct thread_entry * wakeup_thread_no_listlock(struct thread_entry **list)
-    { return _wakeup_thread((struct thread_queue *)list); }
-#endif /* CONFIG_CORELOCK */
-
-/* Initialize a thread_queue object. */
-static inline void thread_queue_init(struct thread_queue *tq)
-    { tq->queue = NULL; IF_SWCL(corelock_init(&tq->cl);) }
 /* A convenience function for waking an entire queue of threads. */
-static inline void thread_queue_wake(struct thread_queue *tq)
-    { while (wakeup_thread(tq) != NULL); }
-/* The no-listlock version of thread_queue_wake() */
-static inline void thread_queue_wake_no_listlock(struct thread_entry **list)
-    { while (wakeup_thread_no_listlock(list) != NULL); }
+unsigned int thread_queue_wake(struct thread_entry **list);
 
+/* Wakeup a thread at the head of a list */
+unsigned int wakeup_thread(struct thread_entry **list);
+
 #ifdef HAVE_PRIORITY_SCHEDULING
 int thread_set_priority(struct thread_entry *thread, int priority);
 int thread_get_priority(struct thread_entry *thread);
-/* Yield that guarantees thread execution once per round regardless of
-   thread's scheduler priority - basically a transient realtime boost
-   without altering the scheduler's thread precedence. */
-void priority_yield(void);
-#else
-#define priority_yield  yield
 #endif /* HAVE_PRIORITY_SCHEDULING */
 #if NUM_CORES > 1
 unsigned int switch_core(unsigned int new_core);
 #endif
 struct thread_entry * thread_get_current(void);
-void init_threads(void);
+
+/* Debugging info - only! */
 int thread_stack_usage(const struct thread_entry *thread);
 #if NUM_CORES > 1
 int idle_stack_usage(unsigned int core);
Index: firmware/export/config.h
===================================================================
--- firmware/export/config.h	(revision 16785)
+++ firmware/export/config.h	(working copy)
@@ -371,10 +371,20 @@
 #endif
 
 /* define for all cpus from ARM family */
+#if (CONFIG_CPU == IMX31L)
+#define CPU_ARM
+#define ARM_ARCH 6 /* ARMv6 */
+#endif
+
+#if defined(CPU_TCC77X) || defined(CPU_TCC780X)
+#define CPU_ARM
+#define ARM_ARCH 5 /* ARMv5 */
+#endif
+
 #if defined(CPU_PP) || (CONFIG_CPU == PNX0101) || (CONFIG_CPU == S3C2440) \
-  || (CONFIG_CPU == DSC25) || (CONFIG_CPU == IMX31L) || (CONFIG_CPU == DM320) \
-  || defined(CPU_TCC77X) || defined(CPU_TCC780X)
+  || (CONFIG_CPU == DSC25) || (CONFIG_CPU == DM320)
 #define CPU_ARM
+#define ARM_ARCH 4 /* ARMv4 */
 #endif
 
 /* Determine if accesses should be strictly long aligned. */
Index: firmware/kernel.c
===================================================================
--- firmware/kernel.c	(revision 16785)
+++ firmware/kernel.c	(working copy)
@@ -20,21 +20,30 @@
 #include <string.h>
 #include "config.h"
 #include "kernel.h"
+#ifdef SIMULATOR
+#include "system-sdl.h"
+#include "debug.h"
+#endif
 #include "thread.h"
 #include "cpu.h"
 #include "system.h"
 #include "panic.h"
 
 /* Make this nonzero to enable more elaborate checks on objects */
-#ifdef DEBUG
-#define KERNEL_OBJECT_CHECKS 1 /* Always 1 for DEBUG */
+#if defined(DEBUG) || defined(SIMULATOR)
+#define KERNEL_OBJECT_CHECKS 1 /* Always 1 for DEBUG and sim */
 #else
 #define KERNEL_OBJECT_CHECKS 0
 #endif
 
 #if KERNEL_OBJECT_CHECKS
+#ifdef SIMULATOR
 #define KERNEL_ASSERT(exp, msg...) \
+    ({ if (!({ exp; })) { DEBUGF(msg); exit(-1); } })
+#else
+#define KERNEL_ASSERT(exp, msg...) \
     ({ if (!({ exp; })) panicf(msg); })
+#endif
 #else
 #define KERNEL_ASSERT(exp, msg...) ({})
 #endif
@@ -52,9 +61,7 @@
 {
     int count;
     struct event_queue *queues[MAX_NUM_QUEUES];
-#if NUM_CORES > 1
-    struct corelock cl;
-#endif
+    IF_COP( struct corelock cl; )
 } all_queues NOCACHEBSS_ATTR;
 
 /****************************************************************************
@@ -77,6 +84,334 @@
     }
 }
 
+/****************************************************************************
+ * Timer tick
+ ****************************************************************************/
+#if CONFIG_CPU == SH7034
+void tick_start(unsigned int interval_in_ms)
+{
+    unsigned long count;
+
+    count = CPU_FREQ * interval_in_ms / 1000 / 8;
+
+    if(count > 0x10000)
+    {
+        panicf("Error! The tick interval is too long (%d ms)\n",
+               interval_in_ms);
+        return;
+    }
+    
+    /* We are using timer 0 */
+    
+    TSTR &= ~0x01; /* Stop the timer */
+    TSNC &= ~0x01; /* No synchronization */
+    TMDR &= ~0x01; /* Operate normally */
+
+    TCNT0 = 0;   /* Start counting at 0 */
+    GRA0 = (unsigned short)(count - 1);
+    TCR0 = 0x23; /* Clear at GRA match, sysclock/8 */
+
+    /* Enable interrupt on level 1 */
+    IPRC = (IPRC & ~0x00f0) | 0x0010;
+    
+    TSR0 &= ~0x01;
+    TIER0 = 0xf9; /* Enable GRA match interrupt */
+
+    TSTR |= 0x01; /* Start the timer */
+}
+
+void IMIA0(void) __attribute__ ((interrupt_handler));
+void IMIA0(void)
+{
+    int i;
+
+    /* Run through the list of tick tasks */
+    for(i = 0;i < MAX_NUM_TICK_TASKS;i++)
+    {
+        if(tick_funcs[i])
+        {
+            tick_funcs[i]();
+        }
+    }
+
+    current_tick++;
+
+    TSR0 &= ~0x01;
+}
+#elif defined(CPU_COLDFIRE)
+void tick_start(unsigned int interval_in_ms)
+{
+    unsigned long count;
+    int prescale;
+
+    count = CPU_FREQ/2 * interval_in_ms / 1000 / 16;
+
+    if(count > 0x10000)
+    {
+        panicf("Error! The tick interval is too long (%d ms)\n",
+               interval_in_ms);
+        return;
+    }
+
+    prescale = cpu_frequency / CPU_FREQ;
+    /* Note: The prescaler is later adjusted on-the-fly on CPU frequency
+       changes within timer.c */
+    
+    /* We are using timer 0 */
+
+    TRR0 = (unsigned short)(count - 1); /* The reference count */
+    TCN0 = 0; /* reset the timer */
+    TMR0 = 0x001d | ((unsigned short)(prescale - 1) << 8); 
+           /* restart, CLK/16, enabled, prescaler */
+
+    TER0 = 0xff; /* Clear all events */
+
+    ICR1 = 0x8c; /* Interrupt on level 3.0 */
+    IMR &= ~0x200;
+}
+
+void TIMER0(void) __attribute__ ((interrupt_handler));
+void TIMER0(void)
+{
+    int i;
+
+    /* Run through the list of tick tasks */
+    for(i = 0;i < MAX_NUM_TICK_TASKS;i++)
+    {
+        if(tick_funcs[i])
+        {
+            tick_funcs[i]();
+        }
+    }
+
+    current_tick++;
+
+    TER0 = 0xff; /* Clear all events */
+}
+
+#elif defined(CPU_PP)
+
+#ifndef BOOTLOADER
+void TIMER1(void)
+{
+    int i;
+
+    /* Run through the list of tick tasks (using main core) */
+    TIMER1_VAL; /* Read value to ack IRQ */
+
+    /* Run through the list of tick tasks using main CPU core - 
+       wake up the COP through its control interface to provide pulse */
+    for (i = 0;i < MAX_NUM_TICK_TASKS;i++)
+    {
+        if (tick_funcs[i])
+        {
+            tick_funcs[i]();
+        }
+    }
+
+#if NUM_CORES > 1
+    /* Pulse the COP */
+    core_wake(COP);
+#endif /* NUM_CORES */
+
+    current_tick++;
+}
+#endif
+
+/* Must be the last function called in kernel/thread initialization */
+void tick_start(unsigned int interval_in_ms)
+{
+#ifndef BOOTLOADER
+    TIMER1_CFG = 0x0;
+    TIMER1_VAL;
+    /* enable timer */
+    TIMER1_CFG = 0xc0000000 | (interval_in_ms*1000 - 1);
+    /* unmask interrupt source */
+    CPU_INT_EN = TIMER1_MASK;
+#else
+    /* We don't enable interrupts in the bootloader */
+    (void)interval_in_ms;
+#endif
+}
+
+#elif CONFIG_CPU == PNX0101
+
+void timer_handler(void)
+{
+    int i;
+
+    /* Run through the list of tick tasks */
+    for(i = 0;i < MAX_NUM_TICK_TASKS;i++)
+    {
+        if(tick_funcs[i])
+            tick_funcs[i]();
+    }
+
+    current_tick++;
+
+    TIMER0.clr = 0;
+}
+
+void tick_start(unsigned int interval_in_ms)
+{
+    TIMER0.ctrl &= ~0x80; /* Disable the counter */
+    TIMER0.ctrl |= 0x40;  /* Reload after counting down to zero */
+    TIMER0.load = 3000000 * interval_in_ms / 1000;
+    TIMER0.ctrl &= ~0xc;  /* No prescaler */
+    TIMER0.clr = 1;       /* Clear the interrupt request */
+
+    irq_set_int_handler(IRQ_TIMER0, timer_handler);
+    irq_enable_int(IRQ_TIMER0);
+
+    TIMER0.ctrl |= 0x80;  /* Enable the counter */
+}
+#endif
+
+int tick_add_task(void (*f)(void))
+{
+    int i;
+    int oldlevel = set_irq_level(HIGHEST_IRQ_LEVEL);
+
+    /* Add a task if there is room */
+    for(i = 0;i < MAX_NUM_TICK_TASKS;i++)
+    {
+        if(tick_funcs[i] == NULL)
+        {
+            tick_funcs[i] = f;
+            set_irq_level(oldlevel);
+            return 0;
+        }
+    }
+    set_irq_level(oldlevel);
+    panicf("Error! tick_add_task(): out of tasks");
+    return -1;
+}
+
+int tick_remove_task(void (*f)(void))
+{
+    int i;
+    int oldlevel = set_irq_level(HIGHEST_IRQ_LEVEL);
+
+    /* Remove a task if it is there */
+    for(i = 0;i < MAX_NUM_TICK_TASKS;i++)
+    {
+        if(tick_funcs[i] == f)
+        {
+            tick_funcs[i] = NULL;
+            set_irq_level(oldlevel);
+            return 0;
+        }
+    }
+    
+    set_irq_level(oldlevel);
+    return -1;
+}
+
+/****************************************************************************
+ * Tick-based interval timers/one-shots - be mindful this is not really
+ * intended for continuous timers but for events that need to run for a short
+ * time and be cancelled without further software intervention.
+ ****************************************************************************/
+#ifdef INCLUDE_TIMEOUT_API
+static struct timeout *tmo_list = NULL; /* list of active timeout events */
+
+/* timeout tick task - calls event handlers when they expire
+ * Event handlers may alter ticks, callback and data during operation.
+ */
+static void timeout_tick(void)
+{
+    unsigned long tick = current_tick;
+    struct timeout *curr, *next;
+
+    for (curr = tmo_list; curr != NULL; curr = next)
+    {
+        next = (struct timeout *)curr->next;
+
+        if (TIME_BEFORE(tick, curr->expires))
+            continue;
+
+        /* this event has expired - call callback */
+        if (curr->callback(curr))
+            *(long *)&curr->expires = tick + curr->ticks; /* reload */
+        else
+            timeout_cancel(curr); /* cancel */
+    }
+}
+
+/* Cancels a timeout callback - can be called from the ISR */
+void timeout_cancel(struct timeout *tmo)
+{
+    int oldlevel = set_irq_level(HIGHEST_IRQ_LEVEL);
+
+    if (tmo_list != NULL)
+    {
+        struct timeout *curr = tmo_list;
+        struct timeout *prev = NULL;
+
+        while (curr != tmo && curr != NULL)
+        {
+            prev = curr;
+            curr = (struct timeout *)curr->next;
+        }
+
+        if (curr != NULL)
+        {
+            /* in list */
+            if (prev == NULL)
+                tmo_list = (struct timeout *)curr->next;
+            else
+                *(const struct timeout **)&prev->next = curr->next;
+
+            if (tmo_list == NULL)
+                tick_remove_task(timeout_tick); /* last one - remove task */
+        }
+        /* not in list or tmo == NULL */
+    }
+
+    set_irq_level(oldlevel);
+}
+
+/* Adds a timeout callback - calling with an active timeout resets the
+   interval - can be called from the ISR */
+void timeout_register(struct timeout *tmo, timeout_cb_type callback,
+                      int ticks, intptr_t data)
+{
+    int oldlevel;
+    struct timeout *curr;
+
+    if (tmo == NULL)
+        return;
+
+    oldlevel = set_irq_level(HIGHEST_IRQ_LEVEL);
+
+    /* see if this one is already registered */
+    curr = tmo_list;
+    while (curr != tmo && curr != NULL)
+        curr = (struct timeout *)curr->next;
+
+    if (curr == NULL)
+    {
+        /* not found - add it */
+        if (tmo_list == NULL)
+            tick_add_task(timeout_tick); /* first one - add task */
+
+        *(struct timeout **)&tmo->next = tmo_list;
+        tmo_list = tmo;
+    }
+
+    tmo->callback = callback;
+    tmo->ticks = ticks;
+    tmo->data = data;
+    *(long *)&tmo->expires = current_tick + ticks;
+
+    set_irq_level(oldlevel);
+}
+
+#endif /* INCLUDE_TIMEOUT_API */
+
+/****************************************************************************
+ * Thread stuff
+ ****************************************************************************/
 void sleep(int ticks)
 {
 #if CONFIG_CPU == S3C2440 && defined(BOOTLOADER)
@@ -96,9 +431,11 @@
 #elif defined(CPU_PP) && defined(BOOTLOADER)
     unsigned stop = USEC_TIMER + ticks * (1000000/HZ);
     while (TIME_BEFORE(USEC_TIMER, stop))
-        switch_thread(NULL);
+        switch_thread();
 #else
+    set_irq_level(HIGHEST_IRQ_LEVEL);
     sleep_thread(ticks);
+    switch_thread();
 #endif
 }
 
@@ -107,7 +444,7 @@
 #if ((CONFIG_CPU == S3C2440 || defined(ELIO_TPJ1022)) && defined(BOOTLOADER))
     /* Some targets don't like yielding in the bootloader */
 #else
-    switch_thread(NULL);
+    switch_thread();
 #endif
 }
 
@@ -116,43 +453,50 @@
  ****************************************************************************/
 
 #ifdef HAVE_EXTENDED_MESSAGING_AND_NAME
-/* Moves waiting thread's descriptor to the current sender when a
-   message is dequeued */
-static void queue_fetch_sender(struct queue_sender_list *send,
-                               unsigned int i)
-{
-    struct thread_entry **spp = &send->senders[i];
+/****************************************************************************
+ * Sender thread queue structure that aids implementation of priority
+ * inheritance on queues because the send list structure is the same as
+ * for all other kernel objects:
+ *
+ * Example state:
+ * E0 added with queue_send and removed by thread via queue_wait(_w_tmo)
+ * E3 was posted with queue_post
+ * 4 events remain enqueued (E1-E4)
+ *
+ *                                 rd                          wr
+ * q->events[]:          |  XX  |  E1  |  E2  |  E3  |  E4  |  XX  |
+ * q->send->senders[]:   | NULL |  T1  |  T2  | NULL |  T3  | NULL |
+ *                                 \/     \/            \/
+ * q->send->list:       >->|T0|<->|T1|<->|T2|<-------->|T3|<-<
+ * q->send->curr_sender:    /\
+ *
+ * Thread has E0 in its own struct queue_event.
+ *
+ ****************************************************************************/
 
-    if(*spp)
-    {
-        send->curr_sender = *spp;
-        *spp = NULL;
-    }
-}
-
 /* Puts the specified return value in the waiting thread's return value
  * and wakes the thread.
- * 1) A sender should be confirmed to exist before calling which makes it
- *    more efficent to reject the majority of cases that don't need this
-      called.
- * 2) Requires interrupts disabled since queue overflows can cause posts
- *    from interrupt handlers to wake threads. Not doing so could cause
- *    an attempt at multiple wakes or other problems.
+ *
+ * A sender should be confirmed to exist before calling which makes it
+ * more efficient to reject the majority of cases that don't need this
+ * called.
  */
 static void queue_release_sender(struct thread_entry **sender,
                                  intptr_t retval)
 {
-    (*sender)->retval = retval;
-    wakeup_thread_no_listlock(sender);
-    /* This should _never_ happen - there must never be multiple
-       threads in this list and it is a corrupt state */
-    KERNEL_ASSERT(*sender == NULL, "queue->send slot ovf: %08X", (int)*sender);
+    struct thread_entry *thread = *sender;
+
+    *sender = NULL;               /* Clear slot. */
+    thread->wakeup_ext_cb = NULL; /* Clear callback. */
+    thread->retval = retval;      /* Assign thread-local return value. */
+    *thread->bqp = thread;        /* Move blocking queue head to thread since
+                                     wakeup_thread wakes the first thread in
+                                     the list. */
+    wakeup_thread(thread->bqp);
 }
 
 /* Releases any waiting threads that are queued with queue_send -
  * reply with 0.
- * Disable IRQs and lock before calling since it uses
- * queue_release_sender.
  */
 static void queue_release_all_senders(struct event_queue *q)
 {
@@ -172,25 +516,103 @@
     }
 }
 
+/* Callback to do extra forced removal steps from sender list in addition
+ * to the normal blocking queue removal and priority dis-inherit */
+static void queue_remove_sender_thread_cb(struct thread_entry *thread)
+{
+    *((struct thread_entry **)thread->retval) = NULL;
+    thread->wakeup_ext_cb = NULL;
+    thread->retval = 0;
+}
+
 /* Enables queue_send on the specified queue - caller allocates the extra
-   data structure. Only queues which are taken to be owned by a thread should
-   enable this. Public waiting is not permitted. */
+ * data structure. Only queues which are taken to be owned by a thread should
+ * enable this. An official owner is not compulsory but must be
+ * specified for priority inheritance to operate.
+ *
+ * Use of queue_wait(_w_tmo) by multiple threads on a queue using synchronous
+ * messages results in an undefined order of message replies.
+ */
 void queue_enable_queue_send(struct event_queue *q,
-                             struct queue_sender_list *send)
+                             struct queue_sender_list *send,
+                             struct thread_entry *owner)
 {
     int oldlevel = set_irq_level(HIGHEST_IRQ_LEVEL);
     corelock_lock(&q->cl);
 
-    q->send = NULL;
-    if(send != NULL)
+    if(send != NULL && q->send == NULL)
     {
         memset(send, 0, sizeof(*send));
+#ifdef HAVE_PRIORITY_SCHEDULING
+        send->blocker.wakeup_protocol = wakeup_priority_protocol_release;
+        send->blocker.priority = PRIORITY_IDLE;
+        send->blocker.thread = owner;
+        if(owner != NULL)
+            q->blocker_p = &send->blocker;
+#endif
         q->send = send;
     }
 
     corelock_unlock(&q->cl);
     set_irq_level(oldlevel);
+
+    (void)owner;
 }
+
+/* Unblock a blocked thread at a given event index */
+static inline void queue_do_unblock_sender(struct queue_sender_list *send,
+                                           unsigned int i)
+{
+    if(send)
+    {
+        struct thread_entry **spp = &send->senders[i];
+
+        if(*spp)
+        {
+            queue_release_sender(spp, 0);
+        }
+    }
+}
+
+/* Perform the auto-reply sequence */
+static inline void queue_do_auto_reply(struct queue_sender_list *send)
+{
+    if(send && send->curr_sender)
+    {
+        /* auto-reply */
+        queue_release_sender(&send->curr_sender, 0);
+    }
+}
+
+/* Moves waiting thread's reference from the senders array to the
+ * curr_sender which represents the thread waiting for a response to the
+ * last message removed from the queue. This also protects the thread from
+ * being bumped due to overflow which would not be a valid action since its
+ * message _is_ being processed at this point. */
+static inline void queue_do_fetch_sender(struct queue_sender_list *send,
+                                         unsigned int rd)
+{
+    if(send)
+    {
+        struct thread_entry **spp = &send->senders[rd];
+
+        if(*spp)
+        {
+            /* Move thread reference from array to the next thread
+               that queue_reply will release */
+            send->curr_sender = *spp;
+            (*spp)->retval = (intptr_t)spp;
+            *spp = NULL;
+        }
+        /* else message was posted asynchronously with queue_post */
+    }
+}
+#else
+/* Empty macros for when synchronous sending is not enabled */
+#define queue_release_all_senders(q)
+#define queue_do_unblock_sender(send, i)
+#define queue_do_auto_reply(send)
+#define queue_do_fetch_sender(send, rd)
 #endif /* HAVE_EXTENDED_MESSAGING_AND_NAME */
 
 /* Queue must not be available for use during this call */
@@ -204,11 +626,12 @@
     }
 
     corelock_init(&q->cl);
-    thread_queue_init(&q->queue);
-    q->read   = 0;
-    q->write  = 0;
+    q->queue = NULL;
+    q->read = 0;
+    q->write = 0;
 #ifdef HAVE_EXTENDED_MESSAGING_AND_NAME
-    q->send   = NULL; /* No message sending by default */
+    q->send = NULL; /* No message sending by default */
+    IF_PRIO( q->blocker_p = NULL; )
 #endif
 
     if(register_queue)
@@ -254,14 +677,20 @@
 
     corelock_unlock(&all_queues.cl);
 
-    /* Release threads waiting on queue head */
+    /* Release thread(s) waiting on queue head */
     thread_queue_wake(&q->queue);
 
 #ifdef HAVE_EXTENDED_MESSAGING_AND_NAME
-    /* Release waiting threads for reply and reply to any dequeued
-       message waiting for one. */
-    queue_release_all_senders(q);
-    queue_reply(q, 0);
+    if(q->send)
+    {
+        /* Release threads waiting for replies */
+        queue_release_all_senders(q);
+
+        /* Reply to any dequeued message waiting for one */
+        queue_do_auto_reply(q->send);
+
+        q->send = NULL;
+    }
 #endif
 
     q->read = 0;
@@ -279,33 +708,32 @@
     int oldlevel;
     unsigned int rd;
 
+#ifdef HAVE_PRIORITY_SCHEDULING
+    KERNEL_ASSERT(QUEUE_GET_THREAD(q) == NULL ||
+                  QUEUE_GET_THREAD(q) == thread_get_current(),
+                  "queue_wait->wrong thread\n");
+#endif
+
     oldlevel = set_irq_level(HIGHEST_IRQ_LEVEL);
     corelock_lock(&q->cl);
 
-#ifdef HAVE_EXTENDED_MESSAGING_AND_NAME
-    if(q->send && q->send->curr_sender)
-    {
-        /* auto-reply */
-        queue_release_sender(&q->send->curr_sender, 0);
-    }
-#endif
+    /* auto-reply */
+    queue_do_auto_reply(q->send);
     
     if (q->read == q->write)
     {
+        struct thread_entry *current = cores[CURRENT_CORE].running;
+
         do
         {
-#if CONFIG_CORELOCK == CORELOCK_NONE
-#elif CONFIG_CORELOCK == SW_CORELOCK
-            const unsigned int core = CURRENT_CORE;
-            cores[core].blk_ops.flags = TBOP_UNLOCK_CORELOCK;
-            cores[core].blk_ops.cl_p = &q->cl;
-#elif CONFIG_CORELOCK == CORELOCK_SWAP
-            const unsigned int core = CURRENT_CORE;
-            cores[core].blk_ops.flags = TBOP_SET_VARu8;
-            cores[core].blk_ops.var_u8p = &q->cl.locked;
-            cores[core].blk_ops.var_u8v = 0;
-#endif /* CONFIG_CORELOCK */
-            block_thread(&q->queue);
+            IF_COP( current->obj_cl = &q->cl; )
+            current->bqp = &q->queue;
+
+            block_thread(current);
+
+            corelock_unlock(&q->cl);
+            switch_thread();
+
             oldlevel = set_irq_level(HIGHEST_IRQ_LEVEL);
             corelock_lock(&q->cl);
         }
@@ -316,13 +744,8 @@
     rd = q->read++ & QUEUE_LENGTH_MASK;
     *ev = q->events[rd];
 
-#ifdef HAVE_EXTENDED_MESSAGING_AND_NAME
-    if(q->send && q->send->senders[rd])
-    {
-        /* Get data for a waiting thread if one */
-        queue_fetch_sender(q->send, rd);
-    }
-#endif
+    /* Get data for a waiting thread if one */
+    queue_do_fetch_sender(q->send, rd);
 
     corelock_unlock(&q->cl);
     set_irq_level(oldlevel);
@@ -332,31 +755,30 @@
 {
     int oldlevel;
 
+#ifdef HAVE_EXTENDED_MESSAGING_AND_NAME
+    KERNEL_ASSERT(QUEUE_GET_THREAD(q) == NULL ||
+                  QUEUE_GET_THREAD(q) == thread_get_current(),
+                  "queue_wait_w_tmo->wrong thread\n");
+#endif
+
     oldlevel = set_irq_level(HIGHEST_IRQ_LEVEL);
     corelock_lock(&q->cl);
 
-#ifdef HAVE_EXTENDED_MESSAGING_AND_NAME
-    if (q->send && q->send->curr_sender)
-    {
-        /* auto-reply */
-        queue_release_sender(&q->send->curr_sender, 0);
-    }
-#endif
+    /* Auto-reply */
+    queue_do_auto_reply(q->send);
 
     if (q->read == q->write && ticks > 0)
     {
-#if CONFIG_CORELOCK == CORELOCK_NONE
-#elif CONFIG_CORELOCK == SW_CORELOCK
-        const unsigned int core = CURRENT_CORE;
-        cores[core].blk_ops.flags = TBOP_UNLOCK_CORELOCK;
-        cores[core].blk_ops.cl_p  = &q->cl;
-#elif CONFIG_CORELOCK == CORELOCK_SWAP
-        const unsigned int core = CURRENT_CORE;
-        cores[core].blk_ops.flags = TBOP_SET_VARu8;
-        cores[core].blk_ops.var_u8p = &q->cl.locked;
-        cores[core].blk_ops.var_u8v = 0;
-#endif
-        block_thread_w_tmo(&q->queue, ticks);
+        struct thread_entry *current = cores[CURRENT_CORE].running;
+
+        IF_COP( current->obj_cl = &q->cl; )
+        current->bqp = &q->queue;
+
+        block_thread_w_tmo(current, ticks);
+        corelock_unlock(&q->cl);    
+
+        switch_thread();
+
         oldlevel = set_irq_level(HIGHEST_IRQ_LEVEL);
         corelock_lock(&q->cl);
     }
@@ -367,14 +789,8 @@
     {
         unsigned int rd = q->read++ & QUEUE_LENGTH_MASK;
         *ev = q->events[rd];
-
-#ifdef HAVE_EXTENDED_MESSAGING_AND_NAME
-        if(q->send && q->send->senders[rd])
-        {
-            /* Get data for a waiting thread if one */
-            queue_fetch_sender(q->send, rd);
-        }
-#endif
+        /* Get data for a waiting thread if one */
+        queue_do_fetch_sender(q->send, rd);
     }
     else
     {
@@ -398,19 +814,9 @@
     q->events[wr].id   = id;
     q->events[wr].data = data;
 
-#ifdef HAVE_EXTENDED_MESSAGING_AND_NAME
-    if(q->send)
-    {
-        struct thread_entry **spp = &q->send->senders[wr];
+    /* overflow protect - unblock any thread waiting at this index */
+    queue_do_unblock_sender(q->send, wr);
 
-        if (*spp)
-        {
-            /* overflow protect - unblock any thread waiting at this index */
-            queue_release_sender(spp, 0);
-        }
-    }
-#endif
-
     /* Wakeup a waiting thread if any */
     wakeup_thread(&q->queue);
 
@@ -436,8 +842,9 @@
     
     if(q->send)
     {
-        const unsigned int core = CURRENT_CORE;
-        struct thread_entry **spp = &q->send->senders[wr];
+        struct queue_sender_list *send = q->send;
+        struct thread_entry **spp = &send->senders[wr];
+        struct thread_entry *current = cores[CURRENT_CORE].running;
 
         if(*spp)
         {
@@ -448,17 +855,20 @@
         /* Wakeup a waiting thread if any */
         wakeup_thread(&q->queue);
 
-#if CONFIG_CORELOCK == CORELOCK_NONE
-#elif CONFIG_CORELOCK == SW_CORELOCK
-        cores[core].blk_ops.flags = TBOP_UNLOCK_CORELOCK;
-        cores[core].blk_ops.cl_p  = &q->cl; 
-#elif CONFIG_CORELOCK == CORELOCK_SWAP
-        cores[core].blk_ops.flags = TBOP_SET_VARu8;
-        cores[core].blk_ops.var_u8p = &q->cl.locked;
-        cores[core].blk_ops.var_u8v = 0;
-#endif
-        block_thread_no_listlock(spp);
-        return cores[core].running->retval;
+        /* Save thread in slot, add to list and wait for reply */
+        *spp = current;
+        IF_COP( current->obj_cl = &q->cl; )
+        IF_PRIO( current->blocker = q->blocker_p; )
+        current->wakeup_ext_cb = queue_remove_sender_thread_cb;
+        current->retval = (intptr_t)spp;
+        current->bqp = &send->list;
+
+        block_thread(current);
+
+        corelock_unlock(&q->cl);
+        switch_thread();
+
+        return current->retval;
     }
 
     /* Function as queue_post if sending is not enabled */
@@ -497,37 +907,22 @@
 {
     if(q->send && q->send->curr_sender)
     {
-#if NUM_CORES > 1
         int oldlevel = set_irq_level(HIGHEST_IRQ_LEVEL);
         corelock_lock(&q->cl);
         /* Double-check locking */
-        if(q->send && q->send->curr_sender)
+        IF_COP( if(q->send && q->send->curr_sender) )
         {
-#endif
-
             queue_release_sender(&q->send->curr_sender, retval);
+        }
 
-#if NUM_CORES > 1
-        }
         corelock_unlock(&q->cl);
         set_irq_level(oldlevel);
-#endif
     }
 }
-#endif /* HAVE_EXTENDED_MESSAGING_AND_NAME */
 
-/* Poll queue to see if a message exists - careful in using the result if
- * queue_remove_from_head is called when messages are posted - possibly use
- * queue_wait_w_tmo(&q, 0) in that case or else a removed message that
- * unsignals the queue may cause an unwanted block */
-bool queue_empty(const struct event_queue* q)
-{
-    return ( q->read == q->write );
-}
-
 bool queue_peek(struct event_queue *q, struct queue_event *ev)
 {
-    if (q->read == q->write)
+    if(q->read == q->write)
          return false;
 
     bool have_msg = false;
@@ -535,7 +930,7 @@
     int oldlevel = set_irq_level(HIGHEST_IRQ_LEVEL);
     corelock_lock(&q->cl);
 
-    if (q->read != q->write)
+    if(q->read != q->write)
     {
         *ev = q->events[q->read & QUEUE_LENGTH_MASK];
         have_msg = true;
@@ -546,7 +941,17 @@
 
     return have_msg;
 }
+#endif /* HAVE_EXTENDED_MESSAGING_AND_NAME */
 
+/* Poll queue to see if a message exists - careful in using the result if
+ * queue_remove_from_head is called when messages are posted - possibly use
+ * queue_wait_w_tmo(&q, 0) in that case or else a removed message that
+ * unsignals the queue may cause an unwanted block */
+bool queue_empty(const struct event_queue* q)
+{
+    return ( q->read == q->write );
+}
+
 void queue_clear(struct event_queue* q)
 {
     int oldlevel;
@@ -554,11 +959,9 @@
     oldlevel = set_irq_level(HIGHEST_IRQ_LEVEL);
     corelock_lock(&q->cl);
 
-#ifdef HAVE_EXTENDED_MESSAGING_AND_NAME
     /* Release all threads waiting in the queue for a reply -
        dequeued sent message will be handled by owning thread */
     queue_release_all_senders(q);
-#endif
 
     q->read = 0;
     q->write = 0;
@@ -583,18 +986,9 @@
             break;
         }
 
-#ifdef HAVE_EXTENDED_MESSAGING_AND_NAME
-        if(q->send)
-        {
-            struct thread_entry **spp = &q->send->senders[rd];
+        /* Release any thread waiting on this message */
+        queue_do_unblock_sender(q->send, rd);
 
-            if (*spp)
-            {
-                /* Release any thread waiting on this message */
-                queue_release_sender(spp, 0);
-            }
-        }
-#endif
         q->read++;
     }
 
@@ -636,397 +1030,72 @@
 }
 
 /****************************************************************************
- * Timer tick
+ * Simple mutex functions ;)
  ****************************************************************************/
-#if CONFIG_CPU == SH7034
-void tick_start(unsigned int interval_in_ms)
-{
-    unsigned long count;
 
-    count = CPU_FREQ * interval_in_ms / 1000 / 8;
-
-    if(count > 0x10000)
-    {
-        panicf("Error! The tick interval is too long (%d ms)\n",
-               interval_in_ms);
-        return;
-    }
-    
-    /* We are using timer 0 */
-    
-    TSTR &= ~0x01; /* Stop the timer */
-    TSNC &= ~0x01; /* No synchronization */
-    TMDR &= ~0x01; /* Operate normally */
-
-    TCNT0 = 0;   /* Start counting at 0 */
-    GRA0 = (unsigned short)(count - 1);
-    TCR0 = 0x23; /* Clear at GRA match, sysclock/8 */
-
-    /* Enable interrupt on level 1 */
-    IPRC = (IPRC & ~0x00f0) | 0x0010;
-    
-    TSR0 &= ~0x01;
-    TIER0 = 0xf9; /* Enable GRA match interrupt */
-
-    TSTR |= 0x01; /* Start timer 1 */
-}
-
-void IMIA0(void) __attribute__ ((interrupt_handler));
-void IMIA0(void)
-{
-    int i;
-
-    /* Run through the list of tick tasks */
-    for(i = 0;i < MAX_NUM_TICK_TASKS;i++)
-    {
-        if(tick_funcs[i])
-        {
-            tick_funcs[i]();
-        }
-    }
-
-    current_tick++;
-
-    TSR0 &= ~0x01;
-}
-#elif defined(CPU_COLDFIRE)
-void tick_start(unsigned int interval_in_ms)
-{
-    unsigned long count;
-    int prescale;
-
-    count = CPU_FREQ/2 * interval_in_ms / 1000 / 16;
-
-    if(count > 0x10000)
-    {
-        panicf("Error! The tick interval is too long (%d ms)\n",
-               interval_in_ms);
-        return;
-    }
-
-    prescale = cpu_frequency / CPU_FREQ;
-    /* Note: The prescaler is later adjusted on-the-fly on CPU frequency
-       changes within timer.c */
-    
-    /* We are using timer 0 */
-
-    TRR0 = (unsigned short)(count - 1); /* The reference count */
-    TCN0 = 0; /* reset the timer */
-    TMR0 = 0x001d | ((unsigned short)(prescale - 1) << 8); 
-           /* restart, CLK/16, enabled, prescaler */
-
-    TER0 = 0xff; /* Clear all events */
-
-    ICR1 = 0x8c; /* Interrupt on level 3.0 */
-    IMR &= ~0x200;
-}
-
-void TIMER0(void) __attribute__ ((interrupt_handler));
-void TIMER0(void)
-{
-    int i;
-
-    /* Run through the list of tick tasks */
-    for(i = 0;i < MAX_NUM_TICK_TASKS;i++)
-    {
-        if(tick_funcs[i])
-        {
-            tick_funcs[i]();
-        }
-    }
-
-    current_tick++;
-
-    TER0 = 0xff; /* Clear all events */
-}
-
-#elif defined(CPU_PP)
-
-#ifndef BOOTLOADER
-void TIMER1(void)
-{
-    int i;
-
-    /* Run through the list of tick tasks (using main core) */
-    TIMER1_VAL; /* Read value to ack IRQ */
-
-    /* Run through the list of tick tasks using main CPU core - 
-       wake up the COP through its control interface to provide pulse */
-    for (i = 0;i < MAX_NUM_TICK_TASKS;i++)
-    {
-        if (tick_funcs[i])
-        {
-            tick_funcs[i]();
-        }
-    }
-
-#if NUM_CORES > 1
-    /* Pulse the COP */
-    core_wake(COP);
-#endif /* NUM_CORES */
-
-    current_tick++;
-}
-#endif
-
-/* Must be last function called init kernel/thread initialization */
-void tick_start(unsigned int interval_in_ms)
-{
-#ifndef BOOTLOADER
-    TIMER1_CFG = 0x0;
-    TIMER1_VAL;
-    /* enable timer */
-    TIMER1_CFG = 0xc0000000 | (interval_in_ms*1000 - 1);
-    /* unmask interrupt source */
-    CPU_INT_EN = TIMER1_MASK;
-#else
-    /* We don't enable interrupts in the bootloader */
-    (void)interval_in_ms;
-#endif
-}
-
-#elif CONFIG_CPU == PNX0101
-
-void timer_handler(void)
-{
-    int i;
-
-    /* Run through the list of tick tasks */
-    for(i = 0;i < MAX_NUM_TICK_TASKS;i++)
-    {
-        if(tick_funcs[i])
-            tick_funcs[i]();
-    }
-
-    current_tick++;
-
-    TIMER0.clr = 0;
-}
-
-void tick_start(unsigned int interval_in_ms)
-{
-    TIMER0.ctrl &= ~0x80; /* Disable the counter */
-    TIMER0.ctrl |= 0x40;  /* Reload after counting down to zero */
-    TIMER0.load = 3000000 * interval_in_ms / 1000;
-    TIMER0.ctrl &= ~0xc;  /* No prescaler */
-    TIMER0.clr = 1;       /* Clear the interrupt request */
-
-    irq_set_int_handler(IRQ_TIMER0, timer_handler);
-    irq_enable_int(IRQ_TIMER0);
-
-    TIMER0.ctrl |= 0x80;  /* Enable the counter */
-}
-#endif
-
-int tick_add_task(void (*f)(void))
-{
-    int i;
-    int oldlevel = set_irq_level(HIGHEST_IRQ_LEVEL);
-
-    /* Add a task if there is room */
-    for(i = 0;i < MAX_NUM_TICK_TASKS;i++)
-    {
-        if(tick_funcs[i] == NULL)
-        {
-            tick_funcs[i] = f;
-            set_irq_level(oldlevel);
-            return 0;
-        }
-    }
-    set_irq_level(oldlevel);
-    panicf("Error! tick_add_task(): out of tasks");
-    return -1;
-}
-
-int tick_remove_task(void (*f)(void))
-{
-    int i;
-    int oldlevel = set_irq_level(HIGHEST_IRQ_LEVEL);
-
-    /* Remove a task if it is there */
-    for(i = 0;i < MAX_NUM_TICK_TASKS;i++)
-    {
-        if(tick_funcs[i] == f)
-        {
-            tick_funcs[i] = NULL;
-            set_irq_level(oldlevel);
-            return 0;
-        }
-    }
-    
-    set_irq_level(oldlevel);
-    return -1;
-}
-
-/****************************************************************************
- * Tick-based interval timers/one-shots - be mindful this is not really
- * intended for continuous timers but for events that need to run for a short
- * time and be cancelled without further software intervention.
- ****************************************************************************/
-#ifdef INCLUDE_TIMEOUT_API
-static struct timeout *tmo_list = NULL; /* list of active timeout events */
-
-/* timeout tick task - calls event handlers when they expire
- * Event handlers may alter ticks, callback and data during operation.
- */
-static void timeout_tick(void)
-{
-    unsigned long tick = current_tick;
-    struct timeout *curr, *next;
-
-    for (curr = tmo_list; curr != NULL; curr = next)
-    {
-        next = (struct timeout *)curr->next;
-
-        if (TIME_BEFORE(tick, curr->expires))
-            continue;
-
-        /* this event has expired - call callback */
-        if (curr->callback(curr))
-            *(long *)&curr->expires = tick + curr->ticks; /* reload */
-        else
-            timeout_cancel(curr); /* cancel */
-    }
-}
-
-/* Cancels a timeout callback - can be called from the ISR */
-void timeout_cancel(struct timeout *tmo)
-{
-    int oldlevel = set_irq_level(HIGHEST_IRQ_LEVEL);
-
-    if (tmo_list != NULL)
-    {
-        struct timeout *curr = tmo_list;
-        struct timeout *prev = NULL;
-
-        while (curr != tmo && curr != NULL)
-        {
-            prev = curr;
-            curr = (struct timeout *)curr->next;
-        }
-
-        if (curr != NULL)
-        {
-            /* in list */
-            if (prev == NULL)
-                tmo_list = (struct timeout *)curr->next;
-            else
-                *(const struct timeout **)&prev->next = curr->next;
-
-            if (tmo_list == NULL)
-                tick_remove_task(timeout_tick); /* last one - remove task */
-        }
-        /* not in list or tmo == NULL */
-    }
-
-    set_irq_level(oldlevel);
-}
-
-/* Adds a timeout callback - calling with an active timeout resets the
-   interval - can be called from the ISR */
-void timeout_register(struct timeout *tmo, timeout_cb_type callback,
-                      int ticks, intptr_t data)
-{
-    int oldlevel;
-    struct timeout *curr;
-
-    if (tmo == NULL)
-        return;
-
-    oldlevel = set_irq_level(HIGHEST_IRQ_LEVEL);
-
-    /* see if this one is already registered */
-    curr = tmo_list;
-    while (curr != tmo && curr != NULL)
-        curr = (struct timeout *)curr->next;
-
-    if (curr == NULL)
-    {
-        /* not found - add it */
-        if (tmo_list == NULL)
-            tick_add_task(timeout_tick); /* first one - add task */
-
-        *(struct timeout **)&tmo->next = tmo_list;
-        tmo_list = tmo;
-    }
-
-    tmo->callback = callback;
-    tmo->ticks = ticks;
-    tmo->data = data;
-    *(long *)&tmo->expires = current_tick + ticks;
-
-    set_irq_level(oldlevel);
-}
-
-#endif /* INCLUDE_TIMEOUT_API */
-
-/****************************************************************************
- * Simple mutex functions ;)
- ****************************************************************************/
+/* Initialize a mutex object - call before any use and do not call again once
+ * the object is available to other threads */
 void mutex_init(struct mutex *m)
 {
+    corelock_init(&m->cl);
     m->queue = NULL;
-    m->thread = NULL;
     m->count = 0;
     m->locked = 0;
-#if CONFIG_CORELOCK == SW_CORELOCK
-    corelock_init(&m->cl);
+    MUTEX_SET_THREAD(m, NULL);
+#ifdef HAVE_PRIORITY_SCHEDULING
+    m->blocker.priority = PRIORITY_IDLE;
+    m->blocker.wakeup_protocol = wakeup_priority_protocol_transfer;
+    m->no_preempt = false;
 #endif
 }
 
+/* Gain ownership of a mutex object or block until it becomes free */
 void mutex_lock(struct mutex *m)
 {
     const unsigned int core = CURRENT_CORE;
-    struct thread_entry *const thread = cores[core].running;
+    struct thread_entry *current = cores[core].running;
 
-    if(thread == m->thread)
+    if(current == MUTEX_GET_THREAD(m))
     {
+        /* current thread already owns this mutex */
         m->count++;
         return;
     }
 
-    /* Repeat some stuff here or else all the variation is too difficult to
-       read */
-#if CONFIG_CORELOCK == CORELOCK_SWAP
-    /* peek at lock until it's no longer busy */
-    unsigned int locked;
-    while ((locked = xchg8(&m->locked, STATE_BUSYu8)) == STATE_BUSYu8);
-    if(locked == 0)
-    {
-        m->thread = thread;
-        m->locked = 1;
-        return;
-    }
+    /* lock out other cores */
+    corelock_lock(&m->cl);
 
-    /* Block until the lock is open... */
-    cores[core].blk_ops.flags = TBOP_SET_VARu8;
-    cores[core].blk_ops.var_u8p = &m->locked;
-    cores[core].blk_ops.var_u8v = 1;
-#else
-    corelock_lock(&m->cl);
-    if (m->locked == 0)
+    if(m->locked == 0)
     {
+        /* lock is open */
+        MUTEX_SET_THREAD(m, current);
         m->locked = 1;
-        m->thread = thread;
         corelock_unlock(&m->cl);
         return;
     }
 
-    /* Block until the lock is open... */
-#if CONFIG_CORELOCK == SW_CORELOCK
-    cores[core].blk_ops.flags = TBOP_UNLOCK_CORELOCK;
-    cores[core].blk_ops.cl_p = &m->cl;
-#endif
-#endif /* CONFIG_CORELOCK */
+    /* block until the lock is open... */
+    IF_COP( current->obj_cl = &m->cl; )
+    IF_PRIO( current->blocker = &m->blocker; )
+    current->bqp = &m->queue;
 
-    block_thread_no_listlock(&m->queue);
+    set_irq_level(HIGHEST_IRQ_LEVEL);
+    block_thread(current);
+
+    corelock_unlock(&m->cl);
+
+    /* ...and turn control over to next thread */
+    switch_thread();
 }
 
+/* Release ownership of a mutex object - only owning thread must call this */
 void mutex_unlock(struct mutex *m)
 {
     /* unlocker not being the owner is an unlocking violation */
-    KERNEL_ASSERT(m->thread == cores[CURRENT_CORE].running,
-                  "mutex_unlock->wrong thread (recurse)");
+    KERNEL_ASSERT(MUTEX_GET_THREAD(m) == thread_get_current(),
+                  "mutex_unlock->wrong thread (%s != %s)\n",
+                  MUTEX_GET_THREAD(m)->name,
+                  thread_get_current()->name);
 
     if(m->count > 0)
     {
@@ -1035,37 +1104,33 @@
         return;
     }
 
-#if CONFIG_CORELOCK == SW_CORELOCK
     /* lock out other cores */
     corelock_lock(&m->cl);
-#elif CONFIG_CORELOCK == CORELOCK_SWAP
-    /* wait for peeker to move on */
-    while (xchg8(&m->locked, STATE_BUSYu8) == STATE_BUSYu8);
-#endif
 
     /* transfer to next queued thread if any */
-
-    /* This can become busy using SWP but is safe since only one thread
-       will be changing things at a time. Allowing timeout waits will
-       change that however but not now. There is also a hazard the thread
-       could be killed before performing the wakeup but that's just
-       irresponsible. :-) */
-    m->thread = m->queue;
-
-    if(m->thread == NULL)
+    if(m->queue == NULL)
     {
-        m->locked = 0; /* release lock */
-#if CONFIG_CORELOCK == SW_CORELOCK
+        /* no threads waiting - open the lock */
+        MUTEX_SET_THREAD(m, NULL);
+        m->locked = 0;
         corelock_unlock(&m->cl);
-#endif
+        return;
     }
-    else /* another thread is waiting - remain locked */
+    else
     {
-        wakeup_thread_no_listlock(&m->queue);
-#if CONFIG_CORELOCK == SW_CORELOCK
+        const int oldlevel = set_irq_level(HIGHEST_IRQ_LEVEL);
+        /* Transfer of owning thread is handled in the wakeup protocol
+         * if priorities are enabled; otherwise just set it from the
+         * queue head. */
+        IFN_PRIO( MUTEX_SET_THREAD(m, m->queue); )
+        IF_PRIO( unsigned int result = ) wakeup_thread(&m->queue);
+        set_irq_level(oldlevel);
+
         corelock_unlock(&m->cl);
-#elif CONFIG_CORELOCK == CORELOCK_SWAP
-        m->locked = 1;
+
+#ifdef HAVE_PRIORITY_SCHEDULING
+        if((result & THREAD_SWITCH) && !m->no_preempt)
+            switch_thread();
 #endif
     }
 }
@@ -1083,28 +1148,32 @@
 
 void spinlock_lock(struct spinlock *l)
 {
-    struct thread_entry *const thread = cores[CURRENT_CORE].running;
+    const unsigned int core = CURRENT_CORE;
+    struct thread_entry *current = cores[core].running;
 
-    if (l->thread == thread)
+    if(l->thread == current)
     {
+        /* current core already owns it */
         l->count++;
         return;
     }
 
+    /* lock against other processor cores */
     corelock_lock(&l->cl);
 
-    l->thread = thread;
+    /* take ownership */
+    l->thread = current;
 }
 
 void spinlock_unlock(struct spinlock *l)
 {
     /* unlocker not being the owner is an unlocking violation */
-    KERNEL_ASSERT(l->thread == cores[CURRENT_CORE].running,
-                  "spinlock_unlock->wrong thread");
+    KERNEL_ASSERT(l->thread == thread_get_current(),
+                  "spinlock_unlock->wrong thread\n");
 
-    if (l->count > 0)
+    if(l->count > 0)
     {
-        /* this thread still owns lock */
+        /* this core still owns lock */
         l->count--;
         return;
     }
@@ -1124,76 +1193,62 @@
 void semaphore_init(struct semaphore *s, int max, int start)
 {
     KERNEL_ASSERT(max > 0 && start >= 0 && start <= max,
-                  "semaphore_init->inv arg");
+                  "semaphore_init->inv arg\n");
     s->queue = NULL;
     s->max = max;
     s->count = start;
-#if CONFIG_CORELOCK == SW_CORELOCK
     corelock_init(&s->cl);
-#endif
 }
 
 void semaphore_wait(struct semaphore *s)
 {
-#if CONFIG_CORELOCK == CORELOCK_NONE || CONFIG_CORELOCK == SW_CORELOCK
+    struct thread_entry *current;
+
     corelock_lock(&s->cl);
+
     if(--s->count >= 0)
     {
+        /* wait satisfied */
         corelock_unlock(&s->cl);
         return;
     }
-#elif CONFIG_CORELOCK == CORELOCK_SWAP
-    int count;
-    while ((count = xchg32(&s->count, STATE_BUSYi)) == STATE_BUSYi);
-    if(--count >= 0)
-    {
-        s->count = count;
-        return;
-    }
-#endif
 
-    /* too many waits - block until dequeued */
-#if CONFIG_CORELOCK == SW_CORELOCK
-    const unsigned int core = CURRENT_CORE;
-    cores[core].blk_ops.flags = TBOP_UNLOCK_CORELOCK;
-    cores[core].blk_ops.cl_p = &s->cl;
-#elif CONFIG_CORELOCK == CORELOCK_SWAP
-    const unsigned int core = CURRENT_CORE;
-    cores[core].blk_ops.flags = TBOP_SET_VARi;
-    cores[core].blk_ops.var_ip = &s->count;
-    cores[core].blk_ops.var_iv = count;
-#endif
-    block_thread_no_listlock(&s->queue);
+    /* too many waits - block until dequeued... */
+    current = cores[CURRENT_CORE].running;
+
+    IF_COP( current->obj_cl = &s->cl; )
+    current->bqp = &s->queue;
+
+    set_irq_level(HIGHEST_IRQ_LEVEL);
+    block_thread(current);
+
+    corelock_unlock(&s->cl);
+
+    /* ...and turn control over to next thread */
+    switch_thread();
 }
 
 void semaphore_release(struct semaphore *s)
 {
-#if CONFIG_CORELOCK == CORELOCK_NONE || CONFIG_CORELOCK == SW_CORELOCK
+    IF_PRIO( unsigned int result = THREAD_NONE; )
+
     corelock_lock(&s->cl);
-    if (s->count < s->max)
+
+    if(s->count < s->max && ++s->count <= 0)
     {
-        if (++s->count <= 0)
-        {
-#elif CONFIG_CORELOCK == CORELOCK_SWAP
-    int count;
-    while ((count = xchg32(&s->count, STATE_BUSYi)) == STATE_BUSYi);
-    if(count < s->max)
-    {
-        if(++count <= 0)
-        {
-#endif /* CONFIG_CORELOCK */
-
-            /* there should be threads in this queue */
-            KERNEL_ASSERT(s->queue != NULL, "semaphore->wakeup");
-            /* a thread was queued - wake it up */
-            wakeup_thread_no_listlock(&s->queue);
-        }
+        /* there should be threads in this queue */
+        KERNEL_ASSERT(s->queue != NULL, "semaphore->wakeup\n");
+        /* a thread was queued - wake it up */
+        int oldlevel = set_irq_level(HIGHEST_IRQ_LEVEL);
+        IF_PRIO( result = ) wakeup_thread(&s->queue);
+        set_irq_level(oldlevel);
     }
 
-#if CONFIG_CORELOCK == SW_CORELOCK
     corelock_unlock(&s->cl);
-#elif CONFIG_CORELOCK == CORELOCK_SWAP
-    s->count = count;
+
+#ifdef HAVE_PRIORITY_SCHEDULING
+    if(result & THREAD_SWITCH)
+        switch_thread();
 #endif
 }
 #endif /* HAVE_SEMAPHORE_OBJECTS */
@@ -1208,117 +1263,107 @@
     e->queues[STATE_SIGNALED] = NULL;
     e->state = flags & STATE_SIGNALED;
     e->automatic = (flags & EVENT_AUTOMATIC) ? 1 : 0;
-#if CONFIG_CORELOCK == SW_CORELOCK
     corelock_init(&e->cl);
-#endif
 }
 
 void event_wait(struct event *e, unsigned int for_state)
 {
-   unsigned int last_state;
-#if CONFIG_CORELOCK == CORELOCK_NONE || CONFIG_CORELOCK == SW_CORELOCK
+    struct thread_entry *current;
+
     corelock_lock(&e->cl);
-    last_state = e->state;
-#elif CONFIG_CORELOCK == CORELOCK_SWAP
-    while ((last_state = xchg8(&e->state, STATE_BUSYu8)) == STATE_BUSYu8);
-#endif
 
     if(e->automatic != 0)
     {
         /* wait for false always satisfied by definition
            or if it just changed to false */
-        if(last_state == STATE_SIGNALED || for_state == STATE_NONSIGNALED)
+        if(e->state == STATE_SIGNALED || for_state == STATE_NONSIGNALED)
         {
             /* automatic - unsignal */
             e->state = STATE_NONSIGNALED;
-#if CONFIG_CORELOCK == SW_CORELOCK
             corelock_unlock(&e->cl);
-#endif
             return;
         }
         /* block until state matches */
     }
-    else if(for_state == last_state)
+    else if(for_state == e->state)
     {
         /* the state being waited for is the current state */
-#if CONFIG_CORELOCK == SW_CORELOCK
         corelock_unlock(&e->cl);
-#elif CONFIG_CORELOCK == CORELOCK_SWAP
-        e->state = last_state;
-#endif
         return;
     }
 
-    {
-        /* current state does not match wait-for state */
-#if CONFIG_CORELOCK == SW_CORELOCK
-        const unsigned int core = CURRENT_CORE;
-        cores[core].blk_ops.flags = TBOP_UNLOCK_CORELOCK;
-        cores[core].blk_ops.cl_p = &e->cl;
-#elif CONFIG_CORELOCK == CORELOCK_SWAP
-        const unsigned int core = CURRENT_CORE;
-        cores[core].blk_ops.flags = TBOP_SET_VARu8;
-        cores[core].blk_ops.var_u8p = &e->state;
-        cores[core].blk_ops.var_u8v = last_state;
-#endif
-        block_thread_no_listlock(&e->queues[for_state]);
-    }
+    /* block until state matches what the caller requests */
+    current = cores[CURRENT_CORE].running;
+
+    IF_COP( current->obj_cl = &e->cl; )
+    current->bqp = &e->queues[for_state];
+
+    set_irq_level(HIGHEST_IRQ_LEVEL);
+    block_thread(current);
+
+    corelock_unlock(&e->cl);
+
+    /* turn control over to next thread */
+    switch_thread();
 }
 
 void event_set_state(struct event *e, unsigned int state)
 {
-    unsigned int last_state;
-#if CONFIG_CORELOCK == CORELOCK_NONE || CONFIG_CORELOCK == SW_CORELOCK
+    unsigned int result;
+    int oldlevel;
+
     corelock_lock(&e->cl);
-    last_state = e->state;
-#elif CONFIG_CORELOCK == CORELOCK_SWAP
-    while ((last_state = xchg8(&e->state, STATE_BUSYu8)) == STATE_BUSYu8);
-#endif
 
-    if(last_state == state)
+    if(e->state == state)
     {
         /* no change */
-#if CONFIG_CORELOCK == SW_CORELOCK
         corelock_unlock(&e->cl);
-#elif CONFIG_CORELOCK == CORELOCK_SWAP
-        e->state = last_state;
-#endif
         return;
     }
 
+    IF_PRIO( result = THREAD_OK; )
+
+    oldlevel = set_irq_level(HIGHEST_IRQ_LEVEL);
+
     if(state == STATE_SIGNALED)
     {
         if(e->automatic != 0)
         {
-            struct thread_entry *thread;
-            /* no thread should have ever blocked for unsignaled */
+            /* no thread should have ever blocked for nonsignaled */
             KERNEL_ASSERT(e->queues[STATE_NONSIGNALED] == NULL,
-                          "set_event_state->queue[NS]:S");
+                          "set_event_state->queue[NS]:S\n");
             /* pass to next thread and keep unsignaled - "pulse" */
-            thread = wakeup_thread_no_listlock(&e->queues[STATE_SIGNALED]);
-            e->state = thread != NULL ? STATE_NONSIGNALED : STATE_SIGNALED;
+            result = wakeup_thread(&e->queues[STATE_SIGNALED]);
+            e->state = (result & THREAD_OK) ? STATE_NONSIGNALED : STATE_SIGNALED;
         }
         else
         {
             /* release all threads waiting for signaled */
-            thread_queue_wake_no_listlock(&e->queues[STATE_SIGNALED]);
             e->state = STATE_SIGNALED;
+            IF_PRIO( result = )
+                thread_queue_wake(&e->queues[STATE_SIGNALED]);
         }
     }
     else
     {
-        /* release all threads waiting for unsignaled */
+        /* release all threads waiting for nonsignaled */
 
         /* no thread should have ever blocked if automatic */
         KERNEL_ASSERT(e->queues[STATE_NONSIGNALED] == NULL ||
-                      e->automatic == 0, "set_event_state->queue[NS]:NS");
+                      e->automatic == 0, "set_event_state->queue[NS]:NS\n");
 
-        thread_queue_wake_no_listlock(&e->queues[STATE_NONSIGNALED]);
         e->state = STATE_NONSIGNALED;
+        IF_PRIO( result = )
+            thread_queue_wake(&e->queues[STATE_NONSIGNALED]);
     }
 
-#if CONFIG_CORELOCK == SW_CORELOCK
+    set_irq_level(oldlevel);
+
     corelock_unlock(&e->cl);
+
+#ifdef HAVE_PRIORITY_SCHEDULING
+    if(result & THREAD_SWITCH)
+        switch_thread();
 #endif
 }
 #endif /* HAVE_EVENT_OBJECTS */
Index: firmware/SOURCES
===================================================================
--- firmware/SOURCES	(revision 16785)
+++ firmware/SOURCES	(working copy)
@@ -9,11 +9,11 @@
 #ifdef ROCKBOX_HAS_LOGF
 logf.c
 #endif /* ROCKBOX_HAS_LOGF */
+kernel.c
 #ifndef SIMULATOR
 #ifdef RB_PROFILE
 profile.c
 #endif /* RB_PROFILE */
-kernel.c
 rolo.c
 thread.c
 timer.c
@@ -274,6 +274,10 @@
 
 #ifndef SIMULATOR
 target/coldfire/crt0.S
+#ifdef HAVE_PRIORITY_SCHEDULING
+common/ffs.c
+target/coldfire/ffs-coldfire.S
+#endif
 target/coldfire/memcpy-coldfire.S
 target/coldfire/memmove-coldfire.S
 target/coldfire/memset-coldfire.S
@@ -299,6 +303,9 @@
 #ifndef SIMULATOR
 target/arm/memset-arm.S
 target/arm/memset16-arm.S
+#ifdef HAVE_PRIORITY_SCHEDULING
+target/arm/ffs-arm.S
+#endif
 #if CONFIG_I2C == I2C_PP5024 || CONFIG_I2C == I2C_PP5020 || CONFIG_I2C == I2C_PP5002
 target/arm/i2c-pp.c
 #elif CONFIG_I2C == I2C_PNX0101
@@ -345,6 +352,9 @@
 
 #else
 
+#ifdef HAVE_PRIORITY_SCHEDULING
+common/ffs.c
+#endif
 common/memcpy.c
 common/memmove.c
 common/memset.c
Index: firmware/target/arm/ffs-arm.S
===================================================================
--- firmware/target/arm/ffs-arm.S	(revision 0)
+++ firmware/target/arm/ffs-arm.S	(revision 0)
@@ -0,0 +1,74 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2008 by Michael Sevakis
+ *
+ * All files in this archive are subject to the GNU General Public License.
+ * See the file COPYING in the source tree root for full license agreement.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+ #include "config.h"
+
+/****************************************************************************
+ * int find_first_set_bit(uint32_t val);
+ *
+ * Find the index of the least significant set bit in the 32-bit word.
+ *
+ * return values:
+ *   0  - bit 0 is set
+ *   1  - bit 1 is set
+ *   ...
+ *   31 - bit 31 is set
+ *   32 - no bits set
+ ****************************************************************************/
+    .align  2
+    .global find_first_set_bit
+    .type   find_first_set_bit,%function
+find_first_set_bit:
+    @ Standard trick to isolate bottom bit in r0 or 0 if r0 = 0 on entry
+    rsb     r2, r0, #0               @ r1 = r0 & -r0
+    ands    r1, r0, r2               @
+    
+    @ now r1 has at most one set bit, call this X
+    
+#if ARM_ARCH >= 5
+    clz     r0, r1                   @ Get lead 0's count
+    rsbne   r0, r0, #31              @ lead 0's -> bit index
+    bx      lr                       @
+#else
+    @ this is the ffs algorithm devised by D.Seal and posted to
+    @ comp.sys.arm on 16 Feb 1994.
+    @
+    @ Output modified to suit Rockbox purposes.
+
+    adr     r2, L_ffs_table
+    orrne   r1, r1, r1, lsl #4       @ r1 = X * 0x11
+    orrne   r1, r1, r1, lsl #6       @ r1 = X * 0x451
+    rsbne   r1, r1, r1, lsl #16      @ r1 = X * 0x0450fbaf
+
+    @ now lookup in table indexed on top 6 bits of r1
+    ldrb    r0, [ r2, r1, lsr #26 ]  @
+    bx      lr                       @
+
+L_ffs_table:
+    @        0   1   2   3   4   5   6   7
+    @----------------------------------------------
+    .byte   32,  0,  1, 12,  2,  6,  0, 13  @  0- 7
+    .byte    3,  0,  7,  0,  0,  0,  0, 14  @  8-15
+    .byte   10,  4,  0,  0,  8,  0,  0, 25  @ 16-23
+    .byte    0,  0,  0,  0,  0, 21, 27, 15  @ 24-31
+    .byte   31, 11,  5,  0,  0,  0,  0,  0  @ 32-39
+    .byte    9,  0,  0, 24,  0,  0, 20, 26  @ 40-47
+    .byte   30,  0,  0,  0,  0, 23,  0, 19  @ 48-55
+    .byte   29,  0, 22, 18, 28, 17, 16,  0  @ 56-63
+#endif
+    .size   find_first_set_bit, .-find_first_set_bit

Property changes on: firmware/target/arm/ffs-arm.S
___________________________________________________________________
Name: svn:keywords
   + Author Date Id Revision
Name: svn:eol-style
   + native

Index: firmware/target/arm/i2c-pp.c
===================================================================
--- firmware/target/arm/i2c-pp.c	(revision 16785)
+++ firmware/target/arm/i2c-pp.c	(working copy)
@@ -45,7 +45,7 @@
          if (!(I2C_STATUS & I2C_BUSY)) {
             return 0;
          }
-         priority_yield();
+         yield();
     }
 
     return -1;
Index: firmware/target/arm/sandisk/ata-c200_e200.c
===================================================================
--- firmware/target/arm/sandisk/ata-c200_e200.c	(revision 16785)
+++ firmware/target/arm/sandisk/ata-c200_e200.c	(working copy)
@@ -198,7 +198,7 @@
         if (TIME_AFTER(time, next_yield))
         {
             long ty = USEC_TIMER;
-            priority_yield();
+            yield();
             timeout += USEC_TIMER - ty;
             next_yield = ty + MIN_YIELD_PERIOD;
         }
@@ -317,7 +317,7 @@
         us = USEC_TIMER;
         if (TIME_AFTER(us, next_yield))
         {
-            priority_yield();
+            yield();
             timeout += USEC_TIMER - us;
             next_yield = us + MIN_YIELD_PERIOD;
         }
Index: firmware/target/arm/s3c2440/gigabeat-fx/ata-meg-fx.c
===================================================================
--- firmware/target/arm/s3c2440/gigabeat-fx/ata-meg-fx.c	(revision 16785)
+++ firmware/target/arm/s3c2440/gigabeat-fx/ata-meg-fx.c	(working copy)
@@ -128,7 +128,7 @@
 
     /* Wait for transfer to complete */
     while((DSTAT0 & 0x000fffff))
-        priority_yield();
+        yield();
     /* Dump cache for the buffer  */
 }
 #endif
Index: firmware/target/coldfire/ffs-coldfire.S
===================================================================
--- firmware/target/coldfire/ffs-coldfire.S	(revision 0)
+++ firmware/target/coldfire/ffs-coldfire.S	(revision 0)
@@ -0,0 +1,62 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2008 by Michael Sevakis
+ *
+ * All files in this archive are subject to the GNU General Public License.
+ * See the file COPYING in the source tree root for full license agreement.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+ #include "config.h"
+
+/****************************************************************************
+ * int find_first_set_bit(uint32_t val);
+ *
+ * Find the index of the least significant set bit in the 32-bit word.
+ *
+ * return values:
+ *   0  - bit 0 is set
+ *   1  - bit 1 is set
+ *   ...
+ *   31 - bit 31 is set
+ *   32 - no bits set
+ ****************************************************************************/
+    .text
+    .align  2
+    .global find_first_set_bit
+    .type   find_first_set_bit,@function
+find_first_set_bit:
+    | this is a coldfire version of the ffs algorithm devised by D.Seal
+    | and posted to comp.sys.arm on 16 Feb 1994.
+    |
+    | Output modified to suit rockbox purposes.
+
+    | Standard trick to isolate the bottom set bit of val in %d1 (0 if val = 0)
+    move.l  4(%sp), %d1          | %d1 = %d1 & -%d1
+    lea.l   L_ffs_table, %a0     | %a0 = table address
+    move.l  %d1, %d0             |
+    neg.l   %d1                  |
+    and.l   %d0, %d1             |
+
+    | now %d1 has at most one set bit, call this X
+
+    move.l  #0x0450fbaf, %d0     | %d0 = multiplier
+    mulu.l  %d0, %d1             | %d1 = X * 0x0450fbaf
+
+    | now lookup in table indexed on top 6 bits of %d1
+    moveq.l #26, %d0             | %d0 = final shift count
+    lsr.l   %d0, %d1             |
+    clr.l   %d0                  |
+    move.b  (%a0, %d1.l), %d0    |
+    rts                          |
+
+    .size   find_first_set_bit, .-find_first_set_bit

Property changes on: firmware/target/coldfire/ffs-coldfire.S
___________________________________________________________________
Name: svn:keywords
   + Author Date Id Revision
Name: svn:eol-style
   + native

Index: firmware/common/ffs.c
===================================================================
--- firmware/common/ffs.c	(revision 0)
+++ firmware/common/ffs.c	(revision 0)
@@ -0,0 +1,54 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2008 by Michael Sevakis
+ *
+ * All files in this archive are subject to the GNU General Public License.
+ * See the file COPYING in the source tree root for full license agreement.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#include "config.h"
+#include <inttypes.h>
+
+/* find_first_set_bit() - this is a C version of the ffs algorithm devised
+ * by D.Seal and posted to comp.sys.arm on  16 Feb 1994.
+ *
+ * Find the index of the least significant set bit in the word.
+ * return values:
+ *   0  - bit 0 is set
+ *   1  - bit 1 is set
+ *   ...
+ *   31 - bit 31 is set
+ *   32 - no bits set
+ */
+
+/* Table shared with assembly code */
+const uint8_t L_ffs_table[64] ICONST_ATTR =
+{
+/*   0   1   2   3   4   5   6   7           */
+/* ----------------------------------------- */
+    32,  0,  1, 12,  2,  6,  0, 13, /*  0- 7 */
+     3,  0,  7,  0,  0,  0,  0, 14, /*  8-15 */
+    10,  4,  0,  0,  8,  0,  0, 25, /* 16-23 */
+     0,  0,  0,  0,  0, 21, 27, 15, /* 24-31 */
+    31, 11,  5,  0,  0,  0,  0,  0, /* 32-39 */
+     9,  0,  0, 24,  0,  0, 20, 26, /* 40-47 */
+    30,  0,  0,  0,  0, 23,  0, 19, /* 48-55 */
+    29,  0, 22, 18, 28, 17, 16,  0, /* 56-63 */
+};
+
+#if !defined(CPU_COLDFIRE)
+int find_first_set_bit(uint32_t val)
+{
+    return L_ffs_table[((val & -val)*0x0450fbaf) >> 26];
+}
+#endif

Property changes on: firmware/common/ffs.c
___________________________________________________________________
Name: svn:keywords
   + Author Date Id Revision
Name: svn:eol-style
   + native

Index: firmware/thread.c
===================================================================
--- firmware/thread.c	(revision 16785)
+++ firmware/thread.c	(working copy)
@@ -28,6 +28,10 @@
 #ifdef RB_PROFILE
 #include <profile.h>
 #endif
+/****************************************************************************
+ *                              ATTENTION!!                                 *
+ *    See notes below on implementing processor-specific portions!          *
+ ***************************************************************************/
 
 /* Define THREAD_EXTRA_CHECKS as 1 to enable additional state checks */
 #ifdef DEBUG
@@ -59,9 +63,7 @@
  * event queues. The kernel object must have a scheme to protect itself from
  * access by another processor and is responsible for serializing the calls
  * to block_thread(_w_tmo) and wakeup_thread both to themselves and to each
- * other. If a thread blocks on an object it must fill-in the blk_ops members
- * for its core to unlock _after_ the thread's context has been saved and the
- * unlocking will be done in reverse from this heirarchy.
+ * other. Objects' queues are also protected here.
  * 
  * 3) Thread Slot
  * This locks access to the thread's slot such that its state cannot be
@@ -70,80 +72,73 @@
  * a thread while it is still blocking will likely desync its state with
  * the other resources used for that state.
  *
- * 4) Lists
- * Usually referring to a list (aka. queue) that a thread will be blocking
- * on that belongs to some object and is shareable amongst multiple
- * processors. Parts of the scheduler may have access to them without actually
- * locking the kernel object such as when a thread is blocked with a timeout
- * (such as calling queue_wait_w_tmo). Of course the kernel object also gets
- * it lists locked when the thread blocks so that all object list access is
- * synchronized. Failure to do so would corrupt the list links.
- *
- * 5) Core Lists
+ * 4) Core Lists
  * These lists are specific to a particular processor core and are accessible
- * by all processor cores and interrupt handlers. They are used when an
- * operation may only be performed by the thread's own core in a normal
- * execution context. The wakeup list is the prime example where a thread
- * may be added by any means and the thread's own core will remove it from
- * the wakeup list and put it on the running list (which is only ever
- * accessible by its own processor).
+ * by all processor cores and interrupt handlers. The running (rtr) list is
+ * the prime example where a thread may be added by any means.
  */
-#define DEADBEEF ((unsigned int)0xdeadbeef)
-/* Cast to the the machine int type, whose size could be < 4. */
+
+/*---------------------------------------------------------------------------
+ * Processor specific: core_sleep/core_wake/misc. notes
+ *
+ * ARM notes:
+ * FIQ is not dealt with by the scheduler code and is simply restored if it
+ * must by masked for some reason - because threading modifies a register
+ * that FIQ may also modify and there's no way to accomplish it atomically.
+ * s3c2440 is such a case.
+ *
+ * Audio interrupts are generally treated at a higher priority than others;
+ * usage of scheduler code with interrupts higher than HIGHEST_IRQ_LEVEL
+ * is not in general safe. Special cases may be constructed on a per-
+ * source basis and blocking operations are not available.
+ *
+ * core_sleep procedure to implement for any CPU to ensure an asynchronous
+ * wakeup never results in requiring a wait until the next tick (up to
+ * 10000uS!). May require assembly and careful instruction ordering.
+ *
+ * 1) On multicore, stay awake if directed to do so by another. If so, goto
+ *    step 4.
+ * 2) If processor requires, atomically reenable interrupts and perform step
+ *    3.
+ * 3) Sleep the CPU core. If wakeup itself enables interrupts (stop #0x2000
+ *    on Coldfire) goto step 5.
+ * 4) Enable interrupts.
+ * 5) Exit procedure.
+ *
+ * core_wake and multiprocessor notes for sleep/wake coordination:
+ * If possible, to wake up another processor, the forcing of an interrupt on
+ * the woken core by the waker core is the easiest way to ensure a non-
+ * delayed wake and immediate execution of any woken threads. If that isn't
+ * available then some careful non-blocking synchronization is needed (as on
+ * PP targets at the moment).
+ *---------------------------------------------------------------------------
+ */
+
+/* Cast to the machine pointer size, whose size could be < 4 or > 32
+ * (someday :). */
+#define DEADBEEF ((uintptr_t)0xdeadbeefdeadbeefull)
 struct core_entry cores[NUM_CORES] IBSS_ATTR;
 struct thread_entry threads[MAXTHREADS] IBSS_ATTR;
 
 static const char main_thread_name[] = "main";
-extern int stackbegin[];
-extern int stackend[];
+extern uintptr_t stackbegin[];
+extern uintptr_t stackend[];
 
-/* core_sleep procedure to implement for any CPU to ensure an asychronous wakup
- * never results in requiring a wait until the next tick (up to 10000uS!). May
- * require assembly and careful instruction ordering.
- *
- * 1) On multicore, stay awake if directed to do so by another. If so, goto step 4.
- * 2) If processor requires, atomically reenable interrupts and perform step 3.
- * 3) Sleep the CPU core. If wakeup itself enables interrupts (stop #0x2000 on Coldfire)
- *    goto step 5.
- * 4) Enable interrupts.
- * 5) Exit procedure.
- */
 static inline void core_sleep(IF_COP_VOID(unsigned int core))
         __attribute__((always_inline));
 
-static void check_tmo_threads(void)
+void check_tmo_threads(void)
         __attribute__((noinline));
 
-static inline void block_thread_on_l(
-    struct thread_queue *list, struct thread_entry *thread, unsigned state)
+static inline void block_thread_on_l(struct thread_entry *thread, unsigned state)
         __attribute__((always_inline));
 
-static inline void block_thread_on_l_no_listlock(
-    struct thread_entry **list, struct thread_entry *thread, unsigned state)
-        __attribute__((always_inline));
-
-static inline void _block_thread_on_l(
-    struct thread_queue *list, struct thread_entry *thread,
-    unsigned state IF_SWCL(, const bool single))
-        __attribute__((always_inline));
-
-IF_SWCL(static inline) struct thread_entry * _wakeup_thread(
-    struct thread_queue *list IF_SWCL(, const bool nolock))
-        __attribute__((IFN_SWCL(noinline) IF_SWCL(always_inline)));
-
-IF_SWCL(static inline) void _block_thread(
-    struct thread_queue *list IF_SWCL(, const bool nolock))
-        __attribute__((IFN_SWCL(noinline) IF_SWCL(always_inline)));
-
 static void add_to_list_tmo(struct thread_entry *thread)
         __attribute__((noinline));
 
 static void core_schedule_wakeup(struct thread_entry *thread)
         __attribute__((noinline));
 
-static inline void core_perform_wakeup(IF_COP_VOID(unsigned int core))
-        __attribute__((always_inline));
-
 #if NUM_CORES > 1
 static inline void run_blocking_ops(
     unsigned int core, struct thread_entry *thread)
@@ -159,10 +154,9 @@
 static inline void load_context(const void* addr)
         __attribute__((always_inline));
 
-void switch_thread(struct thread_entry *old)
+void switch_thread(void)
         __attribute__((noinline));
 
-
 /****************************************************************************
  * Processor-specific section
  */
@@ -172,8 +166,7 @@
  * Start the thread running and terminate it if it returns
  *---------------------------------------------------------------------------
  */
-static void start_thread(void) __attribute__((naked,used));
-static void start_thread(void)
+static void __attribute__((naked,used)) start_thread(void)
 {
     /* r0 = context */
     asm volatile (
@@ -188,19 +181,18 @@
 #endif
         "mov    lr, pc                 \n" /* Call thread function */
         "bx     r4                     \n"
-        "mov    r0, #0                 \n" /* remove_thread(NULL) */
-        "ldr    pc, =remove_thread     \n"
-        ".ltorg                        \n" /* Dump constant pool */
     ); /* No clobber list - new thread doesn't care */
+    thread_exit();
+    //asm volatile (".ltorg"); /* Dump constant pool */
 }
 
 /* For startup, place context pointer in r4 slot, start_thread pointer in r5
  * slot, and thread function pointer in context.start. See load_context for
  * what happens when thread is initially going to run. */
 #define THREAD_STARTUP_INIT(core, thread, function) \
-    ({ (thread)->context.r[0] = (unsigned int)&(thread)->context,  \
-       (thread)->context.r[1] = (unsigned int)start_thread, \
-       (thread)->context.start = (void *)function; })
+    ({ (thread)->context.r[0] = (uint32_t)&(thread)->context,  \
+       (thread)->context.r[1] = (uint32_t)start_thread, \
+       (thread)->context.start = (uint32_t)function; })
 
 /*---------------------------------------------------------------------------
  * Store non-volatile context.
@@ -232,11 +224,11 @@
 #if defined (CPU_PP)
 
 #if NUM_CORES > 1
-extern int cpu_idlestackbegin[];
-extern int cpu_idlestackend[];
-extern int cop_idlestackbegin[];
-extern int cop_idlestackend[];
-static int * const idle_stacks[NUM_CORES] NOCACHEDATA_ATTR =
+extern uintptr_t cpu_idlestackbegin[];
+extern uintptr_t cpu_idlestackend[];
+extern uintptr_t cop_idlestackbegin[];
+extern uintptr_t cop_idlestackend[];
+static uintptr_t * const idle_stacks[NUM_CORES] NOCACHEDATA_ATTR =
 {
     [CPU] = cpu_idlestackbegin,
     [COP] = cop_idlestackbegin
@@ -253,7 +245,7 @@
 };
 
 static struct core_semaphores core_semaphores[NUM_CORES] NOCACHEBSS_ATTR;
-#endif
+#endif /* CONFIG_CPU == PP5002 */
 
 #endif /* NUM_CORES */
 
@@ -401,15 +393,15 @@
  * no other core requested a wakeup for it to perform a task.
  *---------------------------------------------------------------------------
  */
+#ifdef CPU_PP502x
 #if NUM_CORES == 1
-/* Shared single-core build debugging version */
 static inline void core_sleep(void)
 {
     PROC_CTL(CURRENT_CORE) = PROC_SLEEP;
     nop; nop; nop;
-    set_interrupt_status(IRQ_FIQ_ENABLED, IRQ_FIQ_STATUS);
+    set_irq_level(IRQ_ENABLED);
 }
-#elif defined (CPU_PP502x)
+#else
 static inline void core_sleep(unsigned int core)
 {
 #if 1
@@ -429,8 +421,8 @@
         "ldr    r1, [%[mbx], #0]           \n"
         "tst    r1, r0, lsr #2             \n"
         "bne    1b                         \n"
-        "mrs    r1, cpsr                   \n" /* Enable interrupts */
-        "bic    r1, r1, #0xc0              \n"
+        "mrs    r1, cpsr                   \n" /* Enable IRQ */
+        "bic    r1, r1, #0x80              \n"
         "msr    cpsr_c, r1                 \n"
         :
         :  [ctl]"r"(&PROC_CTL(CPU)), [mbx]"r"(MBX_BASE), [c]"r"(core)
@@ -452,11 +444,36 @@
     /* Wait for other processor to finish wake procedure */
     while (MBX_MSG_STAT & (0x1 << core));
 
-    /* Enable IRQ, FIQ */
-    set_interrupt_status(IRQ_FIQ_ENABLED, IRQ_FIQ_STATUS);
+    /* Enable IRQ */
+    set_irq_level(IRQ_ENABLED);
 #endif /* ASM/C selection */
 }
+#endif /* NUM_CORES */
 #elif CONFIG_CPU == PP5002
+#if NUM_CORES == 1
+static inline void core_sleep(void)
+{
+    asm volatile (
+        /* Sleep: PP5002 crashes if the instruction that puts it to sleep is
+         * located at 0xNNNNNNN0. 4/8/C works. This sequence makes sure
+         * that the correct alternative is executed. Don't change the order
+         * of the next 4 instructions! */
+        "tst    pc, #0x0c     \n"
+        "mov    r0, #0xca     \n"
+        "strne  r0, [%[ctl]]  \n"
+        "streq  r0, [%[ctl]]  \n"
+        "nop                  \n" /* nop's needed because of pipeline */
+        "nop                  \n"
+        "nop                  \n"
+        "mrs    r0, cpsr      \n" /* Enable IRQ */
+        "bic    r0, r0, #0x80 \n"
+        "msr    cpsr_c, r0    \n"
+        :
+        : [ctl]"r"(&PROC_CTL(CURRENT_CORE))
+        : "r0"
+    );
+}
+#else
 /* PP5002 has no mailboxes - emulate using bytes */
 static inline void core_sleep(unsigned int core)
 {
@@ -486,8 +503,8 @@
         "ldrb   r0, [%[sem], #0]           \n"
         "cmp    r0, #0                     \n"
         "bne    1b                         \n"
-        "mrs    r0, cpsr                   \n" /* Enable interrupts */
-        "bic    r0, r0, #0xc0              \n"
+        "mrs    r0, cpsr                   \n" /* Enable IRQ */
+        "bic    r0, r0, #0x80              \n"
         "msr    cpsr_c, r0                 \n"
         :
         : [sem]"r"(&core_semaphores[core]), [c]"r"(core),
@@ -512,11 +529,12 @@
     /* Wait for other processor to finish wake procedure */
     while (core_semaphores[core].intend_wake != 0);
 
-    /* Enable IRQ, FIQ */
-    set_interrupt_status(IRQ_FIQ_ENABLED, IRQ_FIQ_STATUS);
+    /* Enable IRQ */
+    set_irq_level(IRQ_ENABLED);
 #endif /* ASM/C selection */
 }
-#endif /* CPU type */
+#endif /* NUM_CORES */
+#endif /* PP CPU type */
 
 /*---------------------------------------------------------------------------
  * Wake another processor core that is sleeping or prevent it from doing so
@@ -553,7 +571,7 @@
         "strne  r1, [%[ctl], %[oc], lsl #2] \n"
         "mov    r1, r2, lsr #4              \n"
         "str    r1, [%[mbx], #8]            \n" /* Done with wake procedure */
-        "msr    cpsr_c, r3                  \n" /* Restore int status */
+        "msr    cpsr_c, r3                  \n" /* Restore IRQ */
         :
         : [ctl]"r"(&PROC_CTL(CPU)), [mbx]"r"(MBX_BASE),
           [oc]"r"(othercore)
@@ -604,7 +622,7 @@
         "strne  r1, [r2, %[oc], lsl #2] \n"
         "mov    r1, #0                  \n" /* Done with wake procedure */
         "strb   r1, [%[sem], #0]        \n"
-        "msr    cpsr_c, r3              \n" /* Restore int status */
+        "msr    cpsr_c, r3              \n" /* Restore IRQ */
         :
         : [sem]"r"(&core_semaphores[othercore]),
           [st]"r"(&PROC_STAT),
@@ -640,8 +658,8 @@
  *
  * Needed when a thread suicides on a core other than the main CPU since the
  * stack used when idling is the stack of the last thread to run. This stack
- * may not reside in the core in which case the core will continue to use a
- * stack from an unloaded module until another thread runs on it.
+ * may not reside in the core firmware in which case the core will continue
+ * to use a stack from an unloaded module until another thread runs on it.
  *---------------------------------------------------------------------------
  */
 static inline void switch_to_idle_stack(const unsigned int core)
@@ -670,11 +688,11 @@
     /* Flush our data to ram */
     flush_icache();
     /* Stash thread in r4 slot */
-    thread->context.r[0] = (unsigned int)thread;
+    thread->context.r[0] = (uint32_t)thread;
     /* Stash restart address in r5 slot */
-    thread->context.r[1] = (unsigned int)thread->context.start;
+    thread->context.r[1] = thread->context.start;
     /* Save sp in context.sp while still running on old core */
-    thread->context.sp = (void*)idle_stacks[core][IDLE_STACK_WORDS-1];
+    thread->context.sp = idle_stacks[core][IDLE_STACK_WORDS-1];
 }
 
 /*---------------------------------------------------------------------------
@@ -689,9 +707,8 @@
 /*---------------------------------------------------------------------------
  * This actually performs the core switch.
  */
-static void switch_thread_core(unsigned int core, struct thread_entry *thread)
-        __attribute__((naked));
-static void switch_thread_core(unsigned int core, struct thread_entry *thread)
+static void __attribute__((naked))
+    switch_thread_core(unsigned int core, struct thread_entry *thread)
 {
     /* Pure asm for this because compiler behavior isn't sufficiently predictable.
      * Stack access also isn't permitted until restoring the original stack and
@@ -705,7 +722,6 @@
         "mov    sp, r2                 \n" /* switch stacks */
         "adr    r2, 1f                 \n" /* r2 = new core restart address */
         "str    r2, [r1, #40]          \n" /* thread->context.start = r2 */
-        "mov    r0, r1                 \n" /* switch_thread(thread) */
         "ldr    pc, =switch_thread     \n" /* r0 = thread after call - see load_context */
     "1:                                \n"
         "ldr    sp, [r0, #32]          \n" /* Reload original sp from context structure */
@@ -733,13 +749,15 @@
     /* FIQ also changes the CLKCON register so FIQ must be disabled
        when changing it here */
     asm volatile (
-        "mrs    r0, cpsr        \n" /* Prepare IRQ, FIQ enable */
-        "bic    r0, r0, #0xc0   \n"
+        "mrs    r0, cpsr        \n"
+        "orr    r2, r0, #0x40   \n" /* Disable FIQ */
+        "bic    r0, r0, #0x80   \n" /* Prepare IRQ enable */
+        "msr    cpsr_c, r2      \n"
         "mov    r1, #0x4c000000 \n" /* CLKCON = 0x4c00000c */
         "ldr    r2, [r1, #0xc]  \n" /* Set IDLE bit */
         "orr    r2, r2, #4      \n"
         "str    r2, [r1, #0xc]  \n"
-        "msr    cpsr_c, r0      \n" /* Enable IRQ, FIQ */
+        "msr    cpsr_c, r0      \n" /* Enable IRQ, restore FIQ */
         "mov    r2, #0          \n" /* wait for IDLE */
     "1:                         \n"
         "add    r2, r2, #1      \n"
@@ -750,13 +768,14 @@
         "ldr    r2, [r1, #0xc]  \n" /* Reset IDLE bit */
         "bic    r2, r2, #4      \n"
         "str    r2, [r1, #0xc]  \n"
-        "msr    cpsr_c, r0      \n" /* Enable IRQ, FIQ */
+        "msr    cpsr_c, r0      \n" /* Enable IRQ, restore FIQ */
         :  :  : "r0", "r1", "r2");
 }
 #elif defined(CPU_TCC77X)
 static inline void core_sleep(void)
 {
     #warning TODO: Implement core_sleep
+    set_irq_level(IRQ_ENABLED);
 }
 #elif defined(CPU_TCC780X)
 static inline void core_sleep(void)
@@ -765,8 +784,8 @@
     asm volatile (
         "mov r0, #0                \n"
         "mcr p15, 0, r0, c7, c0, 4 \n" /* Wait for interrupt */
-        "mrs r0, cpsr              \n" /* Unmask IRQ/FIQ at core level */
-        "bic r0, r0, #0xc0         \n"
+        "mrs r0, cpsr              \n" /* Unmask IRQ at core level */
+        "bic r0, r0, #0x80         \n"
         "msr cpsr_c, r0            \n"
         : : : "r0"
     );
@@ -777,8 +796,8 @@
     asm volatile (
         "mov r0, #0                \n"
         "mcr p15, 0, r0, c7, c0, 4 \n" /* Wait for interrupt */
-        "mrs r0, cpsr              \n" /* Unmask IRQ/FIQ at core level */
-        "bic r0, r0, #0xc0         \n"
+        "mrs r0, cpsr              \n" /* Unmask IRQ at core level */
+        "bic r0, r0, #0x80         \n"
         "msr cpsr_c, r0            \n"
         : : : "r0"
     );
@@ -787,6 +806,7 @@
 static inline void core_sleep(void)
 {
     #warning core_sleep not implemented, battery life will be decreased
+    set_irq_level(0);
 }
 #endif /* CONFIG_CPU == */
 
@@ -796,8 +816,7 @@
  *---------------------------------------------------------------------------
  */
 void start_thread(void); /* Provide C access to ASM label */
-static void __start_thread(void) __attribute__((used));
-static void __start_thread(void)
+static void __attribute__((used)) __start_thread(void)
 {
     /* a0=macsr, a1=context */
     asm volatile (
@@ -808,9 +827,8 @@
         "move.l  (%a1), %a2    \n" /* Fetch thread function pointer */
         "clr.l   (%a1)         \n" /* Mark thread running */
         "jsr     (%a2)         \n" /* Call thread function */
-        "clr.l   -(%sp)        \n" /* remove_thread(NULL) */
-        "jsr     remove_thread \n"
     );
+    thread_exit();
 }
 
 /* Set EMAC unit to fractional mode with saturation for each new thread,
@@ -823,9 +841,9 @@
  */
 #define THREAD_STARTUP_INIT(core, thread, function) \
     ({ (thread)->context.macsr = EMAC_FRACTIONAL | EMAC_SATURATE, \
-       (thread)->context.d[0] = (unsigned int)&(thread)->context, \
-       (thread)->context.d[1] = (unsigned int)start_thread,       \
-       (thread)->context.start = (void *)(function); })
+       (thread)->context.d[0] = (uint32_t)&(thread)->context, \
+       (thread)->context.d[1] = (uint32_t)start_thread,       \
+       (thread)->context.start = (uint32_t)(function); })
 
 /*---------------------------------------------------------------------------
  * Store non-volatile context.
@@ -874,8 +892,7 @@
  *---------------------------------------------------------------------------
  */
 void start_thread(void); /* Provide C access to ASM label */
-static void __start_thread(void) __attribute__((used));
-static void __start_thread(void)
+static void __attribute__((used)) __start_thread(void)
 {
     /* r8 = context */
     asm volatile (
@@ -885,20 +902,16 @@
         "mov    #0, r1         \n" /* Start the thread */
         "jsr    @r0            \n"
         "mov.l  r1, @(36, r8)  \n" /* Clear start address */
-        "mov.l  1f, r0         \n" /* remove_thread(NULL) */
-        "jmp    @r0            \n"
-        "mov    #0, r4         \n"
-    "1:                        \n"
-        ".long  _remove_thread \n"
     );
+    thread_exit();
 }
 
 /* Place context pointer in r8 slot, function pointer in r9 slot, and
  * start_thread pointer in context_start */
 #define THREAD_STARTUP_INIT(core, thread, function) \
-    ({ (thread)->context.r[0] = (unsigned int)&(thread)->context, \
-       (thread)->context.r[1] = (unsigned int)(function),         \
-       (thread)->context.start = (void*)start_thread; })
+    ({ (thread)->context.r[0] = (uint32_t)&(thread)->context, \
+       (thread)->context.r[1] = (uint32_t)(function),         \
+       (thread)->context.start = (uint32_t)start_thread; })
 
 /*---------------------------------------------------------------------------
  * Store non-volatile context.
@@ -947,7 +960,7 @@
 }
 
 /*---------------------------------------------------------------------------
- * Put core in a power-saving state if waking list wasn't repopulated.
+ * Put core in a power-saving state.
  *---------------------------------------------------------------------------
  */
 static inline void core_sleep(void)
@@ -969,9 +982,7 @@
 #if THREAD_EXTRA_CHECKS
 static void thread_panicf(const char *msg, struct thread_entry *thread)
 {
-#if NUM_CORES > 1
-    const unsigned int core = thread->core;
-#endif
+    IF_COP( const unsigned int core = thread->core; )
     static char name[32];
     thread_get_name(name, 32, thread);
     panicf ("%s %s" IF_COP(" (%d)"), msg, name IF_COP(, core));
@@ -987,9 +998,7 @@
 #else
 static void thread_stkov(struct thread_entry *thread)
 {
-#if NUM_CORES > 1
-    const unsigned int core = thread->core;
-#endif
+    IF_COP( const unsigned int core = thread->core; )
     static char name[32];
     thread_get_name(name, 32, thread);
     panicf("Stkov %s" IF_COP(" (%d)"), name IF_COP(, core));
@@ -998,111 +1007,67 @@
 #define THREAD_ASSERT(exp, msg, thread)
 #endif /* THREAD_EXTRA_CHECKS */
 
-/*---------------------------------------------------------------------------
- * Lock a list pointer and returns its value
- *---------------------------------------------------------------------------
- */
-#if CONFIG_CORELOCK == SW_CORELOCK
-/* Separate locking function versions */
-
 /* Thread locking */
-#define GET_THREAD_STATE(thread) \
-    ({ corelock_lock(&(thread)->cl); (thread)->state; })
-#define TRY_GET_THREAD_STATE(thread) \
-    ({ corelock_try_lock(&thread->cl) ? thread->state : STATE_BUSY; })
-#define UNLOCK_THREAD(thread, state) \
-    ({ corelock_unlock(&(thread)->cl); })
-#define UNLOCK_THREAD_SET_STATE(thread, _state) \
-    ({ (thread)->state = (_state); corelock_unlock(&(thread)->cl); })
+#if NUM_CORES > 1
+#define LOCK_THREAD(thread) \
+    ({ corelock_lock(&(thread)->slot_cl); })
+#define TRY_LOCK_THREAD(thread) \
+    ({ corelock_try_lock(&(thread)->slot_cl); }) /* yields try-lock result */
+#define UNLOCK_THREAD(thread) \
+    ({ corelock_unlock(&(thread)->slot_cl); })
+#define UNLOCK_THREAD_AT_TASK_SWITCH(thread) \
+    ({ unsigned int _core = (thread)->core; \
+       cores[_core].blk_ops.flags |= TBOP_UNLOCK_CORELOCK; \
+       cores[_core].blk_ops.cl_p = &(thread)->slot_cl; })
+#else
+#define LOCK_THREAD(thread) \
+    ({ })
+#define TRY_LOCK_THREAD(thread) \
+    ({ }) /* NOTE(review): void here, but has a value when NUM_CORES > 1 */
+#define UNLOCK_THREAD(thread) \
+    ({ })
+#define UNLOCK_THREAD_AT_TASK_SWITCH(thread) \
+    ({ })
+#end