[Precise SRU] Call xen_poll_irq with interrupts disabled
Stefan Bader
stefan.bader at canonical.com
Mon Feb 4 16:39:41 UTC 2013
This has been sitting around for a while now. The problem is that
I was not yet able to find a convincing explanation *why* this is
fixing things. But testing was very positive. So it's probably better
to carry it as SAUCE for now, at least for Precise.
I need to re-check things with latest kernels and Xen versions.
-Stefan
---
From e89064720b518e3c64f0fe56b11edb44125f8b7e Mon Sep 17 00:00:00 2001
From: Stefan Bader <stefan.bader at canonical.com>
Date: Thu, 18 Oct 2012 21:40:37 +0200
Subject: [PATCH] UBUNTU: SAUCE: xen/pv-spinlock: Never enable interrupts in
xen_spin_lock_slow()
The pv-ops spinlock code for Xen will call xen_spin_lock_slow()
if a lock cannot be obtained quickly by normal spinning. The
slow variant will put the VCPU onto a waiters list, then
enable interrupts (which are event channel messages in this
case) and call a hypervisor function that is supposed to return
when the lock is released.
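For reference, the relevant part of the slow path currently looks
roughly like this (condensed from the hunks below, context elided):

	do {
		/* clear pending */
		xen_clear_irq_pending(irq);
		...
		flags = arch_local_save_flags();
		if (irq_enable) {
			ADD_STATS(taken_slow_irqenable, 1);
			raw_local_irq_enable();
		}

		/* block until irq becomes pending */
		xen_poll_irq(irq);

		raw_local_irq_restore(flags);
		...
	} while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */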
Using a test case which has a lot of threads running that cause
a high utilization of this spinlock code, it can be observed
that the VCPU which is first on the waiters list seems to be
doing other things (no trace of the hv call on its stack) while
the waiters list still has it as its head. So other VCPUs which
try to acquire the lock are stuck in the hv call forever, and
that sooner or later ends up in some circular locking.
Testing shows that this gets avoided when interrupts are not
enabled before the hv call (and disabled again right after it).
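With this change the wait loop keeps event delivery disabled
across the poll, so (again condensed) it reduces to:

	do {
		/* clear pending */
		xen_clear_irq_pending(irq);
		...
		/* block until irq becomes pending, events stay masked */
		xen_poll_irq(irq);

		ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
	} while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */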
In theory this could cause a performance regression, though it
looked like the hypervisor would actually re-enable interrupts
itself after some safety measures, so there may not be much of a
regression at all. At least testing did not show a significant
penalty.
Xen PVM is the only affected setup because it is the only one
using pv-ops spinlocks in a way that changes the interrupt
flags (KVM maps the variant that could do so to a function
that does not modify those flags).
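Roughly, the generic pv-ops default (arch/x86/kernel/
paravirt-spinlocks.c in kernels of this vintage; quoted from
memory, so treat the exact names as illustration only) already
routes the flags variant to the plain lock function:

	static void default_spin_lock_flags(arch_spinlock_t *lock,
					    unsigned long flags)
	{
		/* the flags argument is ignored entirely */
		arch_spin_lock(lock);
	}

	struct pv_lock_ops pv_lock_ops = {
		...
		.spin_lock = __ticket_spin_lock,
		.spin_lock_flags = default_spin_lock_flags,
		...
	};

Only the Xen backend installs a spin_lock_flags implementation
that looks at the flags; after this patch xen_spin_lock_flags
ignores them as well.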
BugLink: http://bugs.launchpad.net/bugs/1011792
Signed-off-by: Stefan Bader <stefan.bader at canonical.com>
---
arch/x86/xen/spinlock.c | 23 +++++------------------
1 file changed, 5 insertions(+), 18 deletions(-)
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index d69cc6c..fa926ab 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -24,7 +24,6 @@ static struct xen_spinlock_stats
u32 taken_slow_nested;
u32 taken_slow_pickup;
u32 taken_slow_spurious;
- u32 taken_slow_irqenable;
u64 released;
u32 released_slow;
@@ -197,7 +196,7 @@ static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock
__this_cpu_write(lock_spinners, prev);
}
-static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable)
+static noinline int xen_spin_lock_slow(struct arch_spinlock *lock)
{
struct xen_spinlock *xl = (struct xen_spinlock *)lock;
struct xen_spinlock *prev;
@@ -218,8 +217,6 @@ static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enab
ADD_STATS(taken_slow_nested, prev != NULL);
do {
- unsigned long flags;
-
/* clear pending */
xen_clear_irq_pending(irq);
@@ -239,12 +236,6 @@ static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enab
goto out;
}
- flags = arch_local_save_flags();
- if (irq_enable) {
- ADD_STATS(taken_slow_irqenable, 1);
- raw_local_irq_enable();
- }
-
/*
* Block until irq becomes pending. If we're
* interrupted at this point (after the trylock but
@@ -256,8 +247,6 @@ static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enab
*/
xen_poll_irq(irq);
- raw_local_irq_restore(flags);
-
ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
} while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */
@@ -270,7 +259,7 @@ out:
return ret;
}
-static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable)
+static inline void __xen_spin_lock(struct arch_spinlock *lock)
{
struct xen_spinlock *xl = (struct xen_spinlock *)lock;
unsigned timeout;
@@ -302,19 +291,19 @@ static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable)
spin_time_accum_spinning(start_spin_fast);
} while (unlikely(oldval != 0 &&
- (TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable))));
+ (TIMEOUT == ~0 || !xen_spin_lock_slow(lock))));
spin_time_accum_total(start_spin);
}
static void xen_spin_lock(struct arch_spinlock *lock)
{
- __xen_spin_lock(lock, false);
+ __xen_spin_lock(lock);
}
static void xen_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags)
{
- __xen_spin_lock(lock, !raw_irqs_disabled_flags(flags));
+ __xen_spin_lock(lock);
}
static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
@@ -424,8 +413,6 @@ static int __init xen_spinlock_debugfs(void)
&spinlock_stats.taken_slow_pickup);
debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug,
&spinlock_stats.taken_slow_spurious);
- debugfs_create_u32("taken_slow_irqenable", 0444, d_spin_debug,
- &spinlock_stats.taken_slow_irqenable);
debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released);
debugfs_create_u32("released_slow", 0444, d_spin_debug,
--
1.7.9.5