4 files changed, 297 insertions, 0 deletions
diff --git a/patches/gmp/5.1.1/100-fix-bulldozer-piledriver.patch b/patches/gmp/5.1.1/100-fix-bulldozer-piledriver.patch
new file mode 100644
index 00000000..f6faca1a
--- /dev/null
+++ b/patches/gmp/5.1.1/100-fix-bulldozer-piledriver.patch
@@ -0,0 +1,21 @@
+
+# HG changeset patch
+# User Torbjorn Granlund <tege@gmplib.org>
+# Date 1368052461 -7200
+# Node ID 11fbd825bc45385d00b69c30bd7566acee11f0d2
+# Parent  bd4c950486ab4c161ea69b91c25150719c3cb700
+Fix typo.
+
+diff -r bd4c950486ab -r 11fbd825bc45 mpn/x86_64/bd1/mul_1.asm
+--- a/mpn/x86_64/bd1/mul_1.asm	Thu May 02 18:20:37 2013 +0200
++++ b/mpn/x86_64/bd1/mul_1.asm	Thu May 09 00:34:21 2013 +0200
+@@ -53,7 +53,7 @@
+ IFDOS(`	define(`v0', ``%r9'')	') dnl
+ IFDOS(`	define(`r9', ``rdi'')	') dnl
+ IFDOS(`	define(`n',  ``%r8'')	') dnl
+-IFDOS(`	define(`r8', ``r11'')	') dnl
++IFDOS(`	define(`r8', ``rbx'')	') dnl
+ 
+ ASM_START()
+ 	TEXT
+
diff --git a/patches/gmp/5.1.1/110-mpz_powm_ui.patch b/patches/gmp/5.1.1/110-mpz_powm_ui.patch
new file mode 100644
index 00000000..f0d240c6
--- /dev/null
+++ b/patches/gmp/5.1.1/110-mpz_powm_ui.patch
@@ -0,0 +1,39 @@
+
+# HG changeset patch
+# User Torbjorn Granlund <tege@gmplib.org>
+# Date 1363174284 -3600
+# Node ID ec4d88674036804b26f22c6a2bfca6ae1e92d370
+# Parent  e616ff715c34e112d0a5f7535d31ffe1194a5c7d
+(mod): Adhere to mpn_mu_div_qr's overlap requirements.
+
+diff -r e616ff715c34 -r ec4d88674036 mpz/powm_ui.c
+--- a/mpz/powm_ui.c	Sun Feb 17 19:40:16 2013 +0100
++++ b/mpz/powm_ui.c	Wed Mar 13 12:31:24 2013 +0100
+@@ -2,8 +2,8 @@
+ 
+    Contributed to the GNU project by Torbjorn Granlund.
+ 
+-Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005, 2008,
+-2009, 2011, 2012 Free Software Foundation, Inc.
++Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005, 2008, 2009,
++2011, 2012, 2013 Free Software Foundation, Inc.
+ 
+ This file is part of the GNU MP Library.
+ 
+@@ -70,9 +70,14 @@
+     }
+   else
+     {
++      /* We need to allocate separate remainder area, since mpn_mu_div_qr does
++	 not handle overlap between the numerator and remainder areas.
++	 FIXME: Make it handle such overlap.  */
++      mp_ptr rp = TMP_ALLOC_LIMBS (dn);
+       mp_size_t itch = mpn_mu_div_qr_itch (nn, dn, 0);
+       mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
+-      mpn_mu_div_qr (qp, np, np, nn, dp, dn, scratch);
++      mpn_mu_div_qr (qp, rp, np, nn, dp, dn, scratch);
++      MPN_COPY (np, rp, dn);
+     }
+ 
+   TMP_FREE;
+
diff --git a/patches/gmp/5.1.1/120-fix-mpn_sbpi1_div_qr_sec.patch b/patches/gmp/5.1.1/120-fix-mpn_sbpi1_div_qr_sec.patch
new file mode 100644
index 00000000..13a50ad4
--- /dev/null
+++ b/patches/gmp/5.1.1/120-fix-mpn_sbpi1_div_qr_sec.patch
@@ -0,0 +1,164 @@
+
+# HG changeset patch
+# User Torbjorn Granlund <tege@gmplib.org>
+# Date 1373624469 -7200
+# Node ID a447c0c537891ed23edf180594a89616364ee633
+# Parent  6540e0b2925ead29f7158bb182e4fabfb9441433
+Partial rewrite.
+
+diff -r 6540e0b2925e -r a447c0c53789 mpn/generic/sbpi1_div_sec.c
+--- a/mpn/generic/sbpi1_div_sec.c	Mon Jul 01 19:16:32 2013 +0200
++++ b/mpn/generic/sbpi1_div_sec.c	Fri Jul 12 12:21:09 2013 +0200
+@@ -8,7 +8,7 @@
+    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+ 
+-Copyright 2011, 2012 Free Software Foundation, Inc.
++Copyright 2011, 2012, 2013 Free Software Foundation, Inc.
+ 
+ This file is part of the GNU MP Library.
+ 
+@@ -29,6 +29,28 @@
+ #include "gmp-impl.h"
+ #include "longlong.h"
+ 
++/* This side-channel silent division algorithm reduces the partial remainder by
++   GMP_NUMB_BITS/2 bits at a time, compared to GMP_NUMB_BITS for the main
++   division algorithm.  We do not insist on reducing by exactly
++   GMP_NUMB_BITS/2, but may leave a partial remainder that is D*B^i to 3D*B^i
++   too large (B is the limb base, D is the divisor, and i is the induction
++   variable); the subsequent step will handle the extra partial remainder bits.
++
++   With that partial remainder reduction, each step generates a quotient "half
++   limb".  The outer loop generates two quotient half limbs, an upper (q1h) and
++   a lower (q0h) which are stored sparsely in separate limb arrays.  These
++   arrays are added at the end; using separate arrays avoids data-dependent
++   carry propagation which could else pose a side-channel leakage problem.
++
++   The quotient half limbs may be between -3 and 0 from the accurate value
++   ("accurate" being the one which corresponds to a reduction to a principal
++   partial remainder).  Too small quotient half limbs correspond to too large
++   remainders, which we reduce later, as described above.
++
++   In order to keep quotients from getting too big, corresponding to a negative
++   partial remainder, we use an inverse which is slightly smaller than usual.
++*/
++
+ #if OPERATION_sbpi1_div_qr_sec
+ /* Needs (dn + 1) + (nn - dn) + (nn - dn) = 2nn - dn + 1 limbs at tp. */
+ #define FNAME mpn_sbpi1_div_qr_sec
+@@ -49,7 +71,7 @@
+        mp_limb_t dinv,
+        mp_ptr tp)
+ {
+-  mp_limb_t nh, cy, q1h, q0h, dummy, h;
++  mp_limb_t nh, cy, q1h, q0h, dummy, cnd;
+   mp_size_t i;
+   mp_ptr hp;
+ #if OPERATION_sbpi1_div_qr_sec
+@@ -72,77 +94,69 @@
+ #endif
+     }
+ 
++  /* Decrement inverse to keep quotient half limbs from being too large.  */
++  dinv -= dinv != 0;				/* FIXME: cmp-to-int */
++
+   /* Create a divisor copy shifted half a limb.  */
+   hp = tp;					/* (dn + 1) limbs */
+-  cy = mpn_lshift (hp, dp, dn, GMP_NUMB_BITS / 2);
+-  hp[dn] = dp[dn - 1] >> GMP_NUMB_BITS / 2;
++  hp[dn] = mpn_lshift (hp, dp, dn, GMP_NUMB_BITS / 2);
+ 
+ #if OPERATION_sbpi1_div_qr_sec
+   qlp = tp + (dn + 1);				/* (nn - dn) limbs */
+   qhp = tp + (nn + 1);				/* (nn - dn) limbs */
+ #endif
+ 
+-  np += nn;
++  np += nn - dn;
++  nh = 0;
+ 
+-  /* Main loop.  Develop one full limb per iteration, but do it in two steps in
+-     order to avoid conditionals.  Quotient bits will be either correct or
+-     underestimates.  When a quotient is underestimated, the next quotient will
+-     compensate, since quotients are to be added at consecutive weight distance
+-     GMP_NUMB_BITS/2.  We make two quotient arrays, each with GMP_NUMB_BITS/2+2
+-     bits per entry.  The arrays are added late after the loop.  Separate
+-     arrays avoid data-dependent carry propagation.  */
+-  nh = 0;
+   for (i = nn - dn - 1; i >= 0; i--)
+     {
+       np--;
+ 
+-      nh = (nh << GMP_NUMB_BITS/2) + (np[0] >> GMP_NUMB_BITS/2);
++      nh = (nh << GMP_NUMB_BITS/2) + (np[dn] >> GMP_NUMB_BITS/2);
+       umul_ppmm (q1h, dummy, nh, dinv);
+       q1h += nh;
+ #if OPERATION_sbpi1_div_qr_sec
+       qhp[i] = q1h;
+ #endif
+-      cy = mpn_submul_1 (np - dn, hp, dn + 1, q1h);
++      mpn_submul_1 (np, hp, dn + 1, q1h);
+ 
+-      nh = np[0];
++      nh = np[dn];
+       umul_ppmm (q0h, dummy, nh, dinv);
+       q0h += nh;
+ #if OPERATION_sbpi1_div_qr_sec
+       qlp[i] = q0h;
+ #endif
+-      cy = mpn_submul_1 (np - dn, dp, dn, q0h);
+-
+-      nh -= cy;
++      nh -= mpn_submul_1 (np, dp, dn, q0h);
+     }
+ 
+-  np[0] = nh;
+-
+-  np -= dn;
+-
+   /* 1st adjustment depends on extra high remainder limb.  */
+-  h = np[dn];
++  cnd = nh != 0;				/* FIXME: cmp-to-int */
+ #if OPERATION_sbpi1_div_qr_sec
+-  qlp[0] += h;
++  qlp[0] += cnd;
+ #endif
+-  h -= mpn_subcnd_n (np, np, dp, dn, h);
++  nh -= mpn_subcnd_n (np, np, dp, dn, cnd);
+ 
+-  /* 2nd adjustment depends on remainder/divisor comparision as well as whether
++  /* 2nd adjustment depends on remainder/divisor comparison as well as whether
+      extra remainder limb was nullified by previous subtract.  */
+   cy = mpn_sub_n (np, np, dp, dn);
+-  cy = cy == h;				/* FIXME: might leak on some archs */
++  cy = cy - nh;
+ #if OPERATION_sbpi1_div_qr_sec
+-  qlp[0] += cy;
++  qlp[0] += 1 - cy;
+ #endif
+-  mpn_addcnd_n (np, np, dp, dn, 1 - cy);
++  mpn_addcnd_n (np, np, dp, dn, cy);
+ 
++  /* 3rd adjustment depends on remainder/divisor comparison.  */
++  cy = mpn_sub_n (np, np, dp, dn);
++#if OPERATION_sbpi1_div_qr_sec
++  qlp[0] += 1 - cy;
++#endif
++  mpn_addcnd_n (np, np, dp, dn, cy);
++
++#if OPERATION_sbpi1_div_qr_sec
+   /* Combine quotient halves into final quotient.  */
+-#if OPERATION_sbpi1_div_qr_sec
+-  qh = 0;
+-  if (nn - dn != 0)
+-    {
+-      qh = mpn_lshift (qhp, qhp, nn - dn, GMP_NUMB_BITS/2);
+-      qh += mpn_add_n (qp, qhp, qlp, nn - dn);
+-    }
++  qh = mpn_lshift (qhp, qhp, nn - dn, GMP_NUMB_BITS/2);
++  qh += mpn_add_n (qp, qhp, qlp, nn - dn);
+ 
+   return qh;
+ #else
+
diff --git a/patches/gmp/5.1.1/130-do-not-clobber-f16-f18.patch b/patches/gmp/5.1.1/130-do-not-clobber-f16-f18.patch
new file mode 100644
index 00000000..9f9f470c
--- /dev/null
+++ b/patches/gmp/5.1.1/130-do-not-clobber-f16-f18.patch
@@ -0,0 +1,73 @@
+
+# HG changeset patch
+# User Torbjorn Granlund <tege@gmplib.org>
+# Date 1369216954 -7200
+# Node ID 394bdf8fdaee749660cc5680cc0636ea0242fae7
+# Parent  0771124b9f13b218a3f07f7dd7c127961974489d
+Don't clobber f16-f18.
+
+diff -r 0771124b9f13 -r 394bdf8fdaee mpn/ia64/divrem_2.asm
+--- a/mpn/ia64/divrem_2.asm	Mon May 20 16:56:05 2013 +0200
++++ b/mpn/ia64/divrem_2.asm	Wed May 22 12:02:34 2013 +0200
+@@ -1,6 +1,6 @@
+ dnl  IA-64 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
+ 
+-dnl  Copyright 2010 Free Software Foundation, Inc.
++dnl  Copyright 2010, 2013 Free Software Foundation, Inc.
+ 
+ dnl  This file is part of the GNU MP Library.
+ 
+@@ -98,17 +98,17 @@
+ 	br.call.sptk.many b0 = mpn_invert_limb
+ 	;;
+ 	setf.sig f11 = r8		// di (non-final)
+-	setf.sig f18 = r39		// d1
+-	setf.sig f17 = r36		// d0
++	setf.sig f34 = r39		// d1
++	setf.sig f33 = r36		// d0
+ 	mov	 r1 = r43
+ 	;;
+ 	mov	 r17 = 1
+ 	setf.sig f9 = r38		// n2
+-	xma.l	 f6 = f11, f18, f0	// t0 = LO(di * d1)
++	xma.l	 f6 = f11, f34, f0	// t0 = LO(di * d1)
+ 	;;
+ 	setf.sig f10 = r37		// n1
+ 	setf.sig f15 = r17		// 1
+-	xma.hu	 f8 = f11, f17, f0	// s0 = HI(di * d0)
++	xma.hu	 f8 = f11, f33, f0	// s0 = HI(di * d0)
+ 	;;
+ 	getf.sig r17 = f6
+ 	getf.sig r16 = f8
+@@ -178,7 +178,7 @@
+   (p9)	br.cond.dptk .L52
+ .L46:
+ ')
+-	setf.sig f16 = r8		// di
++	setf.sig f32 = r8		// di
+ 	shladd	 r32 = r35, 3, r32
+ 	;;
+ 
+@@ -189,8 +189,8 @@
+ 	;;
+  (p8)	mov	 r37 = r0
+  (p9)	ld8	 r37 = [r34], -8
+-	xma.hu	 f8 = f9, f16, f10	//				0,29
+-	xma.l	 f12 = f9, f16, f10	//				0
++	xma.hu	 f8 = f9, f32, f10	//				0,29
++	xma.l	 f12 = f9, f32, f10	//				0
+ 	;;
+ 	getf.sig r20 = f12		// q0				4
+ 	xma.l	 f13 = f15, f8, f9	// q += n2			4
+@@ -198,8 +198,8 @@
+ 	;;
+ 	getf.sig r18 = f13		//				8
+ 	xma.l	 f7 = f14, f13, f10	//				8
+-	xma.l	 f6 = f17, f13, f17	// t0 = LO(d0*q+d0)		8
+-	xma.hu	 f9 = f17, f13, f17	// t1 = HI(d0*q+d0)		9
++	xma.l	 f6 = f33, f13, f33	// t0 = LO(d0*q+d0)		8
++	xma.hu	 f9 = f33, f13, f33	// t1 = HI(d0*q+d0)		9
+ 	;;
+ 	getf.sig r38 = f7		// n1				12
+ 	getf.sig r16 = f6		//				13
+