4f33937fbdf806777ed3619ff7a355fb839c197e
[libgcrypt.git] / mpi / longlong.h
1 /* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
2    Note: I added some stuff for use with gnupg
3
4 Copyright (C) 1991, 1992, 1993, 1994, 1996, 1998,
5               2000, 2001, 2002, 2003, 2004, 2011 Free Software Foundation, Inc.
6
7 This file is free software; you can redistribute it and/or modify
8 it under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or (at your
10 option) any later version.
11
12 This file is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15 License for more details.
16
17 You should have received a copy of the GNU Library General Public License
18 along with this file; see the file COPYING.LIB.  If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
20 MA 02111-1307, USA. */
21
22 /* You have to define the following before including this file:
23
24    UWtype -- An unsigned type, default type for operations (typically a "word")
25    UHWtype -- An unsigned type, at least half the size of UWtype.
26    UDWtype -- An unsigned type, at least twice as large as UWtype
27    W_TYPE_SIZE -- size in bits of UWtype
28
29    SItype, USItype -- Signed and unsigned 32 bit types.
30    DItype, UDItype -- Signed and unsigned 64 bit types.
31
32    On a 32 bit machine UWtype should typically be USItype;
33    on a 64 bit machine, UWtype should typically be UDItype.
34 */
35
/* Number of bits in one quarter of a UWtype word.  */
#define __BITS4                 (W_TYPE_SIZE / 4)

/* Radix used when a UWtype word is viewed as two half-words: a one bit
   shifted up by half the word size.  */
#define __ll_B                  (((UWtype) 1) << (W_TYPE_SIZE / 2))

/* Lower half-word of T: T converted to UWtype and masked with
   (__ll_B - 1).  */
#define __ll_lowpart(t)         (((UWtype) (t)) & (__ll_B - 1))

/* Upper half-word of T: T converted to UWtype and shifted right by half
   the word size.  */
#define __ll_highpart(t)        (((UWtype) (t)) >> (W_TYPE_SIZE / 2))
40
/* Name-mangling hook: __MPN(x) pastes a "__" prefix onto X.  A library
   that embeds this file may define its own __MPN beforehand so that the
   symbols of different users of this file do not clash.  */
#if !defined (__MPN)
# define __MPN(x) __ ## x
#endif /* !__MPN */
46
47 /* Define auxiliary asm macros.
48
49    1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
50    UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two-word
51    UWtype product in HIGH_PROD and LOW_PROD.
52
53    2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
54    UDWtype product.  This is just a variant of umul_ppmm.
55
56    3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
57    denominator) divides a UDWtype, composed by the UWtype integers
58    HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
59    in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
60    than DENOMINATOR for correct operation.  If, in addition, the most
61    significant bit of DENOMINATOR must be 1, then the pre-processor symbol
62    UDIV_NEEDS_NORMALIZATION is defined to 1.
63
64    4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
65    denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
66    is rounded towards 0.
67
68    5) count_leading_zeros(count, x) counts the number of zero-bits from the
69    msb to the first non-zero bit in the UWtype X.  This is the number of
70    steps X needs to be shifted left to set the msb.  Undefined for X == 0,
71    unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
72
73    6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
74    from the least significant end.
75
76    7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
77    high_addend_2, low_addend_2) adds two two-word UWtype integers, composed
78    by HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
79    respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
80    (i.e. carry out) is not stored anywhere, and is lost.
81
82    8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
83    high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
84    composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
85    LOW_SUBTRAHEND respectively.  The result is placed in HIGH_DIFFERENCE
86    and LOW_DIFFERENCE.  Overflow (i.e. borrow out) is not stored anywhere,
87    and is lost.
88
89    If any of these macros are left undefined for a particular CPU,
90    C macros are used.  */
91
92 /* The CPUs come in alphabetical order below.
93
94    Please add support for more CPUs here, or improve the current support
95    for the CPUs below!  */
96
97 #ifdef __riscos__
98 #pragma continue_after_hash_error
99 #else /* !__riscos__ */
100 #if defined (__GNUC__) && !defined (NO_ASM)
101
/* Helpers for appending a "cc" (condition code) clobber to an asm
   statement.  gcc 2 and later understand the clobber; gcc 1 does not, so
   there both helpers expand to nothing.
     __CLOBBER_CC      starts the clobber list      (: "cc")
     __AND_CLOBBER_CC  extends an existing list     (, "cc")  */
#if __GNUC__ >= 2
# define __CLOBBER_CC : "cc"
# define __AND_CLOBBER_CC , "cc"
#else /* __GNUC__ < 2 */
# define __CLOBBER_CC
# define __AND_CLOBBER_CC
#endif /* __GNUC__ >= 2 */
111
112
/***************************************
 **************  A29K  *****************
 ***************************************/
#if W_TYPE_SIZE == 32 && (defined (__a29k__) || defined (_AM29K))

/* (sh,sl) = (ah,al) + (bh,bl).  The first instruction forms the low word
   from AL and BL, the second forms the high word from AH and BH.  SL is
   marked early-clobber because it is written while AH/BH are still
   needed.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl)                              \
  __asm__ ("add %1,%4,%5\n"                                             \
           "addc %0,%2,%3"                                              \
           : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl))                \
           : "%r" ((USItype)(ah)), "rI" ((USItype)(bh)),                \
             "%r" ((USItype)(al)), "rI" ((USItype)(bl)))

/* (sh,sl) = (ah,al) - (bh,bl); same operand layout as add_ssaaaa, but
   without the commutative marker on the first operand of each pair.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
  __asm__ ("sub %1,%4,%5\n"                                             \
           "subc %0,%2,%3"                                              \
           : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl))                \
           : "r" ((USItype)(ah)), "rI" ((USItype)(bh)),                 \
             "r" ((USItype)(al)), "rI" ((USItype)(bl)))

/* (xh,xl) = m0 * m1.  Each argument is evaluated once into a temporary;
   one asm statement produces XL ("multiplu"), a second produces XH
   ("multmu").  */
#define umul_ppmm(xh, xl, m0, m1)                                       \
  do {                                                                  \
    USItype __mul_a = (m0);                                             \
    USItype __mul_b = (m1);                                             \
    __asm__ ("multiplu %0,%1,%2"                                        \
             : "=r" ((USItype)(xl))                                     \
             : "r" (__mul_a), "r" (__mul_b));                           \
    __asm__ ("multmu %0,%1,%2"                                          \
             : "=r" ((USItype)(xh))                                     \
             : "r" (__mul_a), "r" (__mul_b));                           \
  } while (0)

/* Divide the two-word value (n1,n0) by D, giving quotient Q and remainder
   R.  N1 is tied to the location of the remainder output through the "1"
   matching constraint.  */
#define udiv_qrnnd(q, r, n1, n0, d)                                     \
  __asm__ ("dividu %0,%3,%4"                                            \
           : "=r" ((USItype)(q)), "=q" ((USItype)(r))                   \
           : "1" ((USItype)(n1)), "r" ((USItype)(n0)),                  \
             "r" ((USItype)(d)))

/* COUNT = result of the "clz" instruction applied to X.  */
#define count_leading_zeros(count, x)                                   \
  __asm__ ("clz %0,%1"                                                  \
           : "=r" ((USItype)(count))                                    \
           : "r" ((USItype)(x)))

/* Advertises that count_leading_zeros is usable for X == 0 on this CPU;
   presumably 32 is the value it yields in that case.  */
#define COUNT_LEADING_ZEROS_0 32

#endif /* __a29k__ */
160
161
/***************************************
 **************  ALPHA  ****************
 ***************************************/
#if defined (__alpha) && W_TYPE_SIZE == 64
/* (ph,pl) = m0 * m1.  The arguments are evaluated once into temporaries.
   The high word is produced by the "umulh" instruction; the low word is
   the ordinary C product of the two 64-bit temporaries.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    UDItype __m0 = (m0), __m1 = (m1);                                   \
    __asm__ ("umulh %r1,%2,%0"                                          \
             : "=r" ((UDItype)(ph))                                     \
             : "%rJ" (__m0),                                            \
               "rI" (__m1));                                            \
    (pl) = __m0 * __m1;                                                 \
  } while (0)
#define UMUL_TIME 46
#ifndef LONGLONG_STANDALONE
/* Division is delegated to an out-of-line helper that returns the
   quotient and stores the remainder through its first argument.

   The helper is declared with a full prototype: an empty parameter list
   "()" means "no parameters" in C23, which would reject the four-argument
   call below.
   NOTE(review): the parameter types are taken from the call site in
   udiv_qrnnd; confirm against the assembly implementation.  */
extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { UDItype __r;                                                     \
    (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));                         \
    (r) = __r;                                                          \
  } while (0)
#define UDIV_TIME 220
#endif /* LONGLONG_STANDALONE */
#endif /* __alpha */
183
/***************************************
 **************  ARM  ******************
 ***************************************/
#if defined (__arm__) && W_TYPE_SIZE == 32 && \
    (!defined (__thumb__) || defined (__thumb2__))
/* The __ARM_ARCH define is provided by gcc 4.8.  Construct it otherwise
   from the per-architecture __ARM_ARCH_xx__ macros.  If none of them is
   defined, __ARM_ARCH stays undefined and the tests below fall through to
   their #else branches.  */
#ifndef __ARM_ARCH
# ifdef __ARM_ARCH_2__
#  define __ARM_ARCH 2
# elif defined (__ARM_ARCH_3__) || defined (__ARM_ARCH_3M__)
#  define __ARM_ARCH 3
# elif defined (__ARM_ARCH_4__) || defined (__ARM_ARCH_4T__)
#  define __ARM_ARCH 4
# elif defined (__ARM_ARCH_5__) || defined (__ARM_ARCH_5E__) \
       || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \
       || defined(__ARM_ARCH_5TEJ__)
#  define __ARM_ARCH 5
# elif defined (__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
       || defined (__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
       || defined (__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__)
#  define __ARM_ARCH 6
# elif defined (__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
       || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
       || defined(__ARM_ARCH_7EM__)
#  define __ARM_ARCH 7
# else
   /* could not detect? */
# endif
#endif
/* (sh,sl) = (ah,al) + (bh,bl).  "adds" forms the low word and sets the
   flags, "adc" forms the high word.  SL is early-clobber because it is
   written before AH/BH are read.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("adds %1, %4, %5\n"                                          \
           "adc  %0, %2, %3"                                            \
           : "=r" ((sh)),                                               \
             "=&r" ((sl))                                               \
           : "%r" ((USItype)(ah)),                                      \
             "rI" ((USItype)(bh)),                                      \
             "%r" ((USItype)(al)),                                      \
             "rI" ((USItype)(bl)) __CLOBBER_CC)
/* (sh,sl) = (ah,al) - (bh,bl), using "subs" for the low word and "sbc"
   for the high word.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subs %1, %4, %5\n"                                          \
           "sbc  %0, %2, %3"                                            \
           : "=r" ((sh)),                                               \
             "=&r" ((sl))                                               \
           : "r" ((USItype)(ah)),                                       \
             "rI" ((USItype)(bh)),                                      \
             "r" ((USItype)(al)),                                       \
             "rI" ((USItype)(bl)) __CLOBBER_CC)
#if (defined __ARM_ARCH && __ARM_ARCH <= 3)
/* (xh,xl) = a * b without a long-multiply instruction: both operands are
   split into 16-bit halves (AAAA/aaaa and BBBB/bbbb in the asm comments),
   the four partial products are formed with "mul" and then summed.  r0-r2
   are used as scratch registers and are listed as clobbers.  */
#define umul_ppmm(xh, xl, a, b) \
  __asm__ ("@ Inlined umul_ppmm\n"                                      \
        "mov    %|r0, %2, lsr #16               @ AAAA\n"               \
        "mov    %|r2, %3, lsr #16               @ BBBB\n"               \
        "bic    %|r1, %2, %|r0, lsl #16         @ aaaa\n"               \
        "bic    %0, %3, %|r2, lsl #16           @ bbbb\n"               \
        "mul    %1, %|r1, %|r2                  @ aaaa * BBBB\n"        \
        "mul    %|r2, %|r0, %|r2                @ AAAA * BBBB\n"        \
        "mul    %|r1, %0, %|r1                  @ aaaa * bbbb\n"        \
        "mul    %0, %|r0, %0                    @ AAAA * bbbb\n"        \
        "adds   %|r0, %1, %0                    @ central sum\n"        \
        "addcs  %|r2, %|r2, #65536\n"                                   \
        "adds   %1, %|r1, %|r0, lsl #16\n"                              \
        "adc    %0, %|r2, %|r0, lsr #16"                                \
           : "=&r" ((xh)),                                              \
             "=r" ((xl))                                                \
           : "r" ((USItype)(a)),                                        \
             "r" ((USItype)(b))                                         \
           : "r0", "r1", "r2" __AND_CLOBBER_CC)
#else /* __ARM_ARCH >= 4 (or not detected) */
/* (xh,xl) = a * b with a single "umull".  Both outputs are early-clobber:
   before ARMv6 the instruction requires RdLo, RdHi and Rm to be distinct
   registers, so neither result may be allocated on top of an input.  */
#define umul_ppmm(xh, xl, a, b)                                         \
  __asm__ ("@ Inlined umul_ppmm\n"                                      \
           "umull %1, %0, %2, %3"                                       \
                   : "=&r" ((xh)),                                      \
                     "=&r" ((xl))                                       \
                   : "r" ((USItype)(a)),                                \
                     "r" ((USItype)(b)))
#endif /* __ARM_ARCH >= 4 */
#define UMUL_TIME 20
#define UDIV_TIME 100
#if (defined __ARM_ARCH && __ARM_ARCH >= 5)
/* COUNT = result of the "clz" instruction applied to X; only provided
   when the detected architecture level is at least 5.  */
#define count_leading_zeros(count, x) \
  __asm__ ("clz %0, %1"                                                 \
                   : "=r" ((count))                                     \
                   : "r" ((USItype)(x)))
#endif /* __ARM_ARCH >= 5 */
#endif /* __arm__ */
269
/***************************************
 **********  ARM64 / Aarch64  **********
 ***************************************/
#if W_TYPE_SIZE == 64 && defined (__aarch64__)

/* (sh,sl) = (ah,al) + (bh,bl).  "adds" forms the low word and sets the
   flags, "adc" forms the high word.  SL is early-clobber because it is
   written before AH/BH are read.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl)                              \
  __asm__ ("adds %1, %4, %5\n"                                          \
           "adc  %0, %2, %3\n"                                          \
           : "=r" ((sh)), "=&r" ((sl))                                  \
           : "r" ((UDItype)(ah)), "r" ((UDItype)(bh)),                  \
             "r" ((UDItype)(al)), "r" ((UDItype)(bl))                   \
           __CLOBBER_CC)

/* (sh,sl) = (ah,al) - (bh,bl), using "subs" for the low word and "sbc"
   for the high word.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
  __asm__ ("subs %1, %4, %5\n"                                          \
           "sbc  %0, %2, %3\n"                                          \
           : "=r" ((sh)), "=&r" ((sl))                                  \
           : "r" ((UDItype)(ah)), "r" ((UDItype)(bh)),                  \
             "r" ((UDItype)(al)), "r" ((UDItype)(bl))                   \
           __CLOBBER_CC)

/* (ph,pl) = m0 * m1.  The arguments are evaluated once.  The low word is
   the plain C product; the high word comes from "umulh" and passes
   through a UDItype temporary before being stored to PH.  */
#define umul_ppmm(ph, pl, m0, m1)                                       \
  do {                                                                  \
    UDItype __mul_a = (m0);                                             \
    UDItype __mul_b = (m1);                                             \
    UDItype __mul_hi;                                                   \
    (pl) = __mul_a * __mul_b;                                           \
    __asm__ ("umulh %0,%1,%2"                                           \
             : "=r" (__mul_hi)                                          \
             : "r" (__mul_a), "r" (__mul_b));                           \
    (ph) = __mul_hi;                                                    \
  } while (0)

/* COUNT = result of the "clz" instruction applied to X.  */
#define count_leading_zeros(count, x)                                   \
  __asm__ ("clz %0, %1\n"                                               \
           : "=r" ((count))                                             \
           : "r" ((UDItype)(x)))

#endif /* __aarch64__ */
307
/***************************************
 **************  CLIPPER  **************
 ***************************************/
#if W_TYPE_SIZE == 32 && defined (__clipper__)

/* (w1,w0) = u * v, unsigned.  "mulwux" leaves a double-word result in a
   union; its __h and __l halves are then copied to W1 and W0.  U is tied
   to the output register ("%0").  */
#define umul_ppmm(w1, w0, u, v)                                         \
  ({                                                                    \
    union {                                                             \
      UDItype __ll;                                                     \
      struct { USItype __l, __h; } __i;                                 \
    } __prod;                                                           \
    __asm__ ("mulwux %2,%0"                                             \
             : "=r" (__prod.__ll)                                       \
             : "%0" ((USItype)(u)), "r" ((USItype)(v)));                \
    (w1) = __prod.__i.__h;                                              \
    (w0) = __prod.__i.__l;                                              \
  })

/* Signed counterpart of umul_ppmm, built on "mulwx" and signed types.  */
#define smul_ppmm(w1, w0, u, v)                                         \
  ({                                                                    \
    union {                                                             \
      DItype __ll;                                                      \
      struct { SItype __l, __h; } __i;                                  \
    } __prod;                                                           \
    __asm__ ("mulwx %2,%0"                                              \
             : "=r" (__prod.__ll)                                       \
             : "%0" ((SItype)(u)), "r" ((SItype)(v)));                  \
    (w1) = __prod.__i.__h;                                              \
    (w0) = __prod.__i.__l;                                              \
  })

/* Evaluates to the UDItype product of U and V (same instruction as
   umul_ppmm, but the double word is returned whole).  */
#define __umulsidi3(u, v)                                               \
  ({                                                                    \
    UDItype __wide;                                                     \
    __asm__ ("mulwux %2,%0"                                             \
             : "=r" (__wide)                                            \
             : "%0" ((USItype)(u)), "r" ((USItype)(v)));                \
    __wide;                                                             \
  })

#endif /* __clipper__ */
338
339
/***************************************
 **************  GMICRO  ***************
 ***************************************/
#if W_TYPE_SIZE == 32 && defined (__gmicro__)

/* (sh,sl) = (ah,al) + (bh,bl).  AH and AL are tied to the output operands
   ("%0"/"%1"); the instructions add BL and BH into them in place.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl)                              \
  __asm__ ("add.w %5,%1\n"                                              \
           "addx %3,%0"                                                 \
           : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl))                \
           : "%0" ((USItype)(ah)), "g" ((USItype)(bh)),                 \
             "%1" ((USItype)(al)), "g" ((USItype)(bl)))

/* (sh,sl) = (ah,al) - (bh,bl); in-place like add_ssaaaa, but the tied
   operands are not marked commutative.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
  __asm__ ("sub.w %5,%1\n"                                              \
           "subx %3,%0"                                                 \
           : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl))                \
           : "0" ((USItype)(ah)), "g" ((USItype)(bh)),                  \
             "1" ((USItype)(al)), "g" ((USItype)(bl)))

/* (ph,pl) = m0 * m1 via "mulx"; M0 is tied to the PH operand.  */
#define umul_ppmm(ph, pl, m0, m1)                                       \
  __asm__ ("mulx %3,%0,%1"                                              \
           : "=g" ((USItype)(ph)), "=r" ((USItype)(pl))                 \
           : "%0" ((USItype)(m0)), "g" ((USItype)(m1)))

/* Divide (nh,nl) by D via "divx": NL is tied to the Q operand and NH to
   the R operand.  */
#define udiv_qrnnd(q, r, nh, nl, d)                                     \
  __asm__ ("divx %4,%0,%1"                                              \
           : "=g" ((USItype)(q)), "=r" ((USItype)(r))                   \
           : "1" ((USItype)(nh)), "0" ((USItype)(nl)),                  \
             "g" ((USItype)(d)))

/* COUNT = result of "bsch/1" on X; the COUNT operand is preloaded with
   zero through the "0" matching constraint.  */
#define count_leading_zeros(count, x)                                   \
  __asm__ ("bsch/1 %1,%0"                                               \
           : "=g" (count)                                               \
           : "g" ((USItype)(x)), "0" ((USItype)0))

#endif /* __gmicro__ */
381
382
/***************************************
 **************  HPPA  *****************
 ***************************************/
#if defined (__hppa) && W_TYPE_SIZE == 32
/* (sh,sl) = (ah,al) + (bh,bl).  "add" forms the low word, "addc" the high
   word.  SL is early-clobber because it is written before AH/BH are
   read.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("    add %4,%5,%1\n"                                             \
           "    addc %2,%3,%0"                                              \
           : "=r" ((USItype)(sh)),                                      \
             "=&r" ((USItype)(sl))                                      \
           : "%rM" ((USItype)(ah)),                                     \
             "rM" ((USItype)(bh)),                                      \
             "%rM" ((USItype)(al)),                                     \
             "rM" ((USItype)(bl)))
/* (sh,sl) = (ah,al) - (bh,bl), using "sub" for the low word and "subb"
   for the high word.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("    sub %4,%5,%1\n"                                             \
           "    subb %2,%3,%0"                                              \
           : "=r" ((USItype)(sh)),                                      \
             "=&r" ((USItype)(sl))                                      \
           : "rM" ((USItype)(ah)),                                      \
             "rM" ((USItype)(bh)),                                      \
             "rM" ((USItype)(al)),                                      \
             "rM" ((USItype)(bl)))
#if defined (_PA_RISC1_1)
/* (wh,wl) = u * v via "xmpyu", which uses "f"-class (floating point)
   register operands.  The double-word result lands in a union whose __h
   and __l halves are copied out.  Only provided for PA-RISC 1.1.  */
#define umul_ppmm(wh, wl, u, v) \
  do {                                                                  \
    union {UDItype __ll;                                                \
           struct {USItype __h, __l;} __i;                              \
          } __xx;                                                       \
    __asm__ ("  xmpyu %1,%2,%0"                                           \
             : "=*f" (__xx.__ll)                                        \
             : "*f" ((USItype)(u)),                                     \
               "*f" ((USItype)(v)));                                    \
    (wh) = __xx.__i.__h;                                                \
    (wl) = __xx.__i.__l;                                                \
  } while (0)
#define UMUL_TIME 8
#define UDIV_TIME 60
#else
#define UMUL_TIME 40
#define UDIV_TIME 80
#endif
#ifndef LONGLONG_STANDALONE
/* Division is delegated to an out-of-line helper that returns the
   quotient and stores the remainder through its first argument.

   The helper is declared with a full prototype: an empty parameter list
   "()" means "no parameters" in C23, which would reject the four-argument
   call below.
   NOTE(review): the parameter types are taken from the call site in
   udiv_qrnnd; confirm against the assembly implementation.  */
extern USItype __udiv_qrnnd (USItype *, USItype, USItype, USItype);
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { USItype __r;                                                     \
    (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));                         \
    (r) = __r;                                                          \
  } while (0)
#endif /* LONGLONG_STANDALONE */
/* COUNT = number of leading zero bits of X, found by a branch-free binary
   search: COUNT starts at 1, each step tests whether the upper part of
   the remaining field is zero and either adds 16/8/4/2 to COUNT or shifts
   the field down; the final extracted bit is subtracted.  X is copied
   into a scratch variable (tied via "1") that the asm modifies.  */
#define count_leading_zeros(count, x) \
  do {                                                                 \
    USItype __tmp;                                                     \
    __asm__ (                                                          \
       "        ldi             1,%0                                       \n" \
       "        extru,=         %1,15,16,%%r0  ; Bits 31..16 zero?         \n" \
       "        extru,tr        %1,15,16,%1    ; No.  Shift down, skip add.\n" \
       "        ldo             16(%0),%0      ; Yes.   Perform add.       \n" \
       "        extru,=         %1,23,8,%%r0   ; Bits 15..8 zero?          \n" \
       "        extru,tr        %1,23,8,%1     ; No.  Shift down, skip add.\n" \
       "        ldo             8(%0),%0       ; Yes.   Perform add.       \n" \
       "        extru,=         %1,27,4,%%r0   ; Bits 7..4 zero?           \n" \
       "        extru,tr        %1,27,4,%1     ; No.  Shift down, skip add.\n" \
       "        ldo             4(%0),%0       ; Yes.   Perform add.       \n" \
       "        extru,=         %1,29,2,%%r0   ; Bits 3..2 zero?           \n" \
       "        extru,tr        %1,29,2,%1     ; No.  Shift down, skip add.\n" \
       "        ldo             2(%0),%0       ; Yes.   Perform add.       \n" \
       "        extru           %1,30,1,%1     ; Extract bit 1.            \n" \
       "        sub             %0,%1,%0       ; Subtract it.              "   \
       : "=r" (count), "=r" (__tmp) : "1" (x));                        \
  } while (0)
#endif /* hppa */
454
455
/***************************************
 **************  I370  *****************
 ***************************************/
#if W_TYPE_SIZE == 32 && (defined (__i370__) || defined (__mvs__))

/* (xh,xl) = m0 * m1, unsigned.  The "mr" result is treated as a signed
   product, so afterwards XH is corrected: for each operand whose top bit
   is set, the other operand is added to the high word.  */
#define umul_ppmm(xh, xl, m0, m1)                                       \
  do {                                                                  \
    union {                                                             \
      UDItype __ll;                                                     \
      struct { USItype __h, __l; } __i;                                 \
    } __prod;                                                           \
    USItype __mul_a = (m0);                                             \
    USItype __mul_b = (m1);                                             \
    __asm__ ("mr %0,%3"                                                 \
             : "=r" (__prod.__i.__h), "=r" (__prod.__i.__l)             \
             : "%1" (__mul_a), "r" (__mul_b));                          \
    (xh) = __prod.__i.__h;                                              \
    (xl) = __prod.__i.__l;                                              \
    (xh) += ((((SItype) __mul_a >> 31) & __mul_b)                       \
             + (((SItype) __mul_b >> 31) & __mul_a));                   \
  } while (0)

/* (xh,xl) = m0 * m1 taken directly from "mr", with no sign correction.  */
#define smul_ppmm(xh, xl, m0, m1)                                       \
  do {                                                                  \
    union {                                                             \
      DItype __ll;                                                      \
      struct { USItype __h, __l; } __i;                                 \
    } __prod;                                                           \
    __asm__ ("mr %0,%3"                                                 \
             : "=r" (__prod.__i.__h), "=r" (__prod.__i.__l)             \
             : "%1" (m0), "r" (m1));                                    \
    (xh) = __prod.__i.__h;                                              \
    (xl) = __prod.__i.__l;                                              \
  } while (0)

/* Divide the double word (n1,n0) by D with "dr".  The numerator is
   assembled in a union; after the instruction the low half is stored to Q
   and the high half to R.  */
#define sdiv_qrnnd(q, r, n1, n0, d)                                     \
  do {                                                                  \
    union {                                                             \
      DItype __ll;                                                      \
      struct { USItype __h, __l; } __i;                                 \
    } __num;                                                            \
    __num.__i.__h = n1;                                                 \
    __num.__i.__l = n0;                                                 \
    __asm__ ("dr %0,%2"                                                 \
             : "=r" (__num.__ll)                                        \
             : "0" (__num.__ll), "r" (d));                              \
    (q) = __num.__i.__l;                                                \
    (r) = __num.__i.__h;                                                \
  } while (0)

#endif /* __i370__ || __mvs__ */
499
500
/***************************************
 **************  I386  *****************
 ***************************************/
#if W_TYPE_SIZE == 32 && (defined (__i386__) || defined (__i486__))

/* (sh,sl) = (ah,al) + (bh,bl).  AH and AL are tied to the output
   registers ("%0"/"%1"); "addl" adds BL into the low word and "adcl" adds
   BH into the high word.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl)                              \
  __asm__ ("addl %5,%1\n"                                               \
           "adcl %3,%0"                                                 \
           : "=r" ((sh)), "=&r" ((sl))                                  \
           : "%0" ((USItype)(ah)), "g" ((USItype)(bh)),                 \
             "%1" ((USItype)(al)), "g" ((USItype)(bl))                  \
           __CLOBBER_CC)

/* (sh,sl) = (ah,al) - (bh,bl), using "subl" for the low word and "sbbl"
   for the high word.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
  __asm__ ("subl %5,%1\n"                                               \
           "sbbl %3,%0"                                                 \
           : "=r" ((sh)), "=&r" ((sl))                                  \
           : "0" ((USItype)(ah)), "g" ((USItype)(bh)),                  \
             "1" ((USItype)(al)), "g" ((USItype)(bl))                   \
           __CLOBBER_CC)

/* (w1,w0) = u * v via "mull": U is tied to the "a" register operand, the
   low word is returned through "a" and the high word through "d".  */
#define umul_ppmm(w1, w0, u, v)                                         \
  __asm__ ("mull %3"                                                    \
           : "=a" ((w0)), "=d" ((w1))                                   \
           : "%0" ((USItype)(u)), "rm" ((USItype)(v))                   \
           __CLOBBER_CC)

/* Divide (n1,n0) by D via "divl": N0 is tied to the "a" operand and N1 to
   the "d" operand; Q comes back through "a" and R through "d".  */
#define udiv_qrnnd(q, r, n1, n0, d)                                     \
  __asm__ ("divl %4"                                                    \
           : "=a" ((q)), "=d" ((r))                                     \
           : "0" ((USItype)(n0)), "1" ((USItype)(n1)),                  \
             "rm" ((USItype)(d))                                        \
           __CLOBBER_CC)

/* COUNT = ("bsrl" result for X) XOR 31.  The instruction result goes
   through a USItype temporary before the XOR.  */
#define count_leading_zeros(count, x)                                   \
  do {                                                                  \
    USItype __bsr_pos;                                                  \
    __asm__ ("bsrl %1,%0"                                               \
             : "=r" (__bsr_pos)                                         \
             : "rm" ((USItype)(x))                                      \
             __CLOBBER_CC);                                             \
    (count) = __bsr_pos ^ 31;                                           \
  } while (0)

/* COUNT = "bsfl" result for X, written to COUNT directly.  */
#define count_trailing_zeros(count, x)                                  \
  __asm__ ("bsfl %1,%0"                                                 \
           : "=r" (count)                                               \
           : "rm" ((USItype)(x))                                        \
           __CLOBBER_CC)

/* Cost estimates, overridable by defining them before this point.  */
#if !defined (UMUL_TIME)
# define UMUL_TIME 40
#endif
#if !defined (UDIV_TIME)
# define UDIV_TIME 40
#endif

#endif /* 80x86 */
557
558 /***************************************
559  *********** AMD64 / x86-64 ************
560  ***************************************/
561 #if defined(__x86_64) && W_TYPE_SIZE == 64
/* Two-word addition (sh,sl) = (ah,al) + (bh,bl).  ADDQ adds the low words
   and ADCQ adds the high words plus the resulting carry.  SL is marked
   early-clobber ("=&r") because it is written before BH is read.

   BH and BL use "rme" rather than "g": ADDQ/ADCQ can only encode a
   sign-extended 32-bit immediate, and "g" would let the compiler emit an
   arbitrary 64-bit constant as an immediate, which the assembler rejects.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addq %5,%1\n"                                               \
           "adcq %3,%0"                                                 \
           : "=r" ((sh)),                                               \
             "=&r" ((sl))                                               \
           : "0" ((UDItype)(ah)),                                       \
             "rme" ((UDItype)(bh)),                                     \
             "1" ((UDItype)(al)),                                       \
             "rme" ((UDItype)(bl))                                      \
           __CLOBBER_CC)
/* Two-word subtraction (sh,sl) = (ah,al) - (bh,bl).  SUBQ subtracts the
   low words and SBBQ subtracts the high words together with the borrow.
   SL is marked early-clobber ("=&r") because it is written before BH is
   read.

   BH and BL use "rme" rather than "g": SUBQ/SBBQ can only encode a
   sign-extended 32-bit immediate, and "g" would let the compiler emit an
   arbitrary 64-bit constant as an immediate, which the assembler rejects.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subq %5,%1\n"                                               \
           "sbbq %3,%0"                                                 \
           : "=r" ((sh)),                                               \
             "=&r" ((sl))                                               \
           : "0" ((UDItype)(ah)),                                       \
             "rme" ((UDItype)(bh)),                                     \
             "1" ((UDItype)(al)),                                       \
             "rme" ((UDItype)(bl))                                      \
           __CLOBBER_CC)
/* Unsigned 64x64->128 multiply using MULQ: (w1,w0) = u * v.  U is tied to
   the "a" register, which also returns the low product word W0; the "d"
   register returns the high word W1.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mulq %3"                                                    \
           : "=a" (w0), "=d" (w1)                                       \
           : "0" ((UDItype) (u)), "rm" ((UDItype) (v))                  \
           __CLOBBER_CC)
/* Divide the two-word value (n1,n0) by D using DIVQ.  The quotient is
   returned in Q ("a" register) and the remainder in R ("d" register).
   NOTE(review): DIVQ traps when the quotient does not fit in one word, so
   callers presumably guarantee n1 < d -- confirm.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("divq %4"                                                    \
           : "=a" (q), "=d" (r)                                         \
           : "0" ((UDItype) (n0)), "1" ((UDItype) (n1)),                \
             "rm" ((UDItype) (d))                                       \
           __CLOBBER_CC)
/* Store in COUNT the number of leading zero bits of the 64-bit value X.
   BSRQ yields the index of the most significant set bit; XOR-ing that
   index with 63 turns it into a leading-zero count.  NOTE: BSR leaves its
   destination undefined when X is zero.  */
#define count_leading_zeros(count, x) \
  do {                                                                  \
    UDItype __msb_index;                                                \
    __asm__ ("bsrq %1,%0"                                               \
             : "=r" (__msb_index)                                       \
             : "rm" ((UDItype) (x))                                     \
             __CLOBBER_CC);                                             \
    (count) = __msb_index ^ 63;                                         \
  } while (0)
/* Store in COUNT the index of the least significant set bit of the 64-bit
   value X, as produced by BSFQ.  A full-width temporary receives the asm
   result so COUNT may have any integer type.  NOTE: BSF leaves its
   destination undefined when X is zero.  */
#define count_trailing_zeros(count, x) \
  do {                                                                  \
    UDItype __lsb_index;                                                \
    __asm__ ("bsfq %1,%0"                                               \
             : "=r" (__lsb_index)                                       \
             : "rm" ((UDItype) (x))                                     \
             __CLOBBER_CC);                                             \
    (count) = __lsb_index;                                              \
  } while (0)
613 #ifndef UMUL_TIME
614 #define UMUL_TIME 40
615 #endif
616 #ifndef UDIV_TIME
617 #define UDIV_TIME 40
618 #endif
619 #endif /* __x86_64 */
620
621
622 /***************************************
623  **************  I860  *****************
624  ***************************************/
625 #if defined (__i860__) && W_TYPE_SIZE == 32
/* Shift the two-word value (h,l) right by C bits and store the result word
   in R, using the i860 "shr"/"shrd" pair.  The ':' introducing the output
   operand list was missing, which made the "=r" string part of the asm
   template instead of a constraint.
   NOTE(review): i860 instruction semantics not verified here.  */
#define rshift_rhlc(r,h,l,c) \
  __asm__ ("shr %3,r0,r0\n"  \
           "shrd %1,%2,%0"   \
           : "=r" (r) : "r" (h), "r" (l), "rn" (c))
630 #endif /* i860 */
631
632 /***************************************
633  **************  I960  *****************
634  ***************************************/
635 #if defined (__i960__) && W_TYPE_SIZE == 32
/* Two-word addition (sh,sl) = (ah,al) + (bh,bl) as a pair of "addc"
   instructions: low words first, then high words.
   NOTE(review): "cmpo 1,0" presumably initialises the carry for the
   first addc -- confirm against the i960 manual.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("cmpo 1,0\n"                                                 \
           "addc %5,%4,%1\n"                                            \
           "addc %3,%2,%0"                                              \
           : "=r" ((USItype) (sh)), "=&r" ((USItype) (sl))              \
           : "%dI" ((USItype) (ah)), "dI" ((USItype) (bh)),             \
             "%dI" ((USItype) (al)), "dI" ((USItype) (bl)))
/* Two-word subtraction (sh,sl) = (ah,al) - (bh,bl) as a pair of "subc"
   instructions: low words first, then high words.
   NOTE(review): "cmpo 0,0" presumably initialises the carry/borrow for
   the first subc -- confirm against the i960 manual.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("cmpo 0,0\n"                                                 \
           "subc %5,%4,%1\n"                                            \
           "subc %3,%2,%0"                                              \
           : "=r" ((USItype) (sh)), "=&r" ((USItype) (sl))              \
           : "dI" ((USItype) (ah)), "dI" ((USItype) (bh)),              \
             "dI" ((USItype) (al)), "dI" ((USItype) (bl)))
/* Multiply U by V with "emul" into a double-word temporary, then split it:
   W1 receives the __h member and W0 the __l member of the union view.  */
#define umul_ppmm(w1, w0, u, v) \
  ({                                                                    \
    union {                                                             \
      UDItype __ll;                                                     \
      struct {USItype __l, __h;} __i;                                   \
    } __prod;                                                           \
    __asm__ ("emul        %2,%1,%0"                                     \
             : "=d" (__prod.__ll)                                       \
             : "%dI" ((USItype) (u)), "dI" ((USItype) (v)));            \
    (w1) = __prod.__i.__h;                                              \
    (w0) = __prod.__i.__l;                                              \
  })
/* Expression form of the "emul" multiply: evaluates to the double-word
   (UDItype) result of U times V.  */
#define __umulsidi3(u, v) \
  ({                                                                    \
    UDItype __product;                                                  \
    __asm__ ("emul      %2,%1,%0"                                       \
             : "=d" (__product)                                         \
             : "%dI" ((USItype) (u)), "dI" ((USItype) (v)));            \
    __product;                                                          \
  })
/* Divide the two-word value (nh,nl) by D with "ediv".  The dividend is
   assembled in __nn and the double-word result lands in __rq, whose low
   member is stored to R and whose high member is stored to Q.

   Two defects are fixed here: __rq was used without being declared, and
   the template used "%d"/"%n", which are not operand references.
   NOTE(review): the operand order "%2,%1,%0" (divisor, dividend, result)
   mirrors the emul usage in this section and has not been checked on
   i960 hardware -- confirm.  */
#define udiv_qrnnd(q, r, nh, nl, d) \
  do {                                                                  \
    union {UDItype __ll;                                                \
           struct {USItype __l, __h;} __i;                              \
          } __nn, __rq;                                                 \
    __nn.__i.__h = (nh); __nn.__i.__l = (nl);                           \
    __asm__ ("ediv %2,%1,%0"                                            \
           : "=d" (__rq.__ll)                                           \
           : "dI" (__nn.__ll),                                          \
             "dI" ((USItype)(d)));                                      \
    (r) = __rq.__i.__l; (q) = __rq.__i.__h;                             \
  } while (0)
/* Store in COUNT the "scanbit" result for X XOR-ed with 31, i.e. a bit
   index converted to a leading-zero count.  */
#define count_leading_zeros(count, x) \
  do {                                                                  \
    USItype __bit_index;                                                \
    __asm__ ("scanbit %1,%0"                                            \
             : "=r" (__bit_index)                                       \
             : "r" ((USItype) (x)));                                    \
    (count) = __bit_index ^ 31;                                         \
  } while (0)
692 #define COUNT_LEADING_ZEROS_0 (-32) /* sic */
693 #if defined (__i960mx)          /* what is the proper symbol to test??? */
/* Shift the two-word value (h,l) right by C with "shre" and store the
   result in R.  The closing "while (0)" was missing, so the macro expanded
   to a bare "do { ... }" and could not be used as a statement.  */
#define rshift_rhlc(r,h,l,c) \
  do {                                                                  \
    union {UDItype __ll;                                                \
           struct {USItype __l, __h;} __i;                              \
          } __nn;                                                       \
    __nn.__i.__h = (h); __nn.__i.__l = (l);                             \
    __asm__ ("shre %2,%1,%0"                                            \
             : "=d" (r) : "dI" (__nn.__ll), "dI" (c));                  \
  } while (0)
703 #endif /* i960mx */
704 #endif /* i960 */
705
706
707 /***************************************
708  **************  68000  ****************
709  ***************************************/
710 #if (defined (__mc68000__) || defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32
/* Two-word addition (sh,sl) = (ah,al) + (bh,bl): "add" on the low words
   followed by "addx" on the high words.  AH and AL are tied to the output
   registers through the matching constraints "%0" and "%1".  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add%.l %5,%1\n"                                             \
           "addx%.l %3,%0"                                              \
           : "=d" ((USItype) (sh)), "=&d" ((USItype) (sl))              \
           : "%0" ((USItype) (ah)), "d" ((USItype) (bh)),               \
             "%1" ((USItype) (al)), "g" ((USItype) (bl)))
/* Two-word subtraction (sh,sl) = (ah,al) - (bh,bl): "sub" on the low
   words followed by "subx" on the high words.  AH and AL are tied to the
   output registers through the matching constraints "0" and "1".  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub%.l %5,%1\n"                                             \
           "subx%.l %3,%0"                                              \
           : "=d" ((USItype) (sh)), "=&d" ((USItype) (sl))              \
           : "0" ((USItype) (ah)), "d" ((USItype) (bh)),                \
             "1" ((USItype) (al)), "g" ((USItype) (bl)))
729 #if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
/* Unsigned multiply via "mulu.l" with a register-pair destination %1:%0;
   W0 is operand 0 (tied to U on input) and W1 is operand 1.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mulu%.l %3,%1:%0"                                           \
           : "=d" ((USItype) (w0)), "=d" ((USItype) (w1))               \
           : "%0" ((USItype) (u)), "dmi" ((USItype) (v)))
736 #define UMUL_TIME 45
/* Unsigned divide via "divu.l" on the register pair %1:%0.  N0 enters in
   the register that returns Q and N1 in the register that returns R.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("divu%.l %4,%1:%0"                                           \
           : "=d" ((USItype) (q)), "=d" ((USItype) (r))                 \
           : "0" ((USItype) (n0)), "1" ((USItype) (n1)),                \
             "dmi" ((USItype) (d)))
744 #define UDIV_TIME 90
/* Signed counterpart of udiv_qrnnd using "divs.l" on the register pair
   %1:%0.  N0 enters in the register that returns Q and N1 in the register
   that returns R.  */
#define sdiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("divs%.l %4,%1:%0"                                           \
           : "=d" ((USItype) (q)), "=d" ((USItype) (r))                 \
           : "0" ((USItype) (n0)), "1" ((USItype) (n1)),                \
             "dmi" ((USItype) (d)))
/* Store in COUNT the result of "bfffo" applied to X; the bit-field offset
   and width are both supplied by the constant operand 0.  */
#define count_leading_zeros(count, x) \
  __asm__ ("bfffo %1{%b2:%b2},%0"                                       \
           : "=d" ((USItype) (count))                                   \
           : "od" ((USItype) (x)),                                      \
             "n" (0))
756 #define COUNT_LEADING_ZEROS_0 32
757 #else /* not mc68020 */
/* 32x32->64 unsigned multiply for 68000 variants without the 32-bit
   "mulu.l": the product is assembled from four "mulu" partial products
   with explicit carry handling.  XH and XL receive the result; two scratch
   data registers are used, the first of which is tied to A on entry.  */
#define umul_ppmm(xh, xl, a, b) \
  do {                                                            \
    USItype __umul_scratch_a, __umul_scratch_b;                   \
    __asm__ ("| Inlined umul_ppmm                         \n"     \
 "        move%.l %5,%3                                       \n" \
 "        move%.l %2,%0                                       \n" \
 "        move%.w %3,%1                                       \n" \
 "        swap  %3                                            \n" \
 "        swap  %0                                            \n" \
 "        mulu  %2,%1                                         \n" \
 "        mulu  %3,%0                                         \n" \
 "        mulu  %2,%3                                         \n" \
 "        swap  %2                                            \n" \
 "        mulu  %5,%2                                         \n" \
 "        add%.l        %3,%2                                 \n" \
 "        jcc   1f                                            \n" \
 "        add%.l        %#0x10000,%0                          \n" \
 "1:    move%.l %2,%3                                         \n" \
 "        clr%.w        %2                                    \n" \
 "        swap  %2                                            \n" \
 "        swap  %3                                            \n" \
 "        clr%.w        %3                                    \n" \
 "        add%.l        %3,%1                                 \n" \
 "        addx%.l %2,%0                                       \n" \
 "        | End inlined umul_ppmm"                                \
             : "=&d" ((USItype) (xh)),                            \
               "=&d" ((USItype) (xl)),                            \
               "=d" (__umul_scratch_a),                           \
               "=&d" (__umul_scratch_b)                           \
             : "%2" ((USItype) (a)),                              \
               "d" ((USItype) (b)));                              \
  } while (0)
786 #define UMUL_TIME 100
787 #define UDIV_TIME 400
788 #endif /* not mc68020 */
789 #endif /* mc68000 */
790
791
792 /***************************************
793  **************  88000  ****************
794  ***************************************/
795 #if defined (__m88000__) && W_TYPE_SIZE == 32
/* Two-word addition (sh,sl) = (ah,al) + (bh,bl): "addu.co" on the low
   words, then "addu.ci" on the high words.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addu.co %1,%r4,%r5\n"                                       \
           "addu.ci %0,%r2,%r3"                                         \
           : "=r" ((USItype) (sh)), "=&r" ((USItype) (sl))              \
           : "%rJ" ((USItype) (ah)), "rJ" ((USItype) (bh)),             \
             "%rJ" ((USItype) (al)), "rJ" ((USItype) (bl)))
/* Two-word subtraction (sh,sl) = (ah,al) - (bh,bl): "subu.co" on the low
   words, then "subu.ci" on the high words.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subu.co %1,%r4,%r5\n"                                       \
           "subu.ci %0,%r2,%r3"                                         \
           : "=r" ((USItype) (sh)), "=&r" ((USItype) (sl))              \
           : "rJ" ((USItype) (ah)), "rJ" ((USItype) (bh)),              \
             "rJ" ((USItype) (al)), "rJ" ((USItype) (bl)))
/* Store in COUNT the "ff1" result for X XOR-ed with 31, i.e. a bit index
   converted to a leading-zero count.  */
#define count_leading_zeros(count, x) \
  do {                                                                  \
    USItype __bit_index;                                                \
    __asm__ ("ff1 %0,%1"                                                \
             : "=r" (__bit_index)                                       \
             : "r" ((USItype) (x)));                                    \
    (count) = __bit_index ^ 31;                                         \
  } while (0)
822 #define COUNT_LEADING_ZEROS_0 63 /* sic */
823 #if defined (__m88110__)
/* Multiply U by V with "mulu.d" into a double-word temporary, then split
   it: WH receives the __h member and WL the __l member of the union.  */
#define umul_ppmm(wh, wl, u, v) \
  do {                                                                  \
    union {                                                             \
      UDItype __ll;                                                     \
      struct {USItype __h, __l;} __i;                                   \
    } __prod;                                                           \
    __asm__ ("mulu.d %0,%1,%2"                                          \
             : "=r" (__prod.__ll)                                       \
             : "r" (u), "r" (v));                                       \
    (wh) = __prod.__i.__h;                                              \
    (wl) = __prod.__i.__l;                                              \
  } while (0)
/* Divide the two-word value (n1,n0) by D with "divu.d".  The dividend is
   assembled in __x and the double-word result is written to __q; the low
   word of __q is the quotient Q and the remainder R is recomputed as
   n0 - Q * d.  Note that N0 and D are each evaluated more than once.

   The quotient word was previously spelled "__q.__l", which is not a
   member of the union; the low word lives in the nested struct as
   "__q.__i.__l".  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  ({union {UDItype __ll;                                                \
           struct {USItype __h, __l;} __i;                              \
          } __x, __q;                                                   \
  __x.__i.__h = (n1); __x.__i.__l = (n0);                               \
  __asm__ ("divu.d %0,%1,%2"                                            \
           : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d));                \
  (r) = (n0) - __q.__i.__l * (d); (q) = __q.__i.__l; })
841 #define UMUL_TIME 5
842 #define UDIV_TIME 25
843 #else
844 #define UMUL_TIME 17
845 #define UDIV_TIME 150
846 #endif /* __m88110__ */
847 #endif /* __m88000__ */
848
849 /***************************************
850  **************  MIPS  *****************
851  ***************************************/
852 #if defined (__mips__) && W_TYPE_SIZE == 32
853 #if defined (__clang__) || (__GNUC__ >= 5) || (__GNUC__ == 4 && \
854                                                __GNUC_MINOR__ >= 4)
/* Unsigned 32x32->64 multiply in plain C: (w1,w0) = u * v, with W1 the
   high and W0 the low 32 bits of the product.  U and V are parenthesised
   so that expression arguments such as "a + 1" are multiplied as a whole
   instead of being torn apart by operator precedence, and U is widened to
   UDItype before the multiplication so the product is not truncated.  */
#define umul_ppmm(w1, w0, u, v) \
  do {                                                                  \
    UDItype _r;                                                         \
    _r = (UDItype)(u) * (v);                                            \
    (w1) = _r >> 32;                                                    \
    (w0) = (USItype) _r;                                                \
  } while (0)
862 #elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7
/* Unsigned multiply via "multu"; W0 is bound to the "l" constraint and W1
   to the "h" constraint, so no explicit move instructions are needed.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("multu %2,%3"                                                \
           : "=l" ((USItype) (w0)), "=h" ((USItype) (w1))               \
           : "d" ((USItype) (u)), "d" ((USItype) (v)))
869 #else
/* Unsigned multiply via "multu", fetching the result explicitly: "mflo"
   into W0 and "mfhi" into W1.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("multu %2,%3 \n"                                             \
           "mflo %0 \n"                                                 \
           "mfhi %1"                                                    \
           : "=d" ((USItype) (w0)), "=d" ((USItype) (w1))               \
           : "d" ((USItype) (u)), "d" ((USItype) (v)))
878 #endif
879 #define UMUL_TIME 10
880 #define UDIV_TIME 100
881 #endif /* __mips__ */
882
883 /***************************************
884  **************  MIPS/64  **************
885  ***************************************/
886 #if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64
887 #if (__GNUC__ >= 5) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
888 typedef unsigned int UTItype __attribute__ ((mode (TI)));
/* Unsigned 64x64->128 multiply in plain C: (w1,w0) = u * v, with W1 the
   high and W0 the low 64 bits of the product.  U and V are parenthesised
   so that expression arguments such as "a + 1" are multiplied as a whole
   instead of being torn apart by operator precedence, and U is widened to
   UTItype before the multiplication so the product is not truncated.  */
#define umul_ppmm(w1, w0, u, v) \
  do {                                                                 \
    UTItype _r;                                                        \
    _r = (UTItype)(u) * (v);                                           \
    (w1) = _r >> 64;                                                   \
    (w0) = (UDItype) _r;                                               \
  } while (0)
896 #elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7
/* Unsigned multiply via "dmultu"; W0 is bound to the "l" constraint and W1
   to the "h" constraint, so no explicit move instructions are needed.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("dmultu %2,%3"                                               \
           : "=l" ((UDItype) (w0)), "=h" ((UDItype) (w1))               \
           : "d" ((UDItype) (u)), "d" ((UDItype) (v)))
903 #else
/* Unsigned multiply via "dmultu", fetching the result explicitly: "mflo"
   into W0 and "mfhi" into W1.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("dmultu %2,%3 \n"                                            \
           "mflo %0 \n"                                                 \
           "mfhi %1"                                                    \
           : "=d" ((UDItype) (w0)), "=d" ((UDItype) (w1))               \
           : "d" ((UDItype) (u)), "d" ((UDItype) (v)))
912 #endif
913 #define UMUL_TIME 20
914 #define UDIV_TIME 140
915 #endif /* __mips__ */
916
917
918 /***************************************
919  **************  32000  ****************
920  ***************************************/
921 #if defined (__ns32000__) && W_TYPE_SIZE == 32
/* Multiply U by V with "meid" into a double-word temporary (U is tied to
   that temporary on input), then split it: W1 receives the __h member and
   W0 the __l member of the union.  */
#define umul_ppmm(w1, w0, u, v) \
  ({                                                                    \
    union {                                                             \
      UDItype __ll;                                                     \
      struct {USItype __l, __h;} __i;                                   \
    } __prod;                                                           \
    __asm__ ("meid %2,%0"                                               \
             : "=g" (__prod.__ll)                                       \
             : "%0" ((USItype) (u)), "g" ((USItype) (v)));              \
    (w1) = __prod.__i.__h;                                              \
    (w0) = __prod.__i.__l;                                              \
  })
/* Expression form of the "meid" multiply: evaluates to the double-word
   (UDItype) result of U times V.  */
#define __umulsidi3(u, v) \
  ({                                                                    \
    UDItype __product;                                                  \
    __asm__ ("meid %2,%0"                                               \
             : "=g" (__product)                                         \
             : "%0" ((USItype) (u)), "g" ((USItype) (v)));              \
    __product;                                                          \
  })
/* Divide the two-word value (n1,n0) by D with "deid".  The dividend is
   assembled in a union that the instruction updates in place; afterwards
   its __l member is stored to R and its __h member to Q.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  ({                                                                    \
    union {                                                             \
      UDItype __ll;                                                     \
      struct {USItype __l, __h;} __i;                                   \
    } __num;                                                            \
    __num.__i.__h = (n1);                                               \
    __num.__i.__l = (n0);                                               \
    __asm__ ("deid %2,%0"                                               \
             : "=g" (__num.__ll)                                        \
             : "0" (__num.__ll), "g" ((USItype) (d)));                  \
    (r) = __num.__i.__l;                                                \
    (q) = __num.__i.__h;                                                \
  })
/* Store in COUNT the result of "ffsd" applied to X; COUNT is preloaded
   with 0 through the matching constraint "0".  The "do {" line was missing
   its continuation backslash, which ended the macro definition there and
   left the rest of the body as stray file-scope code.  */
#define count_trailing_zeros(count,x) \
  do {                                                                  \
    __asm__ ("ffsd      %2,%0"                                          \
             : "=r" ((USItype) (count))                                 \
             : "0" ((USItype) 0),                                       \
               "r" ((USItype) (x)));                                    \
  } while (0)
955 #endif /* __ns32000__ */
956
957
958 /***************************************
959  **************  PPC  ******************
960  ***************************************/
961 #if (defined (_ARCH_PPC) || defined (_IBMR2)) && W_TYPE_SIZE == 32
/* Two-word addition (sh,sl) = (ah,al) + (bh,bl).  A carrying add on the
   low words is followed by one of three high-word forms, selected at
   compile time: "addze" when BH is the constant 0, "addme" when BH is the
   constant all-ones, and the general "adde" otherwise.  Each template
   lists two mnemonics per instruction in {old|new} form.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    if (__builtin_constant_p (bh) && (bh) == 0)                         \
      {                                                                 \
        __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"         \
                 : "=r" (sh), "=&r" (sl)                                \
                 : "%r" ((USItype) (ah)), "%r" ((USItype) (al)),        \
                   "rI" ((USItype) (bl)));                              \
      }                                                                 \
    else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0)          \
      {                                                                 \
        __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"         \
                 : "=r" (sh), "=&r" (sl)                                \
                 : "%r" ((USItype) (ah)), "%r" ((USItype) (al)),        \
                   "rI" ((USItype) (bl)));                              \
      }                                                                 \
    else                                                                \
      {                                                                 \
        __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"        \
                 : "=r" (sh), "=&r" (sl)                                \
                 : "%r" ((USItype) (ah)), "r" ((USItype) (bh)),         \
                   "%r" ((USItype) (al)), "rI" ((USItype) (bl)));       \
      }                                                                 \
  } while (0)
/* Two-word subtraction (sh,sl) = (ah,al) - (bh,bl).  A carrying
   subtract-from on the low words is followed by one of five high-word
   forms, selected at compile time from constant values of AH or BH:
     AH == 0   -> "subfze" on BH      AH == ~0  -> "subfme" on BH
     BH == 0   -> "addme" on AH       BH == ~0  -> "addze" on AH
   and the general "subfe" otherwise.  Each template lists two mnemonics
   per instruction in {old|new} form.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    if (__builtin_constant_p (ah) && (ah) == 0)                         \
      {                                                                 \
        __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"     \
                 : "=r" (sh), "=&r" (sl)                                \
                 : "r" ((USItype) (bh)), "rI" ((USItype) (al)),         \
                   "r" ((USItype) (bl)));                               \
      }                                                                 \
    else if (__builtin_constant_p (ah) && (ah) ==~(USItype) 0)          \
      {                                                                 \
        __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"     \
                 : "=r" (sh), "=&r" (sl)                                \
                 : "r" ((USItype) (bh)), "rI" ((USItype) (al)),         \
                   "r" ((USItype) (bl)));                               \
      }                                                                 \
    else if (__builtin_constant_p (bh) && (bh) == 0)                    \
      {                                                                 \
        __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"       \
                 : "=r" (sh), "=&r" (sl)                                \
                 : "r" ((USItype) (ah)), "rI" ((USItype) (al)),         \
                   "r" ((USItype) (bl)));                               \
      }                                                                 \
    else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0)          \
      {                                                                 \
        __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"       \
                 : "=r" (sh), "=&r" (sl)                                \
                 : "r" ((USItype) (ah)), "rI" ((USItype) (al)),         \
                   "r" ((USItype) (bl)));                               \
      }                                                                 \
    else                                                                \
      {                                                                 \
        __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"    \
                 : "=r" (sh), "=&r" (sl)                                \
                 : "r" ((USItype) (ah)), "r" ((USItype) (bh)),          \
                   "rI" ((USItype) (al)), "r" ((USItype) (bl)));        \
      }                                                                 \
  } while (0)
/* Store in COUNT the leading-zero count of the 32-bit value X, computed
   directly by the count-leading-zeros instruction ("cntlz" / "cntlzw").  */
#define count_leading_zeros(count, x) \
  __asm__ ("{cntlz|cntlzw} %0,%1" : "=r" (count) : "r" ((USItype) (x)))
1030 #define COUNT_LEADING_ZEROS_0 32
1031 #if defined (_ARCH_PPC)
/* Unsigned 32x32->64 multiply: PH receives the "mulhwu" result for the two
   operands and PL their ordinary C product.  Both arguments are copied
   into locals first, so each is evaluated exactly once.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    USItype __m0 = (m0);                                                \
    USItype __m1 = (m1);                                                \
    __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));  \
    (pl) = __m0 * __m1;                                                 \
  } while (0)
1041 #define UMUL_TIME 15
/* Signed 32x32->64 multiply: PH receives the "mulhw" result for the two
   operands and PL the low 32 bits of their product.  Both arguments are
   copied into locals first, so each is evaluated exactly once.

   PH is passed as a plain, parenthesised lvalue, matching umul_ppmm; the
   former "(SItype) ph" was an unparenthesised cast used as an lvalue.
   The low word is computed with unsigned arithmetic, which yields the
   same bits as the signed product without signed-overflow undefined
   behaviour.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    SItype __m0 = (m0), __m1 = (m1);                                    \
    __asm__ ("mulhw %0,%1,%2"                                           \
             : "=r" (ph)                                                \
             : "%r" (__m0),                                             \
               "r" (__m1));                                             \
    (pl) = (USItype) __m0 * (USItype) __m1;                             \
  } while (0)
1051 #define SMUL_TIME 14
1052 #define UDIV_TIME 120
1053 #else
/* Unsigned multiply built on the "mul" instruction: XH is operand 0 and
   XL comes back through the "q" constraint.  Afterwards XH is adjusted by
   adding each operand masked with the sign (bit 31) of the other.  */
#define umul_ppmm(xh, xl, m0, m1) \
  do {                                                                  \
    USItype __m0 = (m0);                                                \
    USItype __m1 = (m1);                                                \
    __asm__ ("mul %0,%2,%3"                                             \
             : "=r" (xh), "=q" (xl)                                     \
             : "r" (__m0), "r" (__m1));                                 \
    (xh) += ((((SItype) __m0 >> 31) & __m1)                             \
             + (((SItype) __m1 >> 31) & __m0));                         \
  } while (0)
1065 #define UMUL_TIME 8
/* Signed multiply using the "mul" instruction directly: XH is operand 0
   and XL comes back through the "q" constraint.  */
#define smul_ppmm(xh, xl, m0, m1) \
  __asm__ ("mul %0,%2,%3"                                               \
           : "=r" ((SItype) (xh)), "=q" ((SItype) (xl))                 \
           : "r" (m0), "r" (m1))
1072 #define SMUL_TIME 4
/* Signed divide using the "div" instruction: Q is operand 0, R comes back
   through the "q" constraint, and NL enters in the same register as R.  */
#define sdiv_qrnnd(q, r, nh, nl, d) \
  __asm__ ("div %0,%2,%4"                                               \
           : "=r" ((SItype) (q)),                                       \
             "=q" ((SItype) (r))                                        \
           : "r" ((SItype) (nh)),                                       \
             "1" ((SItype) (nl)),                                       \
             "r" ((SItype) (d)))
1077 #define UDIV_TIME 100
1078 #endif
1079 #endif /* Power architecture variants.  */
1080
1081 /* Powerpc 64 bit support taken from gmp-4.1.2. */
1082 /* We should test _IBMR2 here when we add assembly support for the system
1083    vendor compilers.  */
1084 #if 0 /* Not yet enabled because we don't have hardware for a test. */
1085 #if (defined (_ARCH_PPC) || defined (__powerpc__)) && W_TYPE_SIZE == 64
/* Two-word addition (sh,sl) = (ah,al) + (bh,bl) for 64-bit PowerPC.
   Three asm variants are selected at compile time:
     - bh is the constant 0:     low add, then add-to-zero-extended on ah;
     - bh is the constant ~0:    low add, then add-to-minus-one on ah;
     - otherwise:                low add, then add-extended of ah and bh.
   sl is early-clobber ("=&r") because it is written before the high
   inputs are read.
   NOTE(review): the `{old|new}' braces look like GCC assembler-dialect
   alternatives (POWER vs. PowerPC mnemonics) -- confirm.  */
1086 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1087   do {                                                                  \
1088     if (__builtin_constant_p (bh) && (bh) == 0)                         \
1089       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"           \
1090              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
1091     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)         \
1092       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"           \
1093              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
1094     else                                                                \
1095       __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"          \
1096              : "=r" (sh), "=&r" (sl)                                    \
1097              : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));              \
1098   } while (0)
/* Two-word subtraction (sh,sl) = (ah,al) - (bh,bl) for 64-bit PowerPC.
   Special-cased at compile time when ah or bh is the constant 0 or ~0,
   so that only one of the two high words has to be passed to the asm;
   the general case uses subtract-from-extended on both high words.  */
1099 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1100   do {                                                                  \
1101     if (__builtin_constant_p (ah) && (ah) == 0)                         \
1102       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"       \
1103                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
1104     else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)         \
1105       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"       \
1106                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
1107     else if (__builtin_constant_p (bh) && (bh) == 0)                    \
1108       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"         \
1109                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
1110     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)         \
1111       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"         \
1112                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
1113     else                                                                \
1114       __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"      \
1115                : "=r" (sh), "=&r" (sl)                                  \
1116                : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
1117   } while (0)
/* count = result of `cntlzd' applied to x.  COUNT_LEADING_ZEROS_0 records
   the value used for x == 0 (64 here).  */
1118 #define count_leading_zeros(count, x) \
1119   __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
1120 #define COUNT_LEADING_ZEROS_0 64
/* Unsigned 64x64 -> 128 bit multiply: ph receives the high word from
   `mulhdu', pl the low word from an ordinary C multiplication.
   m0 and m1 are captured once in __m0/__m1 and only those copies are
   used afterwards, so arguments with side effects are evaluated exactly
   once (the asm previously re-read the raw macro arguments).
   Note that this whole section is currently compiled out by the
   enclosing `#if 0'.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    UDItype __m0 = (m0), __m1 = (m1);                                   \
    __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));  \
    (pl) = __m0 * __m1;                                                 \
  } while (0)
#define UMUL_TIME 15
/* Signed 64x64 -> 128 bit multiply: ph receives the high word from
   `mulhd'.  The low word is identical for signed and unsigned products,
   so it is computed in UDItype to avoid signed-overflow undefined
   behaviour in the C multiplication.  As above, only the captured
   copies __m0/__m1 are handed to the asm.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    DItype __m0 = (m0), __m1 = (m1);                                    \
    __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));   \
    (pl) = (UDItype) __m0 * (UDItype) __m1;                             \
  } while (0)
#define SMUL_TIME 14  /* ??? */
#define UDIV_TIME 120 /* ??? */
1136 #endif /* 64-bit PowerPC.  */
1137 #endif /* if 0 */
1138
1139 /***************************************
1140  **************  PYR  ******************
1141  ***************************************/
1142 #if defined (__pyr__) && W_TYPE_SIZE == 32
/* Two-word addition (sh,sl) = (ah,al) + (bh,bl): `addw' on the low words
   (operands 5 and 1), then `addwc' on the high words (operands 3 and 0).
   sh and sl start out holding ah and al through the "%0" / "%1" matching
   constraints.  */
1143 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1144   __asm__ ("addw        %5,%1 \n" \
1145            "addwc       %3,%0"                                          \
1146            : "=r" ((USItype)(sh)),                                      \
1147              "=&r" ((USItype)(sl))                                      \
1148            : "%0" ((USItype)(ah)),                                      \
1149              "g" ((USItype)(bh)),                                       \
1150              "%1" ((USItype)(al)),                                      \
1151              "g" ((USItype)(bl)))
/* Two-word subtraction (sh,sl) = (ah,al) - (bh,bl): `subw' on the low
   words, then `subwb' on the high words.  Same operand layout as
   add_ssaaaa, but without the commutative `%' modifier.  */
1152 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1153   __asm__ ("subw        %5,%1 \n" \
1154            "subwb       %3,%0"                                          \
1155            : "=r" ((USItype)(sh)),                                      \
1156              "=&r" ((USItype)(sl))                                      \
1157            : "0" ((USItype)(ah)),                                       \
1158              "g" ((USItype)(bh)),                                       \
1159              "1" ((USItype)(al)),                                       \
1160              "g" ((USItype)(bl)))
1161 /* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP.  */
/* Unsigned 32x32 -> 64 bit multiply.  u is moved into one half of a
   UDItype register pair (`%R0'), `uemul' multiplies by v, and the result
   is split through the union into w1 (__h) and w0 (__l).  Implemented as
   a GNU statement expression.  */
1162 #define umul_ppmm(w1, w0, u, v) \
1163   ({union {UDItype __ll;                                                \
1164            struct {USItype __h, __l;} __i;                              \
1165           } __xx;                                                       \
1166   __asm__ ("movw %1,%R0 \n" \
1167            "uemul %2,%0"                                                \
1168            : "=&r" (__xx.__ll)                                          \
1169            : "g" ((USItype) (u)),                                       \
1170              "g" ((USItype)(v)));                                       \
1171   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
1172 #endif /* __pyr__ */
1173
1174
1175 /***************************************
1176  **************  RT/ROMP  **************
1177  ***************************************/
1178 #if defined (__ibm032__) /* RT/ROMP */  && W_TYPE_SIZE == 32
/* Two-word addition (sh,sl) = (ah,al) + (bh,bl): `a' on the low words
   (operands 1 and 5), then `ae' on the high words (operands 0 and 3).
   sh/sl are tied to ah/al by the "%0" / "%1" matching constraints.  */
1179 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1180   __asm__ ("a %1,%5 \n" \
1181            "ae %0,%3"                                                   \
1182            : "=r" ((USItype)(sh)),                                      \
1183              "=&r" ((USItype)(sl))                                      \
1184            : "%0" ((USItype)(ah)),                                      \
1185              "r" ((USItype)(bh)),                                       \
1186              "%1" ((USItype)(al)),                                      \
1187              "r" ((USItype)(bl)))
/* Two-word subtraction (sh,sl) = (ah,al) - (bh,bl): `s' on the low
   words, then `se' on the high words.  */
1188 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1189   __asm__ ("s %1,%5\n" \
1190            "se %0,%3"                                                   \
1191            : "=r" ((USItype)(sh)),                                      \
1192              "=&r" ((USItype)(sl))                                      \
1193            : "0" ((USItype)(ah)),                                       \
1194              "r" ((USItype)(bh)),                                       \
1195              "1" ((USItype)(al)),                                       \
1196              "r" ((USItype)(bl)))
/* Unsigned 32x32 -> 64 bit multiply.  r2 is cleared, __m0 is moved to
   r10 with `mts', and sixteen `m r2,%3' instructions follow; the high
   word is then copied from r2 to ph and the low word read back from r10
   with `mfs'.  r2 is declared as clobbered.  The trailing C statement
   applies the same high-word correction as the other signed-multiply
   based variants: it adds __m1 when bit 31 of __m0 is set and __m0 when
   bit 31 of __m1 is set.
   NOTE(review): presumably each `m' is a multi-bit multiply step --
   confirm against the ROMP documentation.  */
1197 #define umul_ppmm(ph, pl, m0, m1) \
1198   do {                                                                  \
1199     USItype __m0 = (m0), __m1 = (m1);                                   \
1200     __asm__ (                                                           \
1201        "s       r2,r2    \n" \
1202        "mts     r10,%2   \n" \
1203        "m       r2,%3    \n" \
1204        "m       r2,%3    \n" \
1205        "m       r2,%3    \n" \
1206        "m       r2,%3    \n" \
1207        "m       r2,%3    \n" \
1208        "m       r2,%3    \n" \
1209        "m       r2,%3    \n" \
1210        "m       r2,%3    \n" \
1211        "m       r2,%3    \n" \
1212        "m       r2,%3    \n" \
1213        "m       r2,%3    \n" \
1214        "m       r2,%3    \n" \
1215        "m       r2,%3    \n" \
1216        "m       r2,%3    \n" \
1217        "m       r2,%3    \n" \
1218        "m       r2,%3    \n" \
1219        "cas     %0,r2,r0 \n" \
1220        "mfs     r10,%1"                                                 \
1221              : "=r" ((USItype)(ph)),                                    \
1222                "=r" ((USItype)(pl))                                     \
1223              : "%r" (__m0),                                             \
1224                 "r" (__m1)                                              \
1225              : "r2");                                                   \
1226     (ph) += ((((SItype) __m0 >> 31) & __m1)                             \
1227              + (((SItype) __m1 >> 31) & __m0));                         \
1228   } while (0)
1229 #define UMUL_TIME 20
1230 #define UDIV_TIME 200
/* Count leading zeros of the 32-bit value x in two halves: when x is at
   least 0x10000 the `clz' instruction is applied to the upper 16 bits,
   otherwise it is applied to x itself and 16 is added.  Note that x is
   expanded more than once.
   NOTE(review): this implies `clz' only looks at a 16-bit quantity --
   confirm.  */
1231 #define count_leading_zeros(count, x) \
1232   do {                                                                  \
1233     if ((x) >= 0x10000)                                                 \
1234       __asm__ ("clz     %0,%1"                                          \
1235                : "=r" ((USItype)(count))                                \
1236                : "r" ((USItype)(x) >> 16));                             \
1237     else                                                                \
1238       {                                                                 \
1239         __asm__ ("clz   %0,%1"                                          \
1240                  : "=r" ((USItype)(count))                              \
1241                  : "r" ((USItype)(x)));                                 \
1242         (count) += 16;                                                  \
1243       }                                                                 \
1244   } while (0)
1245 #endif /* RT/ROMP */
1246
1247
1248 /***************************************
1249  **************  SH2  ******************
1250  ***************************************/
1251 #if (defined (__sh2__) || defined(__sh3__) || defined(__SH4__) ) \
1252     && W_TYPE_SIZE == 32
/* Unsigned 32x32 -> 64 bit multiply: `dmulu.l' multiplies u by v, then
   the low word is stored from macl into w0 and the high word from mach
   into w1.  Both macl and mach are listed as clobbered.  */
1253 #define umul_ppmm(w1, w0, u, v) \
1254   __asm__ (                                                             \
1255         "dmulu.l %2,%3\n"  \
1256         "sts    macl,%1\n" \
1257         "sts    mach,%0"                                                \
1258            : "=r" ((USItype)(w1)),                                      \
1259              "=r" ((USItype)(w0))                                       \
1260            : "r" ((USItype)(u)),                                        \
1261              "r" ((USItype)(v))                                         \
1262            : "macl", "mach")
1263 #define UMUL_TIME 5
1264 #endif
1265
1266 /***************************************
1267  **************  SPARC  ****************
1268  ***************************************/
1269 #if defined (__sparc__) && W_TYPE_SIZE == 32
/* Two-word addition (sh,sl) = (ah,al) + (bh,bl): `addcc' adds the low
   words and sets the condition codes, `addx' then adds the high words.
   sl is early-clobber because it is written before ah/bh are read.
   The condition-code clobber comes from __CLOBBER_CC, which is defined
   outside this part of the file.  */
1270 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1271   __asm__ ("addcc %r4,%5,%1\n" \
1272            "addx %r2,%3,%0"                                             \
1273            : "=r" ((USItype)(sh)),                                      \
1274              "=&r" ((USItype)(sl))                                      \
1275            : "%rJ" ((USItype)(ah)),                                     \
1276              "rI" ((USItype)(bh)),                                      \
1277              "%rJ" ((USItype)(al)),                                     \
1278              "rI" ((USItype)(bl))                                       \
1279            __CLOBBER_CC)
/* Two-word subtraction (sh,sl) = (ah,al) - (bh,bl): `subcc' on the low
   words, then `subx' on the high words.  Same operand layout as
   add_ssaaaa, without the commutative `%' modifier.  */
1280 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1281   __asm__ ("subcc %r4,%5,%1\n" \
1282            "subx %r2,%3,%0"                                             \
1283            : "=r" ((USItype)(sh)),                                      \
1284              "=&r" ((USItype)(sl))                                      \
1285            : "rJ" ((USItype)(ah)),                                      \
1286              "rI" ((USItype)(bh)),                                      \
1287              "rJ" ((USItype)(al)),                                      \
1288              "rI" ((USItype)(bl))                                       \
1289            __CLOBBER_CC)
1290 #if defined (__sparc_v8__)
1291 /* Don't match immediate range because, 1) it is not often useful,
1292    2) the 'I' flag thinks of the range as a 13 bit signed interval,
1293    while we want to match a 13 bit interval, sign extended to 32 bits,
1294    but INTERPRETED AS UNSIGNED.  */
/* Unsigned 32x32 -> 64 bit multiply for SPARC v8: `umul' leaves the low
   word in w0, and the high word is then read from the %y register into
   w1.  */
1295 #define umul_ppmm(w1, w0, u, v) \
1296   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
1297            : "=r" ((USItype)(w1)),                                      \
1298              "=r" ((USItype)(w0))                                       \
1299            : "r" ((USItype)(u)),                                        \
1300              "r" ((USItype)(v)))
1301 #define UMUL_TIME 5
1302 #ifndef SUPERSPARC      /* SuperSPARC's udiv only handles 53 bit dividends */
/* Divide the two-word value (n1,n0) by d.  n1 is moved into %y, three
   nops follow, and `udiv' divides by d with n0 as the low dividend word,
   producing the quotient.  The remainder is then derived in C as
   n0 - q * d.  Note that n0 and d are expanded twice.  */
1303 #define udiv_qrnnd(q, r, n1, n0, d) \
1304   do {                                                                  \
1305     USItype __q;                                                        \
1306     __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0"                     \
1307              : "=r" ((USItype)(__q))                                    \
1308              : "r" ((USItype)(n1)),                                     \
1309                "r" ((USItype)(n0)),                                     \
1310                "r" ((USItype)(d)));                                     \
1311     (r) = (n0) - __q * (d);                                             \
1312     (q) = __q;                                                          \
1313   } while (0)
1314 #define UDIV_TIME 25
1315 #endif /* SUPERSPARC */
1316 #else /* ! __sparc_v8__ */
1317 #if defined (__sparclite__)
1318 /* This has hardware multiply but not divide.  It also has two additional
1319    instructions scan (ffs from high bit) and divscc.  */
/* Unsigned 32x32 -> 64 bit multiply for SPARClite; same instruction
   sequence as the SPARC v8 variant (`umul', then read the high word
   from %y).  */
1320 #define umul_ppmm(w1, w0, u, v) \
1321   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
1322            : "=r" ((USItype)(w1)),                                      \
1323              "=r" ((USItype)(w0))                                       \
1324            : "r" ((USItype)(u)),                                        \
1325              "r" ((USItype)(v)))
1326 #define UMUL_TIME 5
/* Divide the two-word value (n1,n0) by d using 32 unrolled `divscc'
   steps.  n1 is written to %y, the condition codes are reset with
   `tst %g0', and the steps accumulate in %g1 with the last one writing
   the quotient to q.  The remainder is read from %y; the annulled
   `bl,a' branch adds d back to it only when the branch is taken.
   %g1 and the condition codes are declared as clobbered.  */
1327 #define udiv_qrnnd(q, r, n1, n0, d) \
1328   __asm__ ("! Inlined udiv_qrnnd                                     \n" \
1329  "        wr    %%g0,%2,%%y     ! Not a delayed write for sparclite  \n" \
1330  "        tst   %%g0                                                 \n" \
1331  "        divscc        %3,%4,%%g1                                   \n" \
1332  "        divscc        %%g1,%4,%%g1                                 \n" \
1333  "        divscc        %%g1,%4,%%g1                                 \n" \
1334  "        divscc        %%g1,%4,%%g1                                 \n" \
1335  "        divscc        %%g1,%4,%%g1                                 \n" \
1336  "        divscc        %%g1,%4,%%g1                                 \n" \
1337  "        divscc        %%g1,%4,%%g1                                 \n" \
1338  "        divscc        %%g1,%4,%%g1                                 \n" \
1339  "        divscc        %%g1,%4,%%g1                                 \n" \
1340  "        divscc        %%g1,%4,%%g1                                 \n" \
1341  "        divscc        %%g1,%4,%%g1                                 \n" \
1342  "        divscc        %%g1,%4,%%g1                                 \n" \
1343  "        divscc        %%g1,%4,%%g1                                 \n" \
1344  "        divscc        %%g1,%4,%%g1                                 \n" \
1345  "        divscc        %%g1,%4,%%g1                                 \n" \
1346  "        divscc        %%g1,%4,%%g1                                 \n" \
1347  "        divscc        %%g1,%4,%%g1                                 \n" \
1348  "        divscc        %%g1,%4,%%g1                                 \n" \
1349  "        divscc        %%g1,%4,%%g1                                 \n" \
1350  "        divscc        %%g1,%4,%%g1                                 \n" \
1351  "        divscc        %%g1,%4,%%g1                                 \n" \
1352  "        divscc        %%g1,%4,%%g1                                 \n" \
1353  "        divscc        %%g1,%4,%%g1                                 \n" \
1354  "        divscc        %%g1,%4,%%g1                                 \n" \
1355  "        divscc        %%g1,%4,%%g1                                 \n" \
1356  "        divscc        %%g1,%4,%%g1                                 \n" \
1357  "        divscc        %%g1,%4,%%g1                                 \n" \
1358  "        divscc        %%g1,%4,%%g1                                 \n" \
1359  "        divscc        %%g1,%4,%%g1                                 \n" \
1360  "        divscc        %%g1,%4,%%g1                                 \n" \
1361  "        divscc        %%g1,%4,%%g1                                 \n" \
1362  "        divscc        %%g1,%4,%0                                   \n" \
1363  "        rd    %%y,%1                                               \n" \
1364  "        bl,a 1f                                                    \n" \
1365  "        add   %1,%4,%1                                             \n" \
1366  "1:    ! End of inline udiv_qrnnd"                                     \
1367            : "=r" ((USItype)(q)),                                       \
1368              "=r" ((USItype)(r))                                        \
1369            : "r" ((USItype)(n1)),                                       \
1370              "r" ((USItype)(n0)),                                       \
1371              "rI" ((USItype)(d))                                        \
1372            : "%g1" __AND_CLOBBER_CC)
1373 #define UDIV_TIME 37
/* Store in COUNT the result of the SPARClite `scan' instruction applied
   to X.  COUNT is the asm output (%0) and X the input (%1); the operands
   used to be swapped, which made the macro overwrite X and never assign
   COUNT.  The output is passed without a cast because a cast expression
   is not an lvalue for current GCC.
   NOTE(review): newer upstream copies of this macro use `scan %1,1,%0';
   the second operand is left at 0 here -- confirm the intended mask
   against the SPARClite manual before enabling this target.  */
#define count_leading_zeros(count, x) \
  __asm__ ("scan %1,0,%0"                                               \
           : "=r" (count)                                               \
           : "r" ((USItype)(x)))
/* Early sparclites return 63 for an argument of 0, but they warn that future
   implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
   undefined.  */
1381 #endif /* __sparclite__ */
1382 #endif /* __sparc_v8__ */
1383 /* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd.  */
1384 #ifndef umul_ppmm
/* Unsigned 32x32 -> 64 bit multiply built from `mulscc' steps, used when
   no earlier SPARC variant defined umul_ppmm.  u is written to %y and
   %g2 is set to u masked with the sign of v (sra 31, then and).  After
   %g1 is cleared, 32 `mulscc %g1,v,%g1' steps run, followed by a final
   `mulscc %g1,0,%g1'.  The high word w1 is %g1 + %g2 and the low word
   w0 is read from %y.  %g1, %g2 and the condition codes are declared as
   clobbered.  The asm comments warn that the first instructions must
   not be reordered (they fill the delay after `wr').  */
1385 #define umul_ppmm(w1, w0, u, v) \
1386   __asm__ ("! Inlined umul_ppmm                                        \n" \
1387  "        wr    %%g0,%2,%%y     ! SPARC has 0-3 delay insn after a wr  \n" \
1388  "        sra   %3,31,%%g2      ! Don't move this insn                 \n" \
1389  "        and   %2,%%g2,%%g2    ! Don't move this insn                 \n" \
1390  "        andcc %%g0,0,%%g1     ! Don't move this insn                 \n" \
1391  "        mulscc        %%g1,%3,%%g1                                   \n" \
1392  "        mulscc        %%g1,%3,%%g1                                   \n" \
1393  "        mulscc        %%g1,%3,%%g1                                   \n" \
1394  "        mulscc        %%g1,%3,%%g1                                   \n" \
1395  "        mulscc        %%g1,%3,%%g1                                   \n" \
1396  "        mulscc        %%g1,%3,%%g1                                   \n" \
1397  "        mulscc        %%g1,%3,%%g1                                   \n" \
1398  "        mulscc        %%g1,%3,%%g1                                   \n" \
1399  "        mulscc        %%g1,%3,%%g1                                   \n" \
1400  "        mulscc        %%g1,%3,%%g1                                   \n" \
1401  "        mulscc        %%g1,%3,%%g1                                   \n" \
1402  "        mulscc        %%g1,%3,%%g1                                   \n" \
1403  "        mulscc        %%g1,%3,%%g1                                   \n" \
1404  "        mulscc        %%g1,%3,%%g1                                   \n" \
1405  "        mulscc        %%g1,%3,%%g1                                   \n" \
1406  "        mulscc        %%g1,%3,%%g1                                   \n" \
1407  "        mulscc        %%g1,%3,%%g1                                   \n" \
1408  "        mulscc        %%g1,%3,%%g1                                   \n" \
1409  "        mulscc        %%g1,%3,%%g1                                   \n" \
1410  "        mulscc        %%g1,%3,%%g1                                   \n" \
1411  "        mulscc        %%g1,%3,%%g1                                   \n" \
1412  "        mulscc        %%g1,%3,%%g1                                   \n" \
1413  "        mulscc        %%g1,%3,%%g1                                   \n" \
1414  "        mulscc        %%g1,%3,%%g1                                   \n" \
1415  "        mulscc        %%g1,%3,%%g1                                   \n" \
1416  "        mulscc        %%g1,%3,%%g1                                   \n" \
1417  "        mulscc        %%g1,%3,%%g1                                   \n" \
1418  "        mulscc        %%g1,%3,%%g1                                   \n" \
1419  "        mulscc        %%g1,%3,%%g1                                   \n" \
1420  "        mulscc        %%g1,%3,%%g1                                   \n" \
1421  "        mulscc        %%g1,%3,%%g1                                   \n" \
1422  "        mulscc        %%g1,%3,%%g1                                   \n" \
1423  "        mulscc        %%g1,0,%%g1                                    \n" \
1424  "        add   %%g1,%%g2,%0                                           \n" \
1425  "        rd    %%y,%1"                                                 \
1426            : "=r" ((USItype)(w1)),                                      \
1427              "=r" ((USItype)(w0))                                       \
1428            : "%rI" ((USItype)(u)),                                      \
1429              "r" ((USItype)(v))                                         \
1430            : "%g1", "%g2" __AND_CLOBBER_CC)
1431 #define UMUL_TIME 39            /* 39 instructions */
1432 #endif
1433 #ifndef udiv_qrnnd
1434 #ifndef LONGLONG_STANDALONE
1435 #define udiv_qrnnd(q, r, n1, n0, d) \
1436   do { USItype __r;                                                     \
1437     (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));                         \
1438     (r) = __r;                                                          \
1439   } while (0)
1440 extern USItype __udiv_qrnnd ();
1441 #define UDIV_TIME 140
1442 #endif /* LONGLONG_STANDALONE */
1443 #endif /* udiv_qrnnd */
1444 #endif /* __sparc__ */
1445
1446
1447 /***************************************
1448  **************  VAX  ******************
1449  ***************************************/
1450 #if defined (__vax__) && W_TYPE_SIZE == 32
/* Two-word addition (sh,sl) = (ah,al) + (bh,bl): `addl2' on the low
   words, then `adwc' on the high words.  Operands use the general "g"
   constraint; sh/sl are tied to ah/al by the matching constraints.  */
1451 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1452   __asm__ ("addl2 %5,%1\n" \
1453            "adwc %3,%0"                                                 \
1454            : "=g" ((USItype)(sh)),                                      \
1455              "=&g" ((USItype)(sl))                                      \
1456            : "%0" ((USItype)(ah)),                                      \
1457              "g" ((USItype)(bh)),                                       \
1458              "%1" ((USItype)(al)),                                      \
1459              "g" ((USItype)(bl)))
/* Two-word subtraction (sh,sl) = (ah,al) - (bh,bl): `subl2' on the low
   words, then `sbwc' on the high words.  */
1460 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1461   __asm__ ("subl2 %5,%1\n" \
1462            "sbwc %3,%0"                                                 \
1463            : "=g" ((USItype)(sh)),                                      \
1464              "=&g" ((USItype)(sl))                                      \
1465            : "0" ((USItype)(ah)),                                       \
1466              "g" ((USItype)(bh)),                                       \
1467              "1" ((USItype)(al)),                                       \
1468              "g" ((USItype)(bl)))
/* Unsigned 32x32 -> 64 bit multiply.  `emul' writes a UDItype product
   that is split through the union (low word first: __l, then __h).  The
   high word is then corrected by adding __m1 when bit 31 of __m0 is set
   and __m0 when bit 31 of __m1 is set, as in the other variants built
   on a signed multiply.  */
1469 #define umul_ppmm(xh, xl, m0, m1) \
1470   do {                                                                  \
1471     union {UDItype __ll;                                                \
1472            struct {USItype __l, __h;} __i;                              \
1473           } __xx;                                                       \
1474     USItype __m0 = (m0), __m1 = (m1);                                   \
1475     __asm__ ("emul %1,%2,$0,%0"                                         \
1476              : "=g" (__xx.__ll)                                         \
1477              : "g" (__m0),                                              \
1478                "g" (__m1));                                             \
1479     (xh) = __xx.__i.__h; (xl) = __xx.__i.__l;                           \
1480     (xh) += ((((SItype) __m0 >> 31) & __m1)                             \
1481              + (((SItype) __m1 >> 31) & __m0));                         \
1482   } while (0)
/* Signed divide: n1 and n0 are packed into a DItype through the union
   (n0 low, n1 high) and `ediv' divides that value by d, writing the
   quotient to q and the remainder to r.  n1 and n0 are used without
   parentheses, so callers should pass simple expressions.  */
1483 #define sdiv_qrnnd(q, r, n1, n0, d) \
1484   do {                                                                  \
1485     union {DItype __ll;                                                 \
1486            struct {SItype __l, __h;} __i;                               \
1487           } __xx;                                                       \
1488     __xx.__i.__h = n1; __xx.__i.__l = n0;                               \
1489     __asm__ ("ediv %3,%2,%0,%1"                                         \
1490              : "=g" (q), "=g" (r)                                       \
1491              : "g" (__xx.__ll), "g" (d));                               \
1492   } while (0)
1493 #endif /* __vax__ */
1494
1495
1496 /***************************************
1497  **************  Z8000  ****************
1498  ***************************************/
1499 #if defined (__z8000__) && W_TYPE_SIZE == 16
/* Two-word addition (sh,sl) = (ah,al) + (bh,bl) on 16-bit words: `add'
   on the low words, then `adc' on the high words.  Operands are cast to
   unsigned int, the word type used throughout this section.  */
1500 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1501   __asm__ ("add %H1,%H5\n\tadc  %H0,%H3"                                \
1502            : "=r" ((unsigned int)(sh)),                                 \
1503              "=&r" ((unsigned int)(sl))                                 \
1504            : "%0" ((unsigned int)(ah)),                                 \
1505              "r" ((unsigned int)(bh)),                                  \
1506              "%1" ((unsigned int)(al)),                                 \
1507              "rQR" ((unsigned int)(bl)))
/* Two-word subtraction (sh,sl) = (ah,al) - (bh,bl): `sub' on the low
   words, then `sbc' on the high words.  */
1508 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1509   __asm__ ("sub %H1,%H5\n\tsbc  %H0,%H3"                                \
1510            : "=r" ((unsigned int)(sh)),                                 \
1511              "=&r" ((unsigned int)(sl))                                 \
1512            : "0" ((unsigned int)(ah)),                                  \
1513              "r" ((unsigned int)(bh)),                                  \
1514              "1" ((unsigned int)(al)),                                  \
1515              "rQR" ((unsigned int)(bl)))
/* Unsigned 16x16 -> 32 bit multiply.  `mult' produces the product in the
   two halves of the union (__h high, __l low; __m0 is tied to the low
   half on input).  The high word is then corrected by adding __m1 when
   bit 15 of __m0 is set and __m0 when bit 15 of __m1 is set.  */
1516 #define umul_ppmm(xh, xl, m0, m1) \
1517   do {                                                                  \
1518     union {long int __ll;                                               \
1519            struct {unsigned int __h, __l;} __i;                         \
1520           } __xx;                                                       \
1521     unsigned int __m0 = (m0), __m1 = (m1);                              \
1522     __asm__ ("mult      %S0,%H3"                                        \
1523              : "=r" (__xx.__i.__h),                                     \
1524                "=r" (__xx.__i.__l)                                      \
1525              : "%1" (__m0),                                             \
1526                "rQR" (__m1));                                           \
1527     (xh) = __xx.__i.__h; (xl) = __xx.__i.__l;                           \
1528     (xh) += ((((signed int) __m0 >> 15) & __m1)                         \
1529              + (((signed int) __m1 >> 15) & __m0));                     \
1530   } while (0)
1531 #endif /* __z8000__ */
1532
1533 #endif /* __GNUC__ */
1534 #endif /* !__riscos__ */
1535
1536
1537 /***************************************
1538  ***********  Generic Versions  ********
1539  ***************************************/
#if !defined (umul_ppmm) && defined (__umulsidi3)
/* Derive umul_ppmm from a target-provided __umulsidi3: compute the
   double-word product once, then store its high word in ph and its low
   word in pl.
   The body is wrapped in do { } while (0) so that the macro behaves as a
   single statement (e.g. in an unbraced if/else), like the other
   umul_ppmm variants, and ph/pl are parenthesized so that any lvalue
   expression can be passed.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    UDWtype __ll = __umulsidi3 (m0, m1);                                \
    (ph) = (UWtype) (__ll >> W_TYPE_SIZE);                              \
    (pl) = (UWtype) __ll;                                               \
  } while (0)
#endif
1548
1549 #if !defined (__umulsidi3)
/* Double-word product of u and v, built from umul_ppmm: the high word is
   shifted up by W_TYPE_SIZE and OR-ed with the low word.  Implemented as
   a GNU statement expression, so it yields a UDWtype value.  */
1550 #define __umulsidi3(u, v) \
1551   ({UWtype __hi, __lo;                                                  \
1552     umul_ppmm (__hi, __lo, u, v);                                       \
1553     ((UDWtype) __hi << W_TYPE_SIZE) | __lo; })
1554 #endif
1555
1556 /* If this machine has no inline assembler, use C macros.  */
1557
#if !defined (add_ssaaaa)
/* Portable two-word addition: (sh,sl) = (ah,al) + (bh,bl).
   The low words are added first; a carry out of the low word shows up as
   the (wrapped) sum being smaller than AL and is added into the high
   word.  AL is captured in a local so that it is evaluated only once,
   which keeps arguments with side effects safe.  SL is assigned last so
   that it may alias any of the inputs.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UWtype __al = (al);                                                 \
    UWtype __x = __al + (bl);                                           \
    (sh) = (ah) + (bh) + (__x < __al);                                  \
    (sl) = __x;                                                         \
  } while (0)
#endif
1567
#if !defined (sub_ddmmss)
/* Portable two-word subtraction: (sh,sl) = (ah,al) - (bh,bl).
   The low words are subtracted first; a borrow shows up as the (wrapped)
   difference being larger than AL and is subtracted from the high word.
   AL is captured in a local so that it is evaluated only once, which
   keeps arguments with side effects safe.  SL is assigned last so that
   it may alias any of the inputs.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UWtype __al = (al);                                                 \
    UWtype __x = __al - (bl);                                           \
    (sh) = (ah) - (bh) - (__x > __al);                                  \
    (sl) = __x;                                                         \
  } while (0)
#endif
1577
1578 #if !defined (umul_ppmm)
/* Portable word x word -> double-word multiply: (w1,w0) = u * v.
   Both operands are split into half-words with __ll_lowpart and
   __ll_highpart, the four partial products are formed in UWtype, and
   the two cross products are summed with explicit carry detection before
   being folded into the high (w1) and low (w0) result words.  u and v
   are each evaluated once (captured in __u and __v).  */
1579 #define umul_ppmm(w1, w0, u, v)                                         \
1580   do {                                                                  \
1581     UWtype __x0, __x1, __x2, __x3;                                      \
1582     UHWtype __ul, __vl, __uh, __vh;                                     \
1583     UWtype __u = (u), __v = (v);                                        \
1584                                                                         \
1585     __ul = __ll_lowpart (__u);                                          \
1586     __uh = __ll_highpart (__u);                                         \
1587     __vl = __ll_lowpart (__v);                                          \
1588     __vh = __ll_highpart (__v);                                         \
1589                                                                         \
1590     __x0 = (UWtype) __ul * __vl;                                        \
1591     __x1 = (UWtype) __ul * __vh;                                        \
1592     __x2 = (UWtype) __uh * __vl;                                        \
1593     __x3 = (UWtype) __uh * __vh;                                        \
1594                                                                         \
1595     __x1 += __ll_highpart (__x0);/* this can't give carry */            \
1596     __x1 += __x2;               /* but this indeed can */               \
1597     if (__x1 < __x2)            /* did we get it? */                    \
1598       __x3 += __ll_B;           /* yes, add it in the proper pos. */    \
1599                                                                         \
1600     (w1) = __x3 + __ll_highpart (__x1);                                 \
1601     (w0) = (__ll_lowpart (__x1) << W_TYPE_SIZE/2) + __ll_lowpart (__x0);\
1602   } while (0)
1603 #endif
1604
/* If smul_ppmm was not defined earlier, derive it from umul_ppmm, which
   is guaranteed to exist at this point because of the fallback just
   above.  The guard must test smul_ppmm itself: testing umul_ppmm here
   would always fail and leave this definition unreachable.

   smul_ppmm(w1, w0, u, v) stores the double-word product of U and V,
   both interpreted as two's complement signed words, as high word W1
   and low word W0.  The low word equals the unsigned one; the high word
   is the unsigned high word minus V when U is negative and minus U when
   V is negative.  U and V are each evaluated once.

   The operand copies carry an "x" in their names so that they cannot be
   captured by a umul_ppmm implementation that declares temporaries
   called __m0/__m1 of its own.  */
#if !defined (smul_ppmm)
#define smul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    UWtype __w1;                                                        \
    UWtype __xm0 = (u), __xm1 = (v);                                    \
    umul_ppmm (__w1, w0, __xm0, __xm1);                                 \
    /* -(x >> (W_TYPE_SIZE - 1)) is all ones iff x has its top bit set. */ \
    (w1) = __w1 - (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1)               \
                - (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0);              \
  } while (0)
#endif
1615
/* Define this unconditionally, so it can be used for debugging.

   Plain C division step: divides the two-word value (N1,N0) by the single
   word D, storing the quotient in Q and the remainder in R.  The quotient
   is produced as two half-word digits; each digit is first estimated by
   dividing by the high half of D alone and is then lowered by at most two
   to account for the low half of D.

   N1, N0 and D are each evaluated exactly once, so expressions with side
   effects are safe as operands, and Q or R may name the same object as
   one of the inputs.  The high half of D must be nonzero because it is
   used as a divisor.  Callers presumably also need D normalized and
   N1 < D -- note that selecting this macro as udiv_qrnnd sets
   UDIV_NEEDS_NORMALIZATION to 1.  */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
  do {                                                                  \
    UWtype __udiv_d = (d);                                              \
    UWtype __udiv_n1 = (n1);                                            \
    UWtype __udiv_n0 = (n0);                                            \
    UWtype __udiv_dh = __ll_highpart (__udiv_d);                        \
    UWtype __udiv_dl = __ll_lowpart (__udiv_d);                         \
    UWtype __udiv_qh, __udiv_ql, __udiv_rh, __udiv_rl, __udiv_m;        \
                                                                        \
    /* High quotient digit, estimated from the high half of d.  */      \
    __udiv_qh = __udiv_n1 / __udiv_dh;                                  \
    __udiv_rh = __udiv_n1 % __udiv_dh;                                  \
    __udiv_m = __udiv_qh * __udiv_dl;                                   \
    __udiv_rh = __udiv_rh * __ll_B | __ll_highpart (__udiv_n0);         \
    if (__udiv_rh < __udiv_m)                                           \
      {                                                                 \
        __udiv_qh--;                                                    \
        __udiv_rh += __udiv_d;                                          \
        /* Second correction only if that addition did not wrap.  */    \
        if (__udiv_rh >= __udiv_d && __udiv_rh < __udiv_m)              \
          {                                                             \
            __udiv_qh--;                                                \
            __udiv_rh += __udiv_d;                                      \
          }                                                             \
      }                                                                 \
    __udiv_rh -= __udiv_m;                                              \
                                                                        \
    /* Low quotient digit, same scheme on the partial remainder.  */    \
    __udiv_ql = __udiv_rh / __udiv_dh;                                  \
    __udiv_rl = __udiv_rh % __udiv_dh;                                  \
    __udiv_m = __udiv_ql * __udiv_dl;                                   \
    __udiv_rl = __udiv_rl * __ll_B | __ll_lowpart (__udiv_n0);          \
    if (__udiv_rl < __udiv_m)                                           \
      {                                                                 \
        __udiv_ql--;                                                    \
        __udiv_rl += __udiv_d;                                          \
        if (__udiv_rl >= __udiv_d && __udiv_rl < __udiv_m)              \
          {                                                             \
            __udiv_ql--;                                                \
            __udiv_rl += __udiv_d;                                      \
          }                                                             \
      }                                                                 \
    __udiv_rl -= __udiv_m;                                              \
                                                                        \
    (q) = __udiv_qh * __ll_B | __udiv_ql;                               \
    (r) = __udiv_rl;                                                    \
  } while (0)
1652
/* A processor that offers sdiv_qrnnd but no udiv_qrnnd gets its unsigned
   division from the out-of-line helper __MPN(udiv_w_sdiv) (expected to be
   supplied by libgcc or elsewhere).  The helper's return value is stored
   in Q, and the word it leaves behind its first (pointer) argument is
   stored in R.  */
#if defined (sdiv_qrnnd) && !defined (udiv_qrnnd)
#define udiv_qrnnd(q, r, nh, nl, d) \
  do {                                                                  \
    UWtype __udiv_rem;                                                  \
    (q) = __MPN(udiv_w_sdiv) (&__udiv_rem, nh, nl, d);                  \
    (r) = __udiv_rem;                                                   \
  } while (0)
#endif
1663
/* Last resort: when nothing above supplied udiv_qrnnd, alias it to the
   portable __udiv_qrnnd_c and record, via UDIV_NEEDS_NORMALIZATION, that
   this implementation needs normalization.  */
#ifndef udiv_qrnnd
#define udiv_qrnnd __udiv_qrnnd_c
#define UDIV_NEEDS_NORMALIZATION 1
#endif
1669
#if !defined (count_leading_zeros)
/* Table-driven fallback for count_leading_zeros.  It relies on the
   external byte table _gcry_clz_tab, which is only declared here;
   MPI_INTERNAL_NEED_CLZ_TAB is defined alongside, presumably so that the
   MPI code knows it has to provide the table.  */
#ifdef __STDC__
extern const unsigned char _gcry_clz_tab[];
#else
extern unsigned char _gcry_clz_tab[];
#endif
#define MPI_INTERNAL_NEED_CLZ_TAB 1
/* Store in COUNT the number of leading zero bits of the word X.  X is
   evaluated once.  A shift amount is chosen that brings the most
   significant nonzero part of X down into table range; the table entry
   plus the shift is then subtracted from W_TYPE_SIZE.  */
#define count_leading_zeros(count, x) \
  do {                                                                  \
    UWtype __clz_v = (x);                                               \
    UWtype __clz_sh;                                                    \
                                                                        \
    if (W_TYPE_SIZE > 32)                                               \
      {                                                                 \
        /* Wide words: step down bytewise to the first nonzero byte. */ \
        __clz_sh = W_TYPE_SIZE - 8;                                     \
        while (__clz_sh > 0 && ((__clz_v >> __clz_sh) & 0xff) == 0)     \
          __clz_sh -= 8;                                                \
      }                                                                 \
    else if (__clz_v < ((UWtype) 1 << __BITS4))                         \
      __clz_sh = 0;                                                     \
    else if (__clz_v < ((UWtype) 1 << 2*__BITS4))                       \
      __clz_sh = __BITS4;                                               \
    else if (__clz_v < ((UWtype) 1 << 3*__BITS4))                       \
      __clz_sh = 2*__BITS4;                                             \
    else                                                                \
      __clz_sh = 3*__BITS4;                                             \
                                                                        \
    (count) = W_TYPE_SIZE                                               \
              - (_gcry_clz_tab[__clz_v >> __clz_sh] + __clz_sh);        \
  } while (0)
/* This version gives a well-defined value for zero. */
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
#endif
1700
#if !defined (count_trailing_zeros)
/* Fallback count_trailing_zeros built on count_leading_zeros (which may be
   an asm version or the C version above).  X is evaluated once; its lowest
   set bit is isolated with X & -X, and the position of that single bit,
   W_TYPE_SIZE - 1 minus its leading-zero count, is stored in COUNT.  */
#define count_trailing_zeros(count, x) \
  do {                                                                  \
    UWtype __ctz_in = (x);                                              \
    UWtype __ctz_lsb = __ctz_in & -__ctz_in;                            \
    UWtype __ctz_lz;                                                    \
    count_leading_zeros (__ctz_lz, __ctz_lsb);                          \
    (count) = W_TYPE_SIZE - 1 - __ctz_lz;                               \
  } while (0)
#endif
1712
/* Default: unless something above set UDIV_NEEDS_NORMALIZATION, define it
   as 0.  */
#if !defined (UDIV_NEEDS_NORMALIZATION)
#define UDIV_NEEDS_NORMALIZATION 0
#endif