added assembler stuff for hppa
[gnupg.git] / mpi / pa7100 / mpih-shift.S
1 /* hppa   rshift, lshift
2  *        optimized for the PA7100, where it runs at 3.25 cycles/limb
3  *      Copyright (C) 1992, 1994 Free Software Foundation, Inc.
4  *      Copyright (c) 1997 by Werner Koch (dd9jn)
5  *
6  * This file is part of G10.
7  *
8  * G10 is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * G10 is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
21  *
22  * Note: This code is heavily based on the GNU MP Library.
23  *       Actually it's the same code with only minor changes in the
24  *       way the data is stored; this is to support the abstraction
25  *       of an optional secure memory allocation which may be used
26  *       to avoid revealing sensitive data due to paging etc.
27  *       The GNU MP Library itself is published under the LGPL;
28  *       however I decided to publish this code under the plain GPL.
29  */
30
31
32
33 /*******************
34  * mpi_limb_t
35  * mpihelp_lshift( mpi_ptr_t wp,        (gr26)
36  *                 mpi_ptr_t up,        (gr25)
37  *                 mpi_size_t usize,    (gr24)
38  *                 unsigned cnt)        (gr23)
39  */
40
41         .code                                   ; mpihelp_lshift: wp[] = up[] << cnt over usize 32-bit limbs, walking from the top limb down
42         .export         mpihelp_lshift          ; export the entry point symbol
43 mpihelp_lshift                                  ; in: r26=wp r25=up r24=usize r23=cnt   out: r28 = bits shifted out of the top limb
44         .proc
45         .callinfo       frame=64,no_calls       ; declared as a procedure that makes no calls
46         .entry                                  ; NOTE(review): usize >= 1 and 0 < cnt < 32 look assumed, nothing here checks them -- confirm with callers
47
48         sh2add          %r24,%r25,%r25          ; r25 = up + 4*usize (one past the top source limb)
49         sh2add          %r24,%r26,%r26          ; r26 = wp + 4*usize (one past the top result limb)
50         ldws,mb         -4(0,%r25),%r22         ; pre-decrement r25 by 4, then r22 = top source limb
51         subi            32,%r23,%r1             ; r1 = 32 - cnt
52         mtsar           %r1                     ; SAR = 32 - cnt, so vshd hi,lo yields (hi << cnt) | (lo >> (32-cnt))
53         addib,=         -1,%r24,L$0004          ; r24 = usize-1 limbs left to load, a single-limb input goes straight to the tail
54         vshd            %r0,%r22,%r28           ; (delay slot) carry-out limb: r22 >> (32-cnt), the bits pushed out of the top limb
55         ldws,mb         -4(0,%r25),%r29         ; r29 = next lower source limb
56         addib,<=        -5,%r24,L$rest          ; r24 = usize-6, when that is <= 0 the unrolled loop is skipped
57         vshd            %r22,%r29,%r20          ; (delay slot) r20 = top result limb, pending until the next store
58         ; 4x unrolled loop: each step loads one limb, stores the pending result limb, then merges the next one
59 L$loop  ldws,mb         -4(0,%r25),%r22         ; r22 and r29 alternate as the newer (lower) and older (higher) limb
60         stws,mb         %r20,-4(0,%r26)         ; pre-decrement r26 by 4, then store the pending result limb
61         vshd            %r29,%r22,%r20          ; merge older limb r29 with newer limb r22
62         ldws,mb         -4(0,%r25),%r29         ; step 2: roles swapped, r29 is now the newer limb
63         stws,mb         %r20,-4(0,%r26)
64         vshd            %r22,%r29,%r20
65         ldws,mb         -4(0,%r25),%r22         ; step 3
66         stws,mb         %r20,-4(0,%r26)
67         vshd            %r29,%r22,%r20
68         ldws,mb         -4(0,%r25),%r29         ; step 4
69         stws,mb         %r20,-4(0,%r26)
70         addib,>         -4,%r24,L$loop          ; four limbs consumed, repeat while r24 > 0
71         vshd            %r22,%r29,%r20          ; (delay slot) merge for step 4, executed on both the taken and fall-through path
72         ; cleanup: handle the limbs the unrolled loop did not cover
73 L$rest  addib,=         4,%r24,L$end1           ; add back the loop bias: r24 = source limbs not yet loaded, none left -> L$end1
74         nop                                     ; (delay slot) filler
75 L$eloop ldws,mb         -4(0,%r25),%r22         ; two single-limb steps per pass so the r22/r29 alternation is kept
76         stws,mb         %r20,-4(0,%r26)
77         addib,<=        -1,%r24,L$end2          ; that was the last limb and it sits in r22 -> L$end2
78         vshd            %r29,%r22,%r20          ; (delay slot) merge r29 with r22
79         ldws,mb         -4(0,%r25),%r29
80         stws,mb         %r20,-4(0,%r26)
81         addib,>         -1,%r24,L$eloop         ; more limbs -> next pass, otherwise fall into L$end1 with the last limb in r29
82         vshd            %r22,%r29,%r20          ; (delay slot) merge r22 with r29
83         ; tails: the lowest source limb is in r29 (L$end1) or in r22 (L$end2 and L$0004)
84 L$end1  stws,mb         %r20,-4(0,%r26)         ; store the pending result limb
85         vshd            %r29,%r0,%r20           ; r20 = r29 << cnt, zeros enter at the bottom
86         bv              0(%r2)                  ; return through the address held in r2
87         stw             %r20,-4(0,%r26)         ; (delay slot) store the lowest result limb at r26-4, which is wp[0] here
88 L$end2  stws,mb         %r20,-4(0,%r26)         ; store the pending result limb
89 L$0004  vshd            %r22,%r0,%r20           ; r20 = r22 << cnt, also the landing point for usize == 1
90         bv              0(%r2)                  ; return through the address held in r2
91         stw             %r20,-4(0,%r26)         ; (delay slot) store the lowest result limb at r26-4, which is wp[0] here
92
93         .exit
94         .procend
95
96
97
98 /*******************
99  * mpi_limb_t
100  * mpihelp_rshift( mpi_ptr_t wp,       (gr26)
101  *                 mpi_ptr_t up,       (gr25)
102  *                 mpi_size_t usize,   (gr24)
103  *                 unsigned cnt)       (gr23)
104  */
105
106         .code                                  ; mpihelp_rshift: wp[] = up[] >> cnt over usize 32-bit limbs, walking from the lowest limb up
107         .export         mpihelp_rshift         ; export the entry point symbol
108 mpihelp_rshift                                 ; in: r26=wp r25=up r24=usize r23=cnt   out: r28 = bits shifted out of the lowest limb, left-aligned
109         .proc
110         .callinfo       frame=64,no_calls      ; declared as a procedure that makes no calls
111         .entry                                 ; NOTE(review): usize >= 1 and 0 < cnt < 32 look assumed, nothing here checks them -- confirm with callers
112
113         ldws,ma         4(0,%r25),%r22         ; r22 = lowest source limb, then post-increment r25 by 4
114         mtsar           %r23                   ; SAR = cnt, so vshd hi,lo yields (lo >> cnt) | (hi << (32-cnt))
115         addib,=         -1,%r24,L$r004         ; r24 = usize-1 limbs left to load, a single-limb input goes straight to the tail
116         vshd            %r22,%r0,%r28           ; (delay slot) carry-out limb: r22 << (32-cnt), the bits pushed out of the lowest limb
117         ldws,ma         4(0,%r25),%r29         ; r29 = next higher source limb
118         addib,<=        -5,%r24,L$rrest        ; r24 = usize-6, when that is <= 0 the unrolled loop is skipped
119         vshd            %r29,%r22,%r20         ; (delay slot) r20 = lowest result limb, pending until the next store
120         ; 4x unrolled loop: each step loads one limb, stores the pending result limb, then merges the next one
121 L$roop  ldws,ma         4(0,%r25),%r22         ; r22 and r29 alternate as the newer (higher) and older (lower) limb
122         stws,ma         %r20,4(0,%r26)         ; store the pending result limb, then post-increment r26 by 4
123         vshd            %r22,%r29,%r20         ; merge newer limb r22 (high half) with older limb r29 (low half)
124         ldws,ma         4(0,%r25),%r29         ; step 2: roles swapped, r29 is now the newer limb
125         stws,ma         %r20,4(0,%r26)
126         vshd            %r29,%r22,%r20
127         ldws,ma         4(0,%r25),%r22         ; step 3
128         stws,ma         %r20,4(0,%r26)
129         vshd            %r22,%r29,%r20
130         ldws,ma         4(0,%r25),%r29         ; step 4
131         stws,ma         %r20,4(0,%r26)
132         addib,>         -4,%r24,L$roop         ; four limbs consumed, repeat while r24 > 0
133         vshd            %r29,%r22,%r20         ; (delay slot) merge for step 4, executed on both the taken and fall-through path
134         ; cleanup: handle the limbs the unrolled loop did not cover
135 L$rrest addib,=         4,%r24,L$rend1         ; add back the loop bias: r24 = source limbs not yet loaded, none left -> L$rend1
136         nop                                    ; (delay slot) filler
137 L$eroop ldws,ma         4(0,%r25),%r22         ; two single-limb steps per pass so the r22/r29 alternation is kept
138         stws,ma         %r20,4(0,%r26)
139         addib,<=        -1,%r24,L$rend2        ; that was the last limb and it sits in r22 -> L$rend2
140         vshd            %r22,%r29,%r20         ; (delay slot) merge r22 with r29
141         ldws,ma         4(0,%r25),%r29
142         stws,ma         %r20,4(0,%r26)
143         addib,>         -1,%r24,L$eroop        ; more limbs -> next pass, otherwise fall into L$rend1 with the last limb in r29
144         vshd            %r29,%r22,%r20         ; (delay slot) merge r29 with r22
145         ; tails: the top source limb is in r29 (L$rend1) or in r22 (L$rend2 and L$r004)
146 L$rend1  stws,ma         %r20,4(0,%r26)        ; store the pending result limb
147         vshd            %r0,%r29,%r20          ; r20 = r29 >> cnt, zeros enter at the top
148         bv              0(%r2)                 ; return through the address held in r2
149         stw             %r20,0(0,%r26)         ; (delay slot) store the top result limb at r26, no pointer update
150 L$rend2  stws,ma         %r20,4(0,%r26)        ; store the pending result limb
151 L$r004  vshd            %r0,%r22,%r20          ; r20 = r22 >> cnt, also the landing point for usize == 1
152         bv              0(%r2)                 ; return through the address held in r2
153         stw             %r20,0(0,%r26)         ; (delay slot) store the top result limb at r26, no pointer update
154
155         .exit
156         .procend
157
158