
/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
California. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

44 | float128_t
|
---|
45 | softfloat_mulAddF128(
|
---|
46 | uint_fast64_t uiA64,
|
---|
47 | uint_fast64_t uiA0,
|
---|
48 | uint_fast64_t uiB64,
|
---|
49 | uint_fast64_t uiB0,
|
---|
50 | uint_fast64_t uiC64,
|
---|
51 | uint_fast64_t uiC0,
|
---|
52 | uint_fast8_t op
|
---|
53 | SOFTFLOAT_STATE_DECL_COMMA
|
---|
54 | )
|
---|
55 | {
|
---|
56 | bool signA;
|
---|
57 | int_fast32_t expA;
|
---|
58 | struct uint128 sigA;
|
---|
59 | bool signB;
|
---|
60 | int_fast32_t expB;
|
---|
61 | struct uint128 sigB;
|
---|
62 | bool signC;
|
---|
63 | int_fast32_t expC;
|
---|
64 | struct uint128 sigC;
|
---|
65 | bool signZ;
|
---|
66 | uint_fast64_t magBits;
|
---|
67 | struct uint128 uiZ;
|
---|
68 | struct exp32_sig128 normExpSig;
|
---|
69 | int_fast32_t expZ;
|
---|
70 | uint64_t sig256Z[4];
|
---|
71 | struct uint128 sigZ;
|
---|
72 | int_fast32_t shiftDist, expDiff;
|
---|
73 | struct uint128 x128;
|
---|
74 | uint64_t sig256C[4];
|
---|
75 | static uint64_t zero256[4] = INIT_UINTM4( 0, 0, 0, 0 );
|
---|
76 | uint_fast64_t sigZExtra, sig256Z0;
|
---|
77 | union ui128_f128 uZ;
|
---|
78 |
|
---|
79 | /*------------------------------------------------------------------------
|
---|
80 | *------------------------------------------------------------------------*/
|
---|
81 | signA = signF128UI64( uiA64 );
|
---|
82 | expA = expF128UI64( uiA64 );
|
---|
83 | sigA.v64 = fracF128UI64( uiA64 );
|
---|
84 | sigA.v0 = uiA0;
|
---|
85 | signB = signF128UI64( uiB64 );
|
---|
86 | expB = expF128UI64( uiB64 );
|
---|
87 | sigB.v64 = fracF128UI64( uiB64 );
|
---|
88 | sigB.v0 = uiB0;
|
---|
89 | signC = signF128UI64( uiC64 ) ^ (op == softfloat_mulAdd_subC);
|
---|
90 | expC = expF128UI64( uiC64 );
|
---|
91 | sigC.v64 = fracF128UI64( uiC64 );
|
---|
92 | sigC.v0 = uiC0;
|
---|
93 | signZ = signA ^ signB ^ (op == softfloat_mulAdd_subProd);
|
---|
94 | /*------------------------------------------------------------------------
|
---|
95 | *------------------------------------------------------------------------*/
|
---|
96 | if ( expA == 0x7FFF ) {
|
---|
97 | if (
|
---|
98 | (sigA.v64 | sigA.v0) || ((expB == 0x7FFF) && (sigB.v64 | sigB.v0))
|
---|
99 | ) {
|
---|
100 | goto propagateNaN_ABC;
|
---|
101 | }
|
---|
102 | magBits = expB | sigB.v64 | sigB.v0;
|
---|
103 | goto infProdArg;
|
---|
104 | }
|
---|
105 | if ( expB == 0x7FFF ) {
|
---|
106 | if ( sigB.v64 | sigB.v0 ) goto propagateNaN_ABC;
|
---|
107 | magBits = expA | sigA.v64 | sigA.v0;
|
---|
108 | goto infProdArg;
|
---|
109 | }
|
---|
110 | if ( expC == 0x7FFF ) {
|
---|
111 | if ( sigC.v64 | sigC.v0 ) {
|
---|
112 | uiZ.v64 = 0;
|
---|
113 | uiZ.v0 = 0;
|
---|
114 | goto propagateNaN_ZC;
|
---|
115 | }
|
---|
116 | uiZ.v64 = uiC64;
|
---|
117 | uiZ.v0 = uiC0;
|
---|
118 | goto uiZ;
|
---|
119 | }
|
---|
120 | /*------------------------------------------------------------------------
|
---|
121 | *------------------------------------------------------------------------*/
|
---|
122 | if ( ! expA ) {
|
---|
123 | if ( ! (sigA.v64 | sigA.v0) ) goto zeroProd;
|
---|
124 | normExpSig = softfloat_normSubnormalF128Sig( sigA.v64, sigA.v0 );
|
---|
125 | expA = normExpSig.exp;
|
---|
126 | sigA = normExpSig.sig;
|
---|
127 | }
|
---|
128 | if ( ! expB ) {
|
---|
129 | if ( ! (sigB.v64 | sigB.v0) ) goto zeroProd;
|
---|
130 | normExpSig = softfloat_normSubnormalF128Sig( sigB.v64, sigB.v0 );
|
---|
131 | expB = normExpSig.exp;
|
---|
132 | sigB = normExpSig.sig;
|
---|
133 | }
|
---|
134 | /*------------------------------------------------------------------------
|
---|
135 | *------------------------------------------------------------------------*/
|
---|
136 | expZ = expA + expB - 0x3FFE;
|
---|
137 | sigA.v64 |= UINT64_C( 0x0001000000000000 );
|
---|
138 | sigB.v64 |= UINT64_C( 0x0001000000000000 );
|
---|
139 | sigA = softfloat_shortShiftLeft128( sigA.v64, sigA.v0, 8 );
|
---|
140 | sigB = softfloat_shortShiftLeft128( sigB.v64, sigB.v0, 15 );
|
---|
141 | softfloat_mul128To256M( sigA.v64, sigA.v0, sigB.v64, sigB.v0, sig256Z );
|
---|
142 | sigZ.v64 = sig256Z[indexWord( 4, 3 )];
|
---|
143 | sigZ.v0 = sig256Z[indexWord( 4, 2 )];
|
---|
144 | shiftDist = 0;
|
---|
145 | if ( ! (sigZ.v64 & UINT64_C( 0x0100000000000000 )) ) {
|
---|
146 | --expZ;
|
---|
147 | shiftDist = -1;
|
---|
148 | }
|
---|
149 | if ( ! expC ) {
|
---|
150 | if ( ! (sigC.v64 | sigC.v0) ) {
|
---|
151 | shiftDist += 8;
|
---|
152 | goto sigZ;
|
---|
153 | }
|
---|
154 | normExpSig = softfloat_normSubnormalF128Sig( sigC.v64, sigC.v0 );
|
---|
155 | expC = normExpSig.exp;
|
---|
156 | sigC = normExpSig.sig;
|
---|
157 | }
|
---|
158 | sigC.v64 |= UINT64_C( 0x0001000000000000 );
|
---|
159 | sigC = softfloat_shortShiftLeft128( sigC.v64, sigC.v0, 8 );
|
---|
160 | /*------------------------------------------------------------------------
|
---|
161 | *------------------------------------------------------------------------*/
|
---|
162 | expDiff = expZ - expC;
|
---|
163 | if ( expDiff < 0 ) {
|
---|
164 | expZ = expC;
|
---|
165 | if ( (signZ == signC) || (expDiff < -1) ) {
|
---|
166 | shiftDist -= expDiff;
|
---|
167 | if ( shiftDist ) {
|
---|
168 | sigZ =
|
---|
169 | softfloat_shiftRightJam128( sigZ.v64, sigZ.v0, shiftDist );
|
---|
170 | }
|
---|
171 | } else {
|
---|
172 | if ( ! shiftDist ) {
|
---|
173 | x128 =
|
---|
174 | softfloat_shortShiftRight128(
|
---|
175 | sig256Z[indexWord( 4, 1 )], sig256Z[indexWord( 4, 0 )],
|
---|
176 | 1
|
---|
177 | );
|
---|
178 | sig256Z[indexWord( 4, 1 )] = (sigZ.v0<<63) | x128.v64;
|
---|
179 | sig256Z[indexWord( 4, 0 )] = x128.v0;
|
---|
180 | sigZ = softfloat_shortShiftRight128( sigZ.v64, sigZ.v0, 1 );
|
---|
181 | sig256Z[indexWord( 4, 3 )] = sigZ.v64;
|
---|
182 | sig256Z[indexWord( 4, 2 )] = sigZ.v0;
|
---|
183 | }
|
---|
184 | }
|
---|
185 | } else {
|
---|
186 | if ( shiftDist ) softfloat_add256M( sig256Z, sig256Z, sig256Z );
|
---|
187 | if ( ! expDiff ) {
|
---|
188 | sigZ.v64 = sig256Z[indexWord( 4, 3 )];
|
---|
189 | sigZ.v0 = sig256Z[indexWord( 4, 2 )];
|
---|
190 | } else {
|
---|
191 | sig256C[indexWord( 4, 3 )] = sigC.v64;
|
---|
192 | sig256C[indexWord( 4, 2 )] = sigC.v0;
|
---|
193 | sig256C[indexWord( 4, 1 )] = 0;
|
---|
194 | sig256C[indexWord( 4, 0 )] = 0;
|
---|
195 | softfloat_shiftRightJam256M( sig256C, expDiff, sig256C );
|
---|
196 | }
|
---|
197 | }
|
---|
198 | /*------------------------------------------------------------------------
|
---|
199 | *------------------------------------------------------------------------*/
|
---|
200 | shiftDist = 8;
|
---|
201 | if ( signZ == signC ) {
|
---|
202 | /*--------------------------------------------------------------------
|
---|
203 | *--------------------------------------------------------------------*/
|
---|
204 | if ( expDiff <= 0 ) {
|
---|
205 | sigZ = softfloat_add128( sigC.v64, sigC.v0, sigZ.v64, sigZ.v0 );
|
---|
206 | } else {
|
---|
207 | softfloat_add256M( sig256Z, sig256C, sig256Z );
|
---|
208 | sigZ.v64 = sig256Z[indexWord( 4, 3 )];
|
---|
209 | sigZ.v0 = sig256Z[indexWord( 4, 2 )];
|
---|
210 | }
|
---|
211 | if ( sigZ.v64 & UINT64_C( 0x0200000000000000 ) ) {
|
---|
212 | ++expZ;
|
---|
213 | shiftDist = 9;
|
---|
214 | }
|
---|
215 | } else {
|
---|
216 | /*--------------------------------------------------------------------
|
---|
217 | *--------------------------------------------------------------------*/
|
---|
218 | if ( expDiff < 0 ) {
|
---|
219 | signZ = signC;
|
---|
220 | if ( expDiff < -1 ) {
|
---|
221 | sigZ =
|
---|
222 | softfloat_sub128( sigC.v64, sigC.v0, sigZ.v64, sigZ.v0 );
|
---|
223 | sigZExtra =
|
---|
224 | sig256Z[indexWord( 4, 1 )] | sig256Z[indexWord( 4, 0 )];
|
---|
225 | if ( sigZExtra ) {
|
---|
226 | sigZ = softfloat_sub128( sigZ.v64, sigZ.v0, 0, 1 );
|
---|
227 | }
|
---|
228 | if ( ! (sigZ.v64 & UINT64_C( 0x0100000000000000 )) ) {
|
---|
229 | --expZ;
|
---|
230 | shiftDist = 7;
|
---|
231 | }
|
---|
232 | goto shiftRightRoundPack;
|
---|
233 | } else {
|
---|
234 | sig256C[indexWord( 4, 3 )] = sigC.v64;
|
---|
235 | sig256C[indexWord( 4, 2 )] = sigC.v0;
|
---|
236 | sig256C[indexWord( 4, 1 )] = 0;
|
---|
237 | sig256C[indexWord( 4, 0 )] = 0;
|
---|
238 | softfloat_sub256M( sig256C, sig256Z, sig256Z );
|
---|
239 | }
|
---|
240 | } else if ( ! expDiff ) {
|
---|
241 | sigZ = softfloat_sub128( sigZ.v64, sigZ.v0, sigC.v64, sigC.v0 );
|
---|
242 | if (
|
---|
243 | ! (sigZ.v64 | sigZ.v0) && ! sig256Z[indexWord( 4, 1 )]
|
---|
244 | && ! sig256Z[indexWord( 4, 0 )]
|
---|
245 | ) {
|
---|
246 | goto completeCancellation;
|
---|
247 | }
|
---|
248 | sig256Z[indexWord( 4, 3 )] = sigZ.v64;
|
---|
249 | sig256Z[indexWord( 4, 2 )] = sigZ.v0;
|
---|
250 | if ( sigZ.v64 & UINT64_C( 0x8000000000000000 ) ) {
|
---|
251 | signZ = ! signZ;
|
---|
252 | softfloat_sub256M( zero256, sig256Z, sig256Z );
|
---|
253 | }
|
---|
254 | } else {
|
---|
255 | softfloat_sub256M( sig256Z, sig256C, sig256Z );
|
---|
256 | if ( 1 < expDiff ) {
|
---|
257 | sigZ.v64 = sig256Z[indexWord( 4, 3 )];
|
---|
258 | sigZ.v0 = sig256Z[indexWord( 4, 2 )];
|
---|
259 | if ( ! (sigZ.v64 & UINT64_C( 0x0100000000000000 )) ) {
|
---|
260 | --expZ;
|
---|
261 | shiftDist = 7;
|
---|
262 | }
|
---|
263 | goto sigZ;
|
---|
264 | }
|
---|
265 | }
|
---|
266 | /*--------------------------------------------------------------------
|
---|
267 | *--------------------------------------------------------------------*/
|
---|
268 | sigZ.v64 = sig256Z[indexWord( 4, 3 )];
|
---|
269 | sigZ.v0 = sig256Z[indexWord( 4, 2 )];
|
---|
270 | sigZExtra = sig256Z[indexWord( 4, 1 )];
|
---|
271 | sig256Z0 = sig256Z[indexWord( 4, 0 )];
|
---|
272 | if ( sigZ.v64 ) {
|
---|
273 | if ( sig256Z0 ) sigZExtra |= 1;
|
---|
274 | } else {
|
---|
275 | expZ -= 64;
|
---|
276 | sigZ.v64 = sigZ.v0;
|
---|
277 | sigZ.v0 = sigZExtra;
|
---|
278 | sigZExtra = sig256Z0;
|
---|
279 | if ( ! sigZ.v64 ) {
|
---|
280 | expZ -= 64;
|
---|
281 | sigZ.v64 = sigZ.v0;
|
---|
282 | sigZ.v0 = sigZExtra;
|
---|
283 | sigZExtra = 0;
|
---|
284 | if ( ! sigZ.v64 ) {
|
---|
285 | expZ -= 64;
|
---|
286 | sigZ.v64 = sigZ.v0;
|
---|
287 | sigZ.v0 = 0;
|
---|
288 | }
|
---|
289 | }
|
---|
290 | }
|
---|
291 | shiftDist = softfloat_countLeadingZeros64( sigZ.v64 );
|
---|
292 | expZ += 7 - shiftDist;
|
---|
293 | shiftDist = 15 - shiftDist;
|
---|
294 | if ( 0 < shiftDist ) goto shiftRightRoundPack;
|
---|
295 | if ( shiftDist ) {
|
---|
296 | shiftDist = -shiftDist;
|
---|
297 | sigZ = softfloat_shortShiftLeft128( sigZ.v64, sigZ.v0, shiftDist );
|
---|
298 | x128 = softfloat_shortShiftLeft128( 0, sigZExtra, shiftDist );
|
---|
299 | sigZ.v0 |= x128.v64;
|
---|
300 | sigZExtra = x128.v0;
|
---|
301 | }
|
---|
302 | goto roundPack;
|
---|
303 | }
|
---|
304 | sigZ:
|
---|
305 | sigZExtra = sig256Z[indexWord( 4, 1 )] | sig256Z[indexWord( 4, 0 )];
|
---|
306 | shiftRightRoundPack:
|
---|
307 | sigZExtra = (uint64_t) (sigZ.v0<<(64 - shiftDist)) | (sigZExtra != 0);
|
---|
308 | sigZ = softfloat_shortShiftRight128( sigZ.v64, sigZ.v0, shiftDist );
|
---|
309 | roundPack:
|
---|
310 | return
|
---|
311 | softfloat_roundPackToF128(
|
---|
312 | signZ, expZ - 1, sigZ.v64, sigZ.v0, sigZExtra SOFTFLOAT_STATE_ARG_COMMA );
|
---|
313 | /*------------------------------------------------------------------------
|
---|
314 | *------------------------------------------------------------------------*/
|
---|
315 | propagateNaN_ABC:
|
---|
316 | uiZ = softfloat_propagateNaNF128UI( uiA64, uiA0, uiB64, uiB0 SOFTFLOAT_STATE_ARG_COMMA );
|
---|
317 | goto propagateNaN_ZC;
|
---|
318 | /*------------------------------------------------------------------------
|
---|
319 | *------------------------------------------------------------------------*/
|
---|
320 | infProdArg:
|
---|
321 | if ( magBits ) {
|
---|
322 | uiZ.v64 = packToF128UI64( signZ, 0x7FFF, 0 );
|
---|
323 | uiZ.v0 = 0;
|
---|
324 | if ( expC != 0x7FFF ) goto uiZ;
|
---|
325 | if ( sigC.v64 | sigC.v0 ) goto propagateNaN_ZC;
|
---|
326 | if ( signZ == signC ) goto uiZ;
|
---|
327 | }
|
---|
328 | softfloat_raiseFlags( softfloat_flag_invalid SOFTFLOAT_STATE_ARG_COMMA );
|
---|
329 | uiZ.v64 = defaultNaNF128UI64;
|
---|
330 | uiZ.v0 = defaultNaNF128UI0;
|
---|
331 | propagateNaN_ZC:
|
---|
332 | uiZ = softfloat_propagateNaNF128UI( uiZ.v64, uiZ.v0, uiC64, uiC0 SOFTFLOAT_STATE_ARG_COMMA );
|
---|
333 | goto uiZ;
|
---|
334 | /*------------------------------------------------------------------------
|
---|
335 | *------------------------------------------------------------------------*/
|
---|
336 | zeroProd:
|
---|
337 | uiZ.v64 = uiC64;
|
---|
338 | uiZ.v0 = uiC0;
|
---|
339 | if ( ! (expC | sigC.v64 | sigC.v0) && (signZ != signC) ) {
|
---|
340 | completeCancellation:
|
---|
341 | uiZ.v64 =
|
---|
342 | packToF128UI64(
|
---|
343 | (softfloat_roundingMode == softfloat_round_min), 0, 0 );
|
---|
344 | uiZ.v0 = 0;
|
---|
345 | }
|
---|
346 | uiZ:
|
---|
347 | uZ.ui = uiZ;
|
---|
348 | return uZ.f;
|
---|
349 |
|
---|
350 | }
|
---|
351 |
|
---|