asm-math.h@ 35199

Last change on this file since 35199 was 29257, checked in by vboxsync, 15 years ago
asm-math.h: opps (ASMMultU64ByU32DivByU32)
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 9.4 KB

Line
1	/** @file
2	* IPRT - Assembly Routines for Optimizing some Integers Math Operations.
3	*/
4
5	/*
6	* Copyright (C) 2006-2010 Oracle Corporation
7	*
8	* This file is part of VirtualBox Open Source Edition (OSE), as
9	* available from http://www.virtualbox.org. This file is free software;
10	* you can redistribute it and/or modify it under the terms of the GNU
11	* General Public License (GPL) as published by the Free Software
12	* Foundation, in version 2 as it comes in the "COPYING" file of the
13	* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14	* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15	*
16	* The contents of this file may alternatively be used under the terms
17	* of the Common Development and Distribution License Version 1.0
18	* (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19	* VirtualBox OSE distribution, in which case the provisions of the
20	* CDDL are applicable instead of those of the GPL.
21	*
22	* You may elect to license modified versions of this file under the
23	* terms and conditions of either the GPL or the CDDL or both.
24	*/
25
26	#ifndef ___iprt_asm_math_h
27	#define ___iprt_asm_math_h
28
29	#include <iprt/types.h>
30
31
32	/** @defgroup grp_rt_asm_math Integer Math Optimizations
33	* @ingroup grp_rt_asm
34	* @{ */
35
36	/**
37	* Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
38	*
39	* @returns u32F1 * u32F2.
40	*/
41	#if RT_INLINE_ASM_EXTERNAL && defined(RT_ARCH_X86)
42	DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
43	#else
44	DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
45	{
46	# ifdef RT_ARCH_X86
47	uint64_t u64;
48	# if RT_INLINE_ASM_GNU_STYLE
49	__asm__ __volatile__("mull %%edx"
50	: "=A" (u64)
51	: "a" (u32F2), "d" (u32F1));
52	# else
53	__asm
54	{
55	mov edx, [u32F1]
56	mov eax, [u32F2]
57	mul edx
58	mov dword ptr [u64], eax
59	mov dword ptr [u64 + 4], edx
60	}
61	# endif
62	return u64;
63	# else /* generic: */
64	return (uint64_t)u32F1 * u32F2;
65	# endif
66	}
67	#endif
68
69
70	/**
71	* Multiplies two signed 32-bit values returning a signed 64-bit result.
72	*
73	* @returns u32F1 * u32F2.
74	*/
75	#if RT_INLINE_ASM_EXTERNAL && defined(RT_ARCH_X86)
76	DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
77	#else
78	DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
79	{
80	# ifdef RT_ARCH_X86
81	int64_t i64;
82	# if RT_INLINE_ASM_GNU_STYLE
83	__asm__ __volatile__("imull %%edx"
84	: "=A" (i64)
85	: "a" (i32F2), "d" (i32F1));
86	# else
87	__asm
88	{
89	mov edx, [i32F1]
90	mov eax, [i32F2]
91	imul edx
92	mov dword ptr [i64], eax
93	mov dword ptr [i64 + 4], edx
94	}
95	# endif
96	return i64;
97	# else /* generic: */
98	return (int64_t)i32F1 * i32F2;
99	# endif
100	}
101	#endif
102
103
104	/**
105	* Divides a 64-bit unsigned by a 32-bit unsigned returning an unsigned 32-bit result.
106	*
107	* @returns u64 / u32.
108	*/
109	#if RT_INLINE_ASM_EXTERNAL && defined(RT_ARCH_X86)
110	DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
111	#else
112	DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
113	{
114	# ifdef RT_ARCH_X86
115	# if RT_INLINE_ASM_GNU_STYLE
116	RTCCUINTREG uDummy;
117	__asm__ __volatile__("divl %3"
118	: "=a" (u32), "=d"(uDummy)
119	: "A" (u64), "r" (u32));
120	# else
121	__asm
122	{
123	mov eax, dword ptr [u64]
124	mov edx, dword ptr [u64 + 4]
125	mov ecx, [u32]
126	div ecx
127	mov [u32], eax
128	}
129	# endif
130	return u32;
131	# else /* generic: */
132	return (uint32_t)(u64 / u32);
133	# endif
134	}
135	#endif
136
137
138	/**
139	* Divides a 64-bit signed by a 32-bit signed returning a signed 32-bit result.
140	*
141	* @returns u64 / u32.
142	*/
143	#if RT_INLINE_ASM_EXTERNAL && defined(RT_ARCH_X86)
144	DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
145	#else
146	DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
147	{
148	# ifdef RT_ARCH_X86
149	# if RT_INLINE_ASM_GNU_STYLE
150	RTCCUINTREG iDummy;
151	__asm__ __volatile__("idivl %3"
152	: "=a" (i32), "=d"(iDummy)
153	: "A" (i64), "r" (i32));
154	# else
155	__asm
156	{
157	mov eax, dword ptr [i64]
158	mov edx, dword ptr [i64 + 4]
159	mov ecx, [i32]
160	idiv ecx
161	mov [i32], eax
162	}
163	# endif
164	return i32;
165	# else /* generic: */
166	return (int32_t)(i64 / i32);
167	# endif
168	}
169	#endif
170
171
172	/**
173	* Performs 64-bit unsigned by a 32-bit unsigned division with a 32-bit unsigned result,
174	* returning the rest.
175	*
176	* @returns u64 % u32.
177	*
178	* @remarks It is important that the result is <= UINT32_MAX or we'll overflow and crash.
179	*/
180	#if RT_INLINE_ASM_EXTERNAL && defined(RT_ARCH_X86)
181	DECLASM(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32);
182	#else
183	DECLINLINE(uint32_t) ASMModU64ByU32RetU32(uint64_t u64, uint32_t u32)
184	{
185	# ifdef RT_ARCH_X86
186	# if RT_INLINE_ASM_GNU_STYLE
187	RTCCUINTREG uDummy;
188	__asm__ __volatile__("divl %3"
189	: "=a" (uDummy), "=d"(u32)
190	: "A" (u64), "r" (u32));
191	# else
192	__asm
193	{
194	mov eax, dword ptr [u64]
195	mov edx, dword ptr [u64 + 4]
196	mov ecx, [u32]
197	div ecx
198	mov [u32], edx
199	}
200	# endif
201	return u32;
202	# else /* generic: */
203	return (uint32_t)(u64 % u32);
204	# endif
205	}
206	#endif
207
208
209	/**
210	* Performs 64-bit signed by a 32-bit signed division with a 32-bit signed result,
211	* returning the rest.
212	*
213	* @returns u64 % u32.
214	*
215	* @remarks It is important that the result is <= UINT32_MAX or we'll overflow and crash.
216	*/
217	#if RT_INLINE_ASM_EXTERNAL && defined(RT_ARCH_X86)
218	DECLASM(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32);
219	#else
220	DECLINLINE(int32_t) ASMModS64ByS32RetS32(int64_t i64, int32_t i32)
221	{
222	# ifdef RT_ARCH_X86
223	# if RT_INLINE_ASM_GNU_STYLE
224	RTCCUINTREG iDummy;
225	__asm__ __volatile__("idivl %3"
226	: "=a" (iDummy), "=d"(i32)
227	: "A" (i64), "r" (i32));
228	# else
229	__asm
230	{
231	mov eax, dword ptr [i64]
232	mov edx, dword ptr [i64 + 4]
233	mov ecx, [i32]
234	idiv ecx
235	mov [i32], edx
236	}
237	# endif
238	return i32;
239	# else /* generic: */
240	return (int32_t)(i64 % i32);
241	# endif
242	}
243	#endif
244
245
246	/**
247	* Multiple a 64-bit by a 32-bit integer and divide the result by a 32-bit integer
248	* using a 96 bit intermediate result.
249	* @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
250	* __udivdi3 and __umoddi3 even if this inline function is not used.
251	*
252	* @returns (u64A * u32B) / u32C.
253	* @param u64A The 64-bit value.
254	* @param u32B The 32-bit value to multiple by A.
255	* @param u32C The 32-bit value to divide A*B by.
256	*
257	* @remarks Architecture specific.
258	*/
259	#if RT_INLINE_ASM_EXTERNAL \|\| !defined(__GNUC__) \|\| (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
260	DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
261	#else
262	DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
263	{
264	# if RT_INLINE_ASM_GNU_STYLE
265	# ifdef RT_ARCH_AMD64
266	uint64_t u64Result, u64Spill;
267	__asm__ __volatile__("mulq %2\n\t"
268	"divq %3\n\t"
269	: "=a" (u64Result),
270	"=d" (u64Spill)
271	: "r" ((uint64_t)u32B),
272	"r" ((uint64_t)u32C),
273	"0" (u64A),
274	"1" (0));
275	return u64Result;
276	# else
277	uint32_t u32Dummy;
278	uint64_t u64Result;
279	__asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
280	edx = u64Lo.hi = (u64A.lo * u32B).hi */
281	"xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
282	eax = u64A.hi */
283	"xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
284	edx = u32C */
285	"xchg %%edx,%%ecx \n\t" /* ecx = u32C
286	edx = u32B */
287	"mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
288	edx = u64Hi.hi = (u64A.hi * u32B).hi */
289	"addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
290	"adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
291	"divl %%ecx \n\t" /* eax = u64Hi / u32C
292	edx = u64Hi % u32C */
293	"movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
294	"movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
295	"divl %%ecx \n\t" /* u64Result.lo */
296	"movl %%edi,%%edx \n\t" /* u64Result.hi */
297	: "=A"(u64Result), "=c"(u32Dummy),
298	"=S"(u32Dummy), "=D"(u32Dummy)
299	: "a"((uint32_t)u64A),
300	"S"((uint32_t)(u64A >> 32)),
301	"c"(u32B),
302	"D"(u32C));
303	return u64Result;
304	# endif
305	# else
306	RTUINT64U u;
307	uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
308	uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
309	u64Hi += (u64Lo >> 32);
310	u.s.Hi = (uint32_t)(u64Hi / u32C);
311	u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
312	return u.u;
313	# endif
314	}
315	#endif
316
317	/** @} */
318	#endif
319

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/include/iprt/asm-math.h@ 35199

Download in other formats: