From 09419cf0c3a88aa760a5e79350b8b9020c515b5e Mon Sep 17 00:00:00 2001 From: Michael Mohr Date: Tue, 21 Dec 2010 15:36:04 -0800 Subject: [PATCH 1/9] ARM assembly fixes; implement Android makefile --- Android.mk | 106 ++++++++++++++++++++++++++++++++ src/mont/fp_montgomery_reduce.c | 6 +- 2 files changed, 110 insertions(+), 2 deletions(-) create mode 100644 Android.mk diff --git a/Android.mk b/Android.mk new file mode 100644 index 0000000..dfc98dc --- /dev/null +++ b/Android.mk @@ -0,0 +1,106 @@ +LOCAL_PATH := $(call my-dir) + +include $(CLEAR_VARS) + +LOCAL_MODULE := tfm + +LOCAL_SRC_FILES := \ + src/addsub/fp_add.c \ + src/addsub/fp_add_d.c \ + src/addsub/fp_addmod.c \ + src/addsub/fp_cmp.c \ + src/addsub/fp_cmp_d.c \ + src/addsub/fp_cmp_mag.c \ + src/addsub/fp_sub.c \ + src/addsub/fp_sub_d.c \ + src/addsub/fp_submod.c \ + src/addsub/s_fp_add.c \ + src/addsub/s_fp_sub.c \ + src/bin/fp_radix_size.c \ + src/bin/fp_read_radix.c \ + src/bin/fp_read_signed_bin.c \ + src/bin/fp_read_unsigned_bin.c \ + src/bin/fp_reverse.c \ + src/bin/fp_signed_bin_size.c \ + src/bin/fp_s_rmap.c \ + src/bin/fp_toradix.c \ + src/bin/fp_to_signed_bin.c \ + src/bin/fp_to_unsigned_bin.c \ + src/bin/fp_unsigned_bin_size.c \ + src/bit/fp_cnt_lsb.c \ + src/bit/fp_count_bits.c \ + src/bit/fp_div_2.c \ + src/bit/fp_div_2d.c \ + src/bit/fp_lshd.c \ + src/bit/fp_mod_2d.c \ + src/bit/fp_rshd.c \ + src/divide/fp_div.c \ + src/divide/fp_div_d.c \ + src/divide/fp_mod.c \ + src/divide/fp_mod_d.c \ + src/exptmod/fp_2expt.c \ + src/exptmod/fp_exptmod.c \ + src/misc/fp_ident.c \ + src/misc/fp_set.c \ + src/mont/fp_montgomery_calc_normalization.c \ + src/mont/fp_montgomery_reduce.c \ + src/mont/fp_montgomery_setup.c \ + src/mul/fp_mul_2.c \ + src/mul/fp_mul_2d.c \ + src/mul/fp_mul.c \ + src/mul/fp_mul_comba_12.c \ + src/mul/fp_mul_comba_17.c \ + src/mul/fp_mul_comba_20.c \ + src/mul/fp_mul_comba_24.c \ + src/mul/fp_mul_comba_28.c \ + src/mul/fp_mul_comba_32.c \ + src/mul/fp_mul_comba_3.c \ + 
src/mul/fp_mul_comba_48.c \ + src/mul/fp_mul_comba_4.c \ + src/mul/fp_mul_comba_64.c \ + src/mul/fp_mul_comba_6.c \ + src/mul/fp_mul_comba_7.c \ + src/mul/fp_mul_comba_8.c \ + src/mul/fp_mul_comba_9.c \ + src/mul/fp_mul_comba.c \ + src/mul/fp_mul_comba_small_set.c \ + src/mul/fp_mul_d.c \ + src/mul/fp_mulmod.c \ + src/numtheory/fp_gcd.c \ + src/numtheory/fp_invmod.c \ + src/numtheory/fp_isprime.c \ + src/numtheory/fp_lcm.c \ + src/numtheory/fp_prime_miller_rabin.c \ + src/numtheory/fp_prime_random_ex.c \ + src/sqr/fp_sqr.c \ + src/sqr/fp_sqr_comba_12.c \ + src/sqr/fp_sqr_comba_17.c \ + src/sqr/fp_sqr_comba_20.c \ + src/sqr/fp_sqr_comba_24.c \ + src/sqr/fp_sqr_comba_28.c \ + src/sqr/fp_sqr_comba_32.c \ + src/sqr/fp_sqr_comba_3.c \ + src/sqr/fp_sqr_comba_48.c \ + src/sqr/fp_sqr_comba_4.c \ + src/sqr/fp_sqr_comba_64.c \ + src/sqr/fp_sqr_comba_6.c \ + src/sqr/fp_sqr_comba_7.c \ + src/sqr/fp_sqr_comba_8.c \ + src/sqr/fp_sqr_comba_9.c \ + src/sqr/fp_sqr_comba.c \ + src/sqr/fp_sqr_comba_generic.c \ + src/sqr/fp_sqr_comba_small_set.c \ + src/sqr/fp_sqrmod.c + +LOCAL_C_INCLUDES := $(LOCAL_PATH)/src/headers + +ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) +# Possible optimizations: +# -ftree-vectorize: have GCC attempt to automatically vectorize loops +# -ftree-vectorizer-verbose=2: verbose output during compile +# Note: not all V7a targets support NEON! 
+# LOCAL_ARM_NEON := true +LOCAL_CFLAGS += -DTFM_ARM +endif + +include $(BUILD_STATIC_LIBRARY) diff --git a/src/mont/fp_montgomery_reduce.c b/src/mont/fp_montgomery_reduce.c index b8a194f..b36b10a 100644 --- a/src/mont/fp_montgomery_reduce.c +++ b/src/mont/fp_montgomery_reduce.c @@ -296,20 +296,22 @@ asm( \ asm( \ " LDR r0,%1 \n\t" \ " ADDS r0,r0,%0 \n\t" \ + " ITE CS \n\t" \ " MOVCS %0,#1 \n\t" \ " MOVCC %0,#0 \n\t" \ " UMLAL r0,%0,%3,%4 \n\t" \ " STR r0,%1 \n\t" \ -:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","%cc"); +:"=r"(cy),"=g"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","%cc"); #define PROPCARRY \ asm( \ " LDR r0,%1 \n\t" \ " ADDS r0,r0,%0 \n\t" \ " STR r0,%1 \n\t" \ + " ITE CS \n\t" \ " MOVCS %0,#1 \n\t" \ " MOVCC %0,#0 \n\t" \ -:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r0","%cc"); +:"=r"(cy),"=g"(_c[0]):"0"(cy),"1"(_c[0]):"r0","%cc"); /******************************************************************/ #elif defined(TFM_PPC32) From 7164d56129ff4f70a324fd1a4cf9021970c7c9b9 Mon Sep 17 00:00:00 2001 From: Michael Mohr Date: Tue, 21 Dec 2010 15:56:49 -0800 Subject: [PATCH 2/9] add Android documentation and sample project files --- Android_jni/Android.mk | 2 ++ Android_jni/Application.mk | 3 +++ Android_jni/README | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+) create mode 100644 Android_jni/Android.mk create mode 100644 Android_jni/Application.mk create mode 100644 Android_jni/README diff --git a/Android_jni/Android.mk b/Android_jni/Android.mk new file mode 100644 index 0000000..487df6d --- /dev/null +++ b/Android_jni/Android.mk @@ -0,0 +1,2 @@ +# Recursively sources all Android.mk files in subdirs: +include $(call all-subdir-makefiles) diff --git a/Android_jni/Application.mk b/Android_jni/Application.mk new file mode 100644 index 0000000..d1e6201 --- /dev/null +++ b/Android_jni/Application.mk @@ -0,0 +1,3 @@ +APP_MODULES := tfm +APP_OPTIM := release +APP_ABI := armeabi armeabi-v7a diff --git 
a/Android_jni/README b/Android_jni/README new file mode 100644 index 0000000..6ba47f8 --- /dev/null +++ b/Android_jni/README @@ -0,0 +1,32 @@ +This project provides a build framework for TomsFastMath on Android. + +Notes / TODO: + +To use the faster ARM inline assembly, TFM_ARM must be defined at compile time. Unfortunately, the assembly is +incompatible with the armeabi build target because some of the instructions are not supported. The +Android SDK defines a few variables which may help with detection: + +when compiled for armeabi-v7a: + __thumb2__ + __ARM_ARCH_7A__ +when compiled for armeabi: + __SOFTFP__ + +For now, the armeabi target will use the unoptimized C code. TFM_ARM is conditionally defined +for the armeabi-v7a target. + +Build instructions: + +(1) Clone this git repository to your project's JNI directory, naming the directory "tfm". +(2) Modify your JNI root's Android.mk and Application.mk using the files in this directory as a model. +(3) Run ndk-build (available from the Android NDK); see Gingerbread's NDK docs for more. + +Known bugs / issues: + +* armeabi builds are known to produce binary code that is over three times larger than armeabi-v7a. + This is probably due to either a bug in the Android NDK or limitations of the Thumb16 instruction set. + +* NEON support can't really be turned on, because not all armeabi-v7a boards support it. + After some trials, it appears that gcc can auto-vectorize some loops when NEON support is enabled + and this leads to a performance increase. But doing so will cause the code to crash on phones + where NEON isn't supported... 
From ea0658df066117e8aa9a445c7080a03df9cfc6a1 Mon Sep 17 00:00:00 2001 From: Michael Mohr Date: Tue, 28 Dec 2010 10:57:24 -0800 Subject: [PATCH 3/9] let gcc handle the IT blocks implicitly --- Android.mk | 2 +- src/mont/fp_montgomery_reduce.c | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Android.mk b/Android.mk index dfc98dc..5f01b16 100644 --- a/Android.mk +++ b/Android.mk @@ -100,7 +100,7 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) # -ftree-vectorizer-verbose=2: verbose output during compile # Note: not all V7a targets support NEON! # LOCAL_ARM_NEON := true -LOCAL_CFLAGS += -DTFM_ARM +LOCAL_CFLAGS += -DTFM_ARM -mimplicit-it=thumb endif include $(BUILD_STATIC_LIBRARY) diff --git a/src/mont/fp_montgomery_reduce.c b/src/mont/fp_montgomery_reduce.c index b36b10a..71d9499 100644 --- a/src/mont/fp_montgomery_reduce.c +++ b/src/mont/fp_montgomery_reduce.c @@ -296,7 +296,6 @@ asm( \ asm( \ " LDR r0,%1 \n\t" \ " ADDS r0,r0,%0 \n\t" \ - " ITE CS \n\t" \ " MOVCS %0,#1 \n\t" \ " MOVCC %0,#0 \n\t" \ " UMLAL r0,%0,%3,%4 \n\t" \ @@ -308,7 +307,6 @@ asm( \ " LDR r0,%1 \n\t" \ " ADDS r0,r0,%0 \n\t" \ " STR r0,%1 \n\t" \ - " ITE CS \n\t" \ " MOVCS %0,#1 \n\t" \ " MOVCC %0,#0 \n\t" \ :"=r"(cy),"=g"(_c[0]):"0"(cy),"1"(_c[0]):"r0","%cc"); From 1933d528327001be709f02b1cbc28d3a8bfcc9ad Mon Sep 17 00:00:00 2001 From: Michael Mohr Date: Tue, 28 Dec 2010 11:17:28 -0800 Subject: [PATCH 4/9] reverse previous patch, implicit IT blocks are not supported by the Android NDK --- Android.mk | 2 +- Android_jni/README | 4 ++++ src/mont/fp_montgomery_reduce.c | 6 ++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/Android.mk b/Android.mk index 5f01b16..dfc98dc 100644 --- a/Android.mk +++ b/Android.mk @@ -100,7 +100,7 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) # -ftree-vectorizer-verbose=2: verbose output during compile # Note: not all V7a targets support NEON! 
# LOCAL_ARM_NEON := true -LOCAL_CFLAGS += -DTFM_ARM -mimplicit-it=thumb +LOCAL_CFLAGS += -DTFM_ARM endif include $(BUILD_STATIC_LIBRARY) diff --git a/Android_jni/README b/Android_jni/README index 6ba47f8..0aa3670 100644 --- a/Android_jni/README +++ b/Android_jni/README @@ -15,6 +15,10 @@ when compiled for armeabi: For now, the armeabi target will use the unoptimized C code. TFM_ARM is conditionally defined for the armeabi-v7a target. +Don't forget to change FP_MAX_SIZE in src/headers/tfm.h to suit your needs, e.g. + + #define FP_MAX_SIZE (16384+(8*DIGIT_BIT)) + Build instructions: (1) Clone this git repository to your project's JNI directory, naming the directory "tfm". diff --git a/src/mont/fp_montgomery_reduce.c b/src/mont/fp_montgomery_reduce.c index 71d9499..77a679d 100644 --- a/src/mont/fp_montgomery_reduce.c +++ b/src/mont/fp_montgomery_reduce.c @@ -296,6 +296,9 @@ asm( \ asm( \ " LDR r0,%1 \n\t" \ " ADDS r0,r0,%0 \n\t" \ +#if defined(__thumb2__) + " ITE CS \n\t" \ +#endif " MOVCS %0,#1 \n\t" \ " MOVCC %0,#0 \n\t" \ " UMLAL r0,%0,%3,%4 \n\t" \ @@ -307,6 +310,9 @@ asm( \ " LDR r0,%1 \n\t" \ " ADDS r0,r0,%0 \n\t" \ " STR r0,%1 \n\t" \ +#if defined(__thumb2__) + " ITE CS \n\t" \ +#endif " MOVCS %0,#1 \n\t" \ " MOVCC %0,#0 \n\t" \ :"=r"(cy),"=g"(_c[0]):"0"(cy),"1"(_c[0]):"r0","%cc"); From 8ec8e78bc768eb2828cd6a61b3cbeec57dc8f4c8 Mon Sep 17 00:00:00 2001 From: Michael Mohr Date: Tue, 28 Dec 2010 11:33:25 -0800 Subject: [PATCH 5/9] oops, fix inline assembly from last commit --- src/mont/fp_montgomery_reduce.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mont/fp_montgomery_reduce.c b/src/mont/fp_montgomery_reduce.c index 77a679d..6453554 100644 --- a/src/mont/fp_montgomery_reduce.c +++ b/src/mont/fp_montgomery_reduce.c @@ -296,9 +296,9 @@ asm( \ asm( \ " LDR r0,%1 \n\t" \ " ADDS r0,r0,%0 \n\t" \ -#if defined(__thumb2__) + #if defined(__thumb2__) \ " ITE CS \n\t" \ -#endif + #endif \ " MOVCS %0,#1 \n\t" \ " MOVCC %0,#0 \n\t" \ " UMLAL 
r0,%0,%3,%4 \n\t" \ @@ -310,9 +310,9 @@ asm( \ " LDR r0,%1 \n\t" \ " ADDS r0,r0,%0 \n\t" \ " STR r0,%1 \n\t" \ -#if defined(__thumb2__) + #if defined(__thumb2__) \ " ITE CS \n\t" \ -#endif + #endif \ " MOVCS %0,#1 \n\t" \ " MOVCC %0,#0 \n\t" \ :"=r"(cy),"=g"(_c[0]):"0"(cy),"1"(_c[0]):"r0","%cc"); From 7e63e06d1ec20de140863e44c703d9f0da596b6e Mon Sep 17 00:00:00 2001 From: Michael Mohr Date: Tue, 28 Dec 2010 12:33:26 -0800 Subject: [PATCH 6/9] split out ARM support for Android targets --- Android.mk | 4 ++ src/mont/fp_montgomery_reduce.c | 65 ++++++++++++++++++++++++++++++--- 2 files changed, 63 insertions(+), 6 deletions(-) diff --git a/Android.mk b/Android.mk index dfc98dc..585e47b 100644 --- a/Android.mk +++ b/Android.mk @@ -100,6 +100,10 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) # -ftree-vectorizer-verbose=2: verbose output during compile # Note: not all V7a targets support NEON! # LOCAL_ARM_NEON := true +LOCAL_CFLAGS += -DTFM_ARM_V7A +else ifeq ($(TARGET_ARCH_ABI),armeabi) +LOCAL_CFLAGS += -DTFM_ARM_V5TE +else LOCAL_CFLAGS += -DTFM_ARM endif diff --git a/src/mont/fp_montgomery_reduce.c b/src/mont/fp_montgomery_reduce.c index 6453554..5cc833b 100644 --- a/src/mont/fp_montgomery_reduce.c +++ b/src/mont/fp_montgomery_reduce.c @@ -296,9 +296,6 @@ asm( \ asm( \ " LDR r0,%1 \n\t" \ " ADDS r0,r0,%0 \n\t" \ - #if defined(__thumb2__) \ - " ITE CS \n\t" \ - #endif \ " MOVCS %0,#1 \n\t" \ " MOVCC %0,#0 \n\t" \ " UMLAL r0,%0,%3,%4 \n\t" \ @@ -310,9 +307,65 @@ asm( \ " LDR r0,%1 \n\t" \ " ADDS r0,r0,%0 \n\t" \ " STR r0,%1 \n\t" \ - #if defined(__thumb2__) \ - " ITE CS \n\t" \ - #endif \ + " MOVCS %0,#1 \n\t" \ + " MOVCC %0,#0 \n\t" \ +:"=r"(cy),"=g"(_c[0]):"0"(cy),"1"(_c[0]):"r0","%cc"); + +/******************************************************************/ +#elif defined(TFM_ARM_V7A) + /* Android: armeabi-v7a target */ + +#define MONT_START +#define MONT_FINI +#define LOOP_END +#define LOOP_START \ + mu = c[x] * mp + +#define INNERMUL \ +asm( \ + " LDR r0,%1 \n\t" \ + " 
ADDS r0,r0,%0 \n\t" \ + " ITE CS \n\t" \ + " MOVCS %0,#1 \n\t" \ + " MOVCC %0,#0 \n\t" \ + " UMLAL r0,%0,%3,%4 \n\t" \ + " STR r0,%1 \n\t" \ +:"=r"(cy),"=g"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","%cc"); + +#define PROPCARRY \ +asm( \ + " LDR r0,%1 \n\t" \ + " ADDS r0,r0,%0 \n\t" \ + " STR r0,%1 \n\t" \ + " MOVCS %0,#1 \n\t" \ + " MOVCC %0,#0 \n\t" \ +:"=r"(cy),"=g"(_c[0]):"0"(cy),"1"(_c[0]):"r0","%cc"); + +/******************************************************************/ +#elif defined(TFM_ARM_V5TE) + /* Android: armeabi target */ + +#define MONT_START +#define MONT_FINI +#define LOOP_END +#define LOOP_START \ + mu = c[x] * mp + +#define INNERMUL \ +asm( \ + " LDR r0,%1 \n\t" \ + " ADDS r0,r0,%0 \n\t" \ + " MOVCS %0,#1 \n\t" \ + " MOVCC %0,#0 \n\t" \ + " UMLAL r0,%0,%3,%4 \n\t" \ + " STR r0,%1 \n\t" \ +:"=r"(cy),"=g"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","%cc"); + +#define PROPCARRY \ +asm( \ + " LDR r0,%1 \n\t" \ + " ADDS r0,r0,%0 \n\t" \ + " STR r0,%1 \n\t" \ " MOVCS %0,#1 \n\t" \ " MOVCC %0,#0 \n\t" \ :"=r"(cy),"=g"(_c[0]):"0"(cy),"1"(_c[0]):"r0","%cc"); From e077338f6f18768b626630687a87c10748e7093f Mon Sep 17 00:00:00 2001 From: Michael Mohr Date: Tue, 28 Dec 2010 12:38:50 -0800 Subject: [PATCH 7/9] oops, forgot second IT block --- src/mont/fp_montgomery_reduce.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mont/fp_montgomery_reduce.c b/src/mont/fp_montgomery_reduce.c index 5cc833b..63a3db6 100644 --- a/src/mont/fp_montgomery_reduce.c +++ b/src/mont/fp_montgomery_reduce.c @@ -337,6 +337,7 @@ asm( \ " LDR r0,%1 \n\t" \ " ADDS r0,r0,%0 \n\t" \ " STR r0,%1 \n\t" \ + " ITE CS \n\t" \ " MOVCS %0,#1 \n\t" \ " MOVCC %0,#0 \n\t" \ :"=r"(cy),"=g"(_c[0]):"0"(cy),"1"(_c[0]):"r0","%cc"); From dcb23d1e42caeddc1ff5c1f75e77b31be2bfd60f Mon Sep 17 00:00:00 2001 From: Michael Mohr Date: Tue, 28 Dec 2010 13:23:34 -0800 Subject: [PATCH 8/9] TFM_ARM must be defined for other parts of the code --- Android.mk | 10 ++++++---- 
src/mont/fp_montgomery_reduce.c | 8 ++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/Android.mk b/Android.mk index 585e47b..e9f691d 100644 --- a/Android.mk +++ b/Android.mk @@ -94,17 +94,19 @@ LOCAL_SRC_FILES := \ LOCAL_C_INCLUDES := $(LOCAL_PATH)/src/headers +LOCAL_CFLAGS += -DTFM_ARM + ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) # Possible optimizations: # -ftree-vectorize: have GCC attempt to automatically vectorize loops # -ftree-vectorizer-verbose=2: verbose output during compile -# Note: not all V7a targets support NEON! -# LOCAL_ARM_NEON := true -LOCAL_CFLAGS += -DTFM_ARM_V7A +# Note: not all V7-a targets support NEON! +LOCAL_ARM_NEON := true +LOCAL_CFLAGS += -DTFM_ARM_V7A -ftree-vectorize else ifeq ($(TARGET_ARCH_ABI),armeabi) LOCAL_CFLAGS += -DTFM_ARM_V5TE else -LOCAL_CFLAGS += -DTFM_ARM +LOCAL_CFLAGS += -DTFM_ARM_V4M endif include $(BUILD_STATIC_LIBRARY) diff --git a/src/mont/fp_montgomery_reduce.c b/src/mont/fp_montgomery_reduce.c index 63a3db6..8ed733b 100644 --- a/src/mont/fp_montgomery_reduce.c +++ b/src/mont/fp_montgomery_reduce.c @@ -283,10 +283,10 @@ asm( \ : "%eax", "%cc") /******************************************************************/ -#elif defined(TFM_ARM) - /* ARMv4 code */ +#elif defined(TFM_ARM_V4M) + /* generic ARMv4 or higher with M */ -#define MONT_START +#define MONT_START #define MONT_FINI #define LOOP_END #define LOOP_START \ @@ -325,7 +325,7 @@ asm( \ asm( \ " LDR r0,%1 \n\t" \ " ADDS r0,r0,%0 \n\t" \ - " ITE CS \n\t" \ + " ITE CS \n\t" \ " MOVCS %0,#1 \n\t" \ " MOVCC %0,#0 \n\t" \ " UMLAL r0,%0,%3,%4 \n\t" \ From d0f54a4100364b729e29d2c2a312e222a914a609 Mon Sep 17 00:00:00 2001 From: Michael Mohr Date: Tue, 28 Dec 2010 14:25:35 -0800 Subject: [PATCH 9/9] document what must be fixed for armeabi --- src/mont/fp_montgomery_reduce.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/mont/fp_montgomery_reduce.c b/src/mont/fp_montgomery_reduce.c index 8ed733b..396087a 100644 --- 
a/src/mont/fp_montgomery_reduce.c +++ b/src/mont/fp_montgomery_reduce.c @@ -352,6 +352,16 @@ asm( \ #define LOOP_START \ mu = c[x] * mp +/* +These will need a complete rewrite for armeabi: + * ADDS is not supported in Thumb16 mode + * Thumb does not support conditional execution (MOVCS/MOVCC) + * armv5te+xscale does not support UMLAL +ADDS - add and set condition flags (bottom of page 16, arm_inst.pdf) +MOVCS / MOVCC - move if carry bit is set / clear +UMLAL RdLo, RdHi, Rn, Rm - unsigned multiply Rn and Rm, then + add resulting 64-bit value to (RdHi,RdLo) +*/ #define INNERMUL \ asm( \ " LDR r0,%1 \n\t" \