wolfSSL SSL/TLS library, support up to TLS1.3

Dependents:   CyaSSL-Twitter-OAuth4Tw Example-client-tls-cert TwitterReader TweetTest ... more

Committer:
wolfSSL
Date:
Fri Jun 05 00:11:07 2020 +0000
Revision:
17:a5f916481144
Parent:
16:8e0d178b1d1e
wolfSSL 4.4.0

Who changed what in which revision?

User Revision Line number New contents of line
wolfSSL 15:117db924cf7c 1 /* asm.c
wolfSSL 15:117db924cf7c 2 *
wolfSSL 16:8e0d178b1d1e 3 * Copyright (C) 2006-2020 wolfSSL Inc.
wolfSSL 15:117db924cf7c 4 *
wolfSSL 15:117db924cf7c 5 * This file is part of wolfSSL.
wolfSSL 15:117db924cf7c 6 *
wolfSSL 15:117db924cf7c 7 * wolfSSL is free software; you can redistribute it and/or modify
wolfSSL 15:117db924cf7c 8 * it under the terms of the GNU General Public License as published by
wolfSSL 15:117db924cf7c 9 * the Free Software Foundation; either version 2 of the License, or
wolfSSL 15:117db924cf7c 10 * (at your option) any later version.
wolfSSL 15:117db924cf7c 11 *
wolfSSL 15:117db924cf7c 12 * wolfSSL is distributed in the hope that it will be useful,
wolfSSL 15:117db924cf7c 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
wolfSSL 15:117db924cf7c 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
wolfSSL 15:117db924cf7c 15 * GNU General Public License for more details.
wolfSSL 15:117db924cf7c 16 *
wolfSSL 15:117db924cf7c 17 * You should have received a copy of the GNU General Public License
wolfSSL 15:117db924cf7c 18 * along with this program; if not, write to the Free Software
wolfSSL 15:117db924cf7c 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
wolfSSL 15:117db924cf7c 20 */
wolfSSL 15:117db924cf7c 21
wolfSSL 15:117db924cf7c 22
wolfSSL 15:117db924cf7c 23 #ifdef HAVE_CONFIG_H
wolfSSL 15:117db924cf7c 24 #include <config.h>
wolfSSL 15:117db924cf7c 25 #endif
wolfSSL 15:117db924cf7c 26
wolfSSL 15:117db924cf7c 27 #include <wolfssl/wolfcrypt/settings.h>
wolfSSL 15:117db924cf7c 28
wolfSSL 15:117db924cf7c 29 /*
wolfSSL 15:117db924cf7c 30 * Based on public domain TomsFastMath 0.10 by Tom St Denis, tomstdenis@iahu.ca,
wolfSSL 15:117db924cf7c 31 * http://math.libtomcrypt.com
wolfSSL 15:117db924cf7c 32 */
wolfSSL 15:117db924cf7c 33
wolfSSL 15:117db924cf7c 34
wolfSSL 15:117db924cf7c 35 /******************************************************************/
wolfSSL 15:117db924cf7c 36 /* fp_montgomery_reduce.c asm or generic */
wolfSSL 15:117db924cf7c 37
wolfSSL 15:117db924cf7c 38
wolfSSL 15:117db924cf7c 39 /* Query cpuid leaf 7, sub-leaf 0 to see if BMI2 (MULX) and ADX (ADCX/ADOX) are
wolfSSL 15:117db924cf7c 40 * supported. Also, let's setup a macro for proper linkage w/o ABI conflicts
wolfSSL 15:117db924cf7c 41 */
wolfSSL 15:117db924cf7c 42
wolfSSL 15:117db924cf7c 43 #if defined(HAVE_INTEL_MULX)
wolfSSL 15:117db924cf7c 44 #ifndef _MSC_VER
wolfSSL 15:117db924cf7c 45 #define cpuid(reg, leaf, sub)\
wolfSSL 15:117db924cf7c 46 __asm__ __volatile__ ("cpuid":\
wolfSSL 15:117db924cf7c 47 "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\
wolfSSL 15:117db924cf7c 48 "a" (leaf), "c"(sub));
wolfSSL 15:117db924cf7c 49
wolfSSL 15:117db924cf7c 50 #define XASM_LINK(f) asm(f)
wolfSSL 15:117db924cf7c 51 #else
wolfSSL 15:117db924cf7c 52
wolfSSL 15:117db924cf7c 53 #include <intrin.h>
wolfSSL 15:117db924cf7c 54 #define cpuid(a,b,c) __cpuidex((int*)a,b,c)
wolfSSL 15:117db924cf7c 55
wolfSSL 15:117db924cf7c 56 #define XASM_LINK(f)
wolfSSL 15:117db924cf7c 57
wolfSSL 15:117db924cf7c 58 #endif /* _MSC_VER */
wolfSSL 15:117db924cf7c 59
wolfSSL 15:117db924cf7c 60 #define EAX 0
wolfSSL 15:117db924cf7c 61 #define EBX 1
wolfSSL 15:117db924cf7c 62 #define ECX 2
wolfSSL 15:117db924cf7c 63 #define EDX 3
wolfSSL 15:117db924cf7c 64
wolfSSL 15:117db924cf7c 65 #define CPUID_AVX1 0x1
wolfSSL 15:117db924cf7c 66 #define CPUID_AVX2 0x2
wolfSSL 15:117db924cf7c 67 #define CPUID_RDRAND 0x4
wolfSSL 15:117db924cf7c 68 #define CPUID_RDSEED 0x8
wolfSSL 15:117db924cf7c 69 #define CPUID_BMI2 0x10 /* MULX, RORX */
wolfSSL 15:117db924cf7c 70 #define CPUID_ADX 0x20 /* ADCX, ADOX */
wolfSSL 15:117db924cf7c 71
wolfSSL 15:117db924cf7c 72 #define IS_INTEL_AVX1 (cpuid_flags&CPUID_AVX1)
wolfSSL 15:117db924cf7c 73 #define IS_INTEL_AVX2 (cpuid_flags&CPUID_AVX2)
wolfSSL 15:117db924cf7c 74 #define IS_INTEL_BMI2 (cpuid_flags&CPUID_BMI2)
wolfSSL 15:117db924cf7c 75 #define IS_INTEL_ADX (cpuid_flags&CPUID_ADX)
wolfSSL 15:117db924cf7c 76 #define IS_INTEL_RDRAND (cpuid_flags&CPUID_RDRAND)
wolfSSL 15:117db924cf7c 77 #define IS_INTEL_RDSEED (cpuid_flags&CPUID_RDSEED)
wolfSSL 15:117db924cf7c 78 #define SET_FLAGS
wolfSSL 15:117db924cf7c 79
wolfSSL 15:117db924cf7c 80 static word32 cpuid_check = 0 ;
wolfSSL 15:117db924cf7c 81 static word32 cpuid_flags = 0 ;
wolfSSL 15:117db924cf7c 82
wolfSSL 15:117db924cf7c 83 static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) {
wolfSSL 15:117db924cf7c 84 int got_intel_cpu = 0;
wolfSSL 15:117db924cf7c 85 int got_amd_cpu = 0;
wolfSSL 15:117db924cf7c 86 unsigned int reg[5];
wolfSSL 15:117db924cf7c 87
wolfSSL 15:117db924cf7c 88 reg[4] = '\0' ;
wolfSSL 15:117db924cf7c 89 cpuid(reg, 0, 0);
wolfSSL 15:117db924cf7c 90
wolfSSL 15:117db924cf7c 91 /* check for intel cpu */
wolfSSL 15:117db924cf7c 92 if( memcmp((char *)&(reg[EBX]), "Genu", 4) == 0 &&
wolfSSL 15:117db924cf7c 93 memcmp((char *)&(reg[EDX]), "ineI", 4) == 0 &&
wolfSSL 15:117db924cf7c 94 memcmp((char *)&(reg[ECX]), "ntel", 4) == 0) {
wolfSSL 15:117db924cf7c 95 got_intel_cpu = 1;
wolfSSL 15:117db924cf7c 96 }
wolfSSL 15:117db924cf7c 97
wolfSSL 15:117db924cf7c 98 /* check for AMD cpu */
wolfSSL 15:117db924cf7c 99 if( memcmp((char *)&(reg[EBX]), "Auth", 4) == 0 &&
wolfSSL 15:117db924cf7c 100 memcmp((char *)&(reg[EDX]), "enti", 4) == 0 &&
wolfSSL 15:117db924cf7c 101 memcmp((char *)&(reg[ECX]), "cAMD", 4) == 0) {
wolfSSL 15:117db924cf7c 102 got_amd_cpu = 1;
wolfSSL 15:117db924cf7c 103 }
wolfSSL 15:117db924cf7c 104 if (got_intel_cpu || got_amd_cpu) {
wolfSSL 15:117db924cf7c 105 cpuid(reg, leaf, sub);
wolfSSL 15:117db924cf7c 106 return((reg[num]>>bit)&0x1) ;
wolfSSL 15:117db924cf7c 107 }
wolfSSL 15:117db924cf7c 108 return 0 ;
wolfSSL 15:117db924cf7c 109 }
wolfSSL 15:117db924cf7c 110
wolfSSL 15:117db924cf7c 111 WC_INLINE static int set_cpuid_flags(void) {
wolfSSL 15:117db924cf7c 112 if(cpuid_check == 0) {
wolfSSL 15:117db924cf7c 113 if(cpuid_flag(7, 0, EBX, 8)){ cpuid_flags |= CPUID_BMI2 ; }
wolfSSL 15:117db924cf7c 114 if(cpuid_flag(7, 0, EBX,19)){ cpuid_flags |= CPUID_ADX ; }
wolfSSL 15:117db924cf7c 115 cpuid_check = 1 ;
wolfSSL 15:117db924cf7c 116 return 0 ;
wolfSSL 15:117db924cf7c 117 }
wolfSSL 15:117db924cf7c 118 return 1 ;
wolfSSL 15:117db924cf7c 119 }
wolfSSL 15:117db924cf7c 120
wolfSSL 15:117db924cf7c 121 #define RETURN return
wolfSSL 15:117db924cf7c 122 #define IF_HAVE_INTEL_MULX(func, ret) \
wolfSSL 15:117db924cf7c 123 if(cpuid_check==0)set_cpuid_flags() ; \
wolfSSL 15:117db924cf7c 124 if(IS_INTEL_BMI2 && IS_INTEL_ADX){ func; ret ; }
wolfSSL 15:117db924cf7c 125
wolfSSL 15:117db924cf7c 126 #else
wolfSSL 15:117db924cf7c 127 #define IF_HAVE_INTEL_MULX(func, ret)
wolfSSL 15:117db924cf7c 128 #endif
wolfSSL 15:117db924cf7c 129
wolfSSL 15:117db924cf7c 130 #if defined(TFM_X86) && !defined(TFM_SSE2)
wolfSSL 15:117db924cf7c 131 /* x86-32 code */
wolfSSL 15:117db924cf7c 132
wolfSSL 15:117db924cf7c 133 #define MONT_START
wolfSSL 15:117db924cf7c 134 #define MONT_FINI
wolfSSL 15:117db924cf7c 135 #define LOOP_END
wolfSSL 15:117db924cf7c 136 #define LOOP_START \
wolfSSL 15:117db924cf7c 137 mu = c[x] * mp
wolfSSL 15:117db924cf7c 138
wolfSSL 15:117db924cf7c 139 #define INNERMUL \
wolfSSL 15:117db924cf7c 140 __asm__( \
wolfSSL 15:117db924cf7c 141 "movl %5,%%eax \n\t" \
wolfSSL 15:117db924cf7c 142 "mull %4 \n\t" \
wolfSSL 15:117db924cf7c 143 "addl %1,%%eax \n\t" \
wolfSSL 15:117db924cf7c 144 "adcl $0,%%edx \n\t" \
wolfSSL 15:117db924cf7c 145 "addl %%eax,%0 \n\t" \
wolfSSL 15:117db924cf7c 146 "adcl $0,%%edx \n\t" \
wolfSSL 15:117db924cf7c 147 "movl %%edx,%1 \n\t" \
wolfSSL 15:117db924cf7c 148 :"=g"(_c[LO]), "=r"(cy) \
wolfSSL 15:117db924cf7c 149 :"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \
wolfSSL 15:117db924cf7c 150 : "%eax", "%edx", "cc")
wolfSSL 15:117db924cf7c 151
wolfSSL 15:117db924cf7c 152 #define PROPCARRY \
wolfSSL 15:117db924cf7c 153 __asm__( \
wolfSSL 15:117db924cf7c 154 "addl %1,%0 \n\t" \
wolfSSL 15:117db924cf7c 155 "setb %%al \n\t" \
wolfSSL 15:117db924cf7c 156 "movzbl %%al,%1 \n\t" \
wolfSSL 15:117db924cf7c 157 :"=g"(_c[LO]), "=r"(cy) \
wolfSSL 15:117db924cf7c 158 :"0"(_c[LO]), "1"(cy) \
wolfSSL 15:117db924cf7c 159 : "%eax", "cc")
wolfSSL 15:117db924cf7c 160
wolfSSL 15:117db924cf7c 161 /******************************************************************/
wolfSSL 15:117db924cf7c 162 #elif defined(TFM_X86_64)
wolfSSL 15:117db924cf7c 163 /* x86-64 code */
wolfSSL 15:117db924cf7c 164
wolfSSL 15:117db924cf7c 165 #define MONT_START
wolfSSL 15:117db924cf7c 166 #define MONT_FINI
wolfSSL 15:117db924cf7c 167 #define LOOP_END
wolfSSL 15:117db924cf7c 168 #define LOOP_START \
wolfSSL 15:117db924cf7c 169 mu = c[x] * mp
wolfSSL 15:117db924cf7c 170
wolfSSL 15:117db924cf7c 171 #define INNERMUL \
wolfSSL 15:117db924cf7c 172 __asm__( \
wolfSSL 15:117db924cf7c 173 "movq %5,%%rax \n\t" \
wolfSSL 15:117db924cf7c 174 "mulq %4 \n\t" \
wolfSSL 15:117db924cf7c 175 "addq %1,%%rax \n\t" \
wolfSSL 15:117db924cf7c 176 "adcq $0,%%rdx \n\t" \
wolfSSL 15:117db924cf7c 177 "addq %%rax,%0 \n\t" \
wolfSSL 15:117db924cf7c 178 "adcq $0,%%rdx \n\t" \
wolfSSL 15:117db924cf7c 179 "movq %%rdx,%1 \n\t" \
wolfSSL 15:117db924cf7c 180 :"=g"(_c[LO]), "=r"(cy) \
wolfSSL 15:117db924cf7c 181 :"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \
wolfSSL 15:117db924cf7c 182 : "%rax", "%rdx", "cc")
wolfSSL 15:117db924cf7c 183
wolfSSL 15:117db924cf7c 184 #if defined(HAVE_INTEL_MULX)
wolfSSL 15:117db924cf7c 185 #define MULX_INNERMUL8(x,y,z,cy) \
wolfSSL 15:117db924cf7c 186 __asm__ volatile ( \
wolfSSL 15:117db924cf7c 187 "movq %[yn], %%rdx\n\t" \
wolfSSL 15:117db924cf7c 188 "xorq %%rcx, %%rcx\n\t" \
wolfSSL 15:117db924cf7c 189 "movq 0(%[c]), %%r8\n\t" \
wolfSSL 15:117db924cf7c 190 "movq 8(%[c]), %%r9\n\t" \
wolfSSL 15:117db924cf7c 191 "movq 16(%[c]), %%r10\n\t" \
wolfSSL 15:117db924cf7c 192 "movq 24(%[c]), %%r11\n\t" \
wolfSSL 15:117db924cf7c 193 "movq 32(%[c]), %%r12\n\t" \
wolfSSL 15:117db924cf7c 194 "movq 40(%[c]), %%r13\n\t" \
wolfSSL 15:117db924cf7c 195 "movq 48(%[c]), %%r14\n\t" \
wolfSSL 15:117db924cf7c 196 "movq 56(%[c]), %%r15\n\t" \
wolfSSL 15:117db924cf7c 197 \
wolfSSL 15:117db924cf7c 198 "mulx 0(%[xp]), %%rax, %%rcx\n\t" \
wolfSSL 15:117db924cf7c 199 "adcxq %[cy], %%r8\n\t" \
wolfSSL 15:117db924cf7c 200 "adoxq %%rax, %%r8\n\t" \
wolfSSL 15:117db924cf7c 201 "mulx 8(%[xp]), %%rax, %[cy]\n\t" \
wolfSSL 15:117db924cf7c 202 "adcxq %%rcx, %%r9\n\t" \
wolfSSL 15:117db924cf7c 203 "adoxq %%rax, %%r9\n\t" \
wolfSSL 15:117db924cf7c 204 "mulx 16(%[xp]), %%rax, %%rcx\n\t" \
wolfSSL 15:117db924cf7c 205 "adcxq %[cy], %%r10\n\t" \
wolfSSL 15:117db924cf7c 206 "adoxq %%rax, %%r10\n\t" \
wolfSSL 15:117db924cf7c 207 "mulx 24(%[xp]), %%rax, %[cy]\n\t" \
wolfSSL 15:117db924cf7c 208 "adcxq %%rcx, %%r11\n\t" \
wolfSSL 15:117db924cf7c 209 "adoxq %%rax, %%r11\n\t" \
wolfSSL 15:117db924cf7c 210 "mulx 32(%[xp]), %%rax, %%rcx\n\t" \
wolfSSL 15:117db924cf7c 211 "adcxq %[cy], %%r12\n\t" \
wolfSSL 15:117db924cf7c 212 "adoxq %%rax, %%r12\n\t" \
wolfSSL 15:117db924cf7c 213 "mulx 40(%[xp]), %%rax, %[cy]\n\t" \
wolfSSL 15:117db924cf7c 214 "adcxq %%rcx, %%r13\n\t" \
wolfSSL 15:117db924cf7c 215 "adoxq %%rax, %%r13\n\t" \
wolfSSL 15:117db924cf7c 216 "mulx 48(%[xp]), %%rax, %%rcx\n\t" \
wolfSSL 15:117db924cf7c 217 "adcxq %[cy], %%r14\n\t" \
wolfSSL 15:117db924cf7c 218 "adoxq %%rax, %%r14\n\t" \
wolfSSL 15:117db924cf7c 219 "adcxq %%rcx, %%r15\n\t" \
wolfSSL 15:117db924cf7c 220 "mulx 56(%[xp]), %%rax, %[cy]\n\t" \
wolfSSL 15:117db924cf7c 221 "movq $0, %%rdx\n\t" \
wolfSSL 15:117db924cf7c 222 "adoxq %%rdx, %%rax\n\t" \
wolfSSL 15:117db924cf7c 223 "adcxq %%rdx, %[cy]\n\t" \
wolfSSL 15:117db924cf7c 224 "adoxq %%rdx, %[cy]\n\t" \
wolfSSL 15:117db924cf7c 225 "addq %%rax, %%r15\n\t" \
wolfSSL 15:117db924cf7c 226 "adcq $0, %[cy]\n\t" \
wolfSSL 15:117db924cf7c 227 \
wolfSSL 15:117db924cf7c 228 "movq %%r8, 0(%[c])\n\t" \
wolfSSL 15:117db924cf7c 229 "movq %%r9, 8(%[c])\n\t" \
wolfSSL 15:117db924cf7c 230 "movq %%r10, 16(%[c])\n\t" \
wolfSSL 15:117db924cf7c 231 "movq %%r11, 24(%[c])\n\t" \
wolfSSL 15:117db924cf7c 232 "movq %%r12, 32(%[c])\n\t" \
wolfSSL 15:117db924cf7c 233 "movq %%r13, 40(%[c])\n\t" \
wolfSSL 15:117db924cf7c 234 "movq %%r14, 48(%[c])\n\t" \
wolfSSL 15:117db924cf7c 235 "movq %%r15, 56(%[c])\n\t" \
wolfSSL 15:117db924cf7c 236 : [cy] "+r" (cy) \
wolfSSL 15:117db924cf7c 237 : [xp] "r" (x), [c] "r" (c_mulx), [yn] "rm" (y) \
wolfSSL 15:117db924cf7c 238 :"%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", \
wolfSSL 15:117db924cf7c 239 "%rdx", "%rax", "%rcx" \
wolfSSL 15:117db924cf7c 240 )
wolfSSL 15:117db924cf7c 241
wolfSSL 15:117db924cf7c 242 #define INNERMUL8_MULX \
wolfSSL 15:117db924cf7c 243 {\
wolfSSL 15:117db924cf7c 244 MULX_INNERMUL8(tmpm, mu, _c, cy);\
wolfSSL 15:117db924cf7c 245 }
wolfSSL 15:117db924cf7c 246 #endif
wolfSSL 15:117db924cf7c 247
wolfSSL 15:117db924cf7c 248 #define INNERMUL8 \
wolfSSL 15:117db924cf7c 249 __asm__( \
wolfSSL 15:117db924cf7c 250 "movq 0(%5),%%rax \n\t" \
wolfSSL 15:117db924cf7c 251 "movq 0(%2),%%r10 \n\t" \
wolfSSL 15:117db924cf7c 252 "movq 0x8(%5),%%r11 \n\t" \
wolfSSL 15:117db924cf7c 253 "mulq %4 \n\t" \
wolfSSL 15:117db924cf7c 254 "addq %%r10,%%rax \n\t" \
wolfSSL 15:117db924cf7c 255 "adcq $0,%%rdx \n\t" \
wolfSSL 15:117db924cf7c 256 "movq 0x8(%2),%%r10 \n\t" \
wolfSSL 15:117db924cf7c 257 "addq %3,%%rax \n\t" \
wolfSSL 15:117db924cf7c 258 "adcq $0,%%rdx \n\t" \
wolfSSL 15:117db924cf7c 259 "movq %%rax,0(%0) \n\t" \
wolfSSL 15:117db924cf7c 260 "movq %%rdx,%1 \n\t" \
wolfSSL 15:117db924cf7c 261 \
wolfSSL 15:117db924cf7c 262 "movq %%r11,%%rax \n\t" \
wolfSSL 15:117db924cf7c 263 "movq 0x10(%5),%%r11 \n\t" \
wolfSSL 15:117db924cf7c 264 "mulq %4 \n\t" \
wolfSSL 15:117db924cf7c 265 "addq %%r10,%%rax \n\t" \
wolfSSL 15:117db924cf7c 266 "adcq $0,%%rdx \n\t" \
wolfSSL 15:117db924cf7c 267 "movq 0x10(%2),%%r10 \n\t" \
wolfSSL 15:117db924cf7c 268 "addq %3,%%rax \n\t" \
wolfSSL 15:117db924cf7c 269 "adcq $0,%%rdx \n\t" \
wolfSSL 15:117db924cf7c 270 "movq %%rax,0x8(%0) \n\t" \
wolfSSL 15:117db924cf7c 271 "movq %%rdx,%1 \n\t" \
wolfSSL 15:117db924cf7c 272 \
wolfSSL 15:117db924cf7c 273 "movq %%r11,%%rax \n\t" \
wolfSSL 15:117db924cf7c 274 "movq 0x18(%5),%%r11 \n\t" \
wolfSSL 15:117db924cf7c 275 "mulq %4 \n\t" \
wolfSSL 15:117db924cf7c 276 "addq %%r10,%%rax \n\t" \
wolfSSL 15:117db924cf7c 277 "adcq $0,%%rdx \n\t" \
wolfSSL 15:117db924cf7c 278 "movq 0x18(%2),%%r10 \n\t" \
wolfSSL 15:117db924cf7c 279 "addq %3,%%rax \n\t" \
wolfSSL 15:117db924cf7c 280 "adcq $0,%%rdx \n\t" \
wolfSSL 15:117db924cf7c 281 "movq %%rax,0x10(%0) \n\t" \
wolfSSL 15:117db924cf7c 282 "movq %%rdx,%1 \n\t" \
wolfSSL 15:117db924cf7c 283 \
wolfSSL 15:117db924cf7c 284 "movq %%r11,%%rax \n\t" \
wolfSSL 15:117db924cf7c 285 "movq 0x20(%5),%%r11 \n\t" \
wolfSSL 15:117db924cf7c 286 "mulq %4 \n\t" \
wolfSSL 15:117db924cf7c 287 "addq %%r10,%%rax \n\t" \
wolfSSL 15:117db924cf7c 288 "adcq $0,%%rdx \n\t" \
wolfSSL 15:117db924cf7c 289 "movq 0x20(%2),%%r10 \n\t" \
wolfSSL 15:117db924cf7c 290 "addq %3,%%rax \n\t" \
wolfSSL 15:117db924cf7c 291 "adcq $0,%%rdx \n\t" \
wolfSSL 15:117db924cf7c 292 "movq %%rax,0x18(%0) \n\t" \
wolfSSL 15:117db924cf7c 293 "movq %%rdx,%1 \n\t" \
wolfSSL 15:117db924cf7c 294 \
wolfSSL 15:117db924cf7c 295 "movq %%r11,%%rax \n\t" \
wolfSSL 15:117db924cf7c 296 "movq 0x28(%5),%%r11 \n\t" \
wolfSSL 15:117db924cf7c 297 "mulq %4 \n\t" \
wolfSSL 15:117db924cf7c 298 "addq %%r10,%%rax \n\t" \
wolfSSL 15:117db924cf7c 299 "adcq $0,%%rdx \n\t" \
wolfSSL 15:117db924cf7c 300 "movq 0x28(%2),%%r10 \n\t" \
wolfSSL 15:117db924cf7c 301 "addq %3,%%rax \n\t" \
wolfSSL 15:117db924cf7c 302 "adcq $0,%%rdx \n\t" \
wolfSSL 15:117db924cf7c 303 "movq %%rax,0x20(%0) \n\t" \
wolfSSL 15:117db924cf7c 304 "movq %%rdx,%1 \n\t" \
wolfSSL 15:117db924cf7c 305 \
wolfSSL 15:117db924cf7c 306 "movq %%r11,%%rax \n\t" \
wolfSSL 15:117db924cf7c 307 "movq 0x30(%5),%%r11 \n\t" \
wolfSSL 15:117db924cf7c 308 "mulq %4 \n\t" \
wolfSSL 15:117db924cf7c 309 "addq %%r10,%%rax \n\t" \
wolfSSL 15:117db924cf7c 310 "adcq $0,%%rdx \n\t" \
wolfSSL 15:117db924cf7c 311 "movq 0x30(%2),%%r10 \n\t" \
wolfSSL 15:117db924cf7c 312 "addq %3,%%rax \n\t" \
wolfSSL 15:117db924cf7c 313 "adcq $0,%%rdx \n\t" \
wolfSSL 15:117db924cf7c 314 "movq %%rax,0x28(%0) \n\t" \
wolfSSL 15:117db924cf7c 315 "movq %%rdx,%1 \n\t" \
wolfSSL 15:117db924cf7c 316 \
wolfSSL 15:117db924cf7c 317 "movq %%r11,%%rax \n\t" \
wolfSSL 15:117db924cf7c 318 "movq 0x38(%5),%%r11 \n\t" \
wolfSSL 15:117db924cf7c 319 "mulq %4 \n\t" \
wolfSSL 15:117db924cf7c 320 "addq %%r10,%%rax \n\t" \
wolfSSL 15:117db924cf7c 321 "adcq $0,%%rdx \n\t" \
wolfSSL 15:117db924cf7c 322 "movq 0x38(%2),%%r10 \n\t" \
wolfSSL 15:117db924cf7c 323 "addq %3,%%rax \n\t" \
wolfSSL 15:117db924cf7c 324 "adcq $0,%%rdx \n\t" \
wolfSSL 15:117db924cf7c 325 "movq %%rax,0x30(%0) \n\t" \
wolfSSL 15:117db924cf7c 326 "movq %%rdx,%1 \n\t" \
wolfSSL 15:117db924cf7c 327 \
wolfSSL 15:117db924cf7c 328 "movq %%r11,%%rax \n\t" \
wolfSSL 15:117db924cf7c 329 "mulq %4 \n\t" \
wolfSSL 15:117db924cf7c 330 "addq %%r10,%%rax \n\t" \
wolfSSL 15:117db924cf7c 331 "adcq $0,%%rdx \n\t" \
wolfSSL 15:117db924cf7c 332 "addq %3,%%rax \n\t" \
wolfSSL 15:117db924cf7c 333 "adcq $0,%%rdx \n\t" \
wolfSSL 15:117db924cf7c 334 "movq %%rax,0x38(%0) \n\t" \
wolfSSL 15:117db924cf7c 335 "movq %%rdx,%1 \n\t" \
wolfSSL 15:117db924cf7c 336 \
wolfSSL 15:117db924cf7c 337 :"=r"(_c), "=r"(cy) \
wolfSSL 15:117db924cf7c 338 : "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\
wolfSSL 15:117db924cf7c 339 : "%rax", "%rdx", "%r10", "%r11", "cc")
wolfSSL 15:117db924cf7c 340
wolfSSL 15:117db924cf7c 341 #define PROPCARRY \
wolfSSL 15:117db924cf7c 342 __asm__( \
wolfSSL 15:117db924cf7c 343 "addq %1,%0 \n\t" \
wolfSSL 15:117db924cf7c 344 "setb %%al \n\t" \
wolfSSL 15:117db924cf7c 345 "movzbq %%al,%1 \n\t" \
wolfSSL 15:117db924cf7c 346 :"=g"(_c[LO]), "=r"(cy) \
wolfSSL 15:117db924cf7c 347 :"0"(_c[LO]), "1"(cy) \
wolfSSL 15:117db924cf7c 348 : "%rax", "cc")
wolfSSL 15:117db924cf7c 349
wolfSSL 15:117db924cf7c 350 /******************************************************************/
wolfSSL 15:117db924cf7c 351 #elif defined(TFM_SSE2)
wolfSSL 15:117db924cf7c 352 /* SSE2 code (assumes 32-bit fp_digits) */
wolfSSL 15:117db924cf7c 353 /* MMX register assignments:
wolfSSL 15:117db924cf7c 354 * mm0 *tmpm++, then Mu * (*tmpm++)
wolfSSL 15:117db924cf7c 355 * mm1 c[x], then Mu
wolfSSL 15:117db924cf7c 356 * mm2 mp
wolfSSL 15:117db924cf7c 357 * mm3 cy
wolfSSL 15:117db924cf7c 358 * mm4 _c[LO]
wolfSSL 15:117db924cf7c 359 */
wolfSSL 15:117db924cf7c 360
wolfSSL 15:117db924cf7c 361 #define MONT_START \
wolfSSL 15:117db924cf7c 362 __asm__("movd %0,%%mm2"::"g"(mp))
wolfSSL 15:117db924cf7c 363
wolfSSL 15:117db924cf7c 364 #define MONT_FINI \
wolfSSL 15:117db924cf7c 365 __asm__("emms")
wolfSSL 15:117db924cf7c 366
wolfSSL 15:117db924cf7c 367 #define LOOP_START \
wolfSSL 15:117db924cf7c 368 __asm__( \
wolfSSL 15:117db924cf7c 369 "movd %0,%%mm1 \n\t" \
wolfSSL 15:117db924cf7c 370 "pxor %%mm3,%%mm3 \n\t" \
wolfSSL 15:117db924cf7c 371 "pmuludq %%mm2,%%mm1 \n\t" \
wolfSSL 15:117db924cf7c 372 :: "g"(c[x]))
wolfSSL 15:117db924cf7c 373
wolfSSL 15:117db924cf7c 374 /* pmuludq on mmx registers does a 32x32->64 multiply. */
wolfSSL 15:117db924cf7c 375 #define INNERMUL \
wolfSSL 15:117db924cf7c 376 __asm__( \
wolfSSL 15:117db924cf7c 377 "movd %1,%%mm4 \n\t" \
wolfSSL 15:117db924cf7c 378 "movd %2,%%mm0 \n\t" \
wolfSSL 15:117db924cf7c 379 "paddq %%mm4,%%mm3 \n\t" \
wolfSSL 15:117db924cf7c 380 "pmuludq %%mm1,%%mm0 \n\t" \
wolfSSL 15:117db924cf7c 381 "paddq %%mm0,%%mm3 \n\t" \
wolfSSL 15:117db924cf7c 382 "movd %%mm3,%0 \n\t" \
wolfSSL 15:117db924cf7c 383 "psrlq $32, %%mm3 \n\t" \
wolfSSL 15:117db924cf7c 384 :"=g"(_c[LO]) : "0"(_c[LO]), "g"(*tmpm++) );
wolfSSL 15:117db924cf7c 385
wolfSSL 15:117db924cf7c 386 #define INNERMUL8 \
wolfSSL 15:117db924cf7c 387 __asm__( \
wolfSSL 15:117db924cf7c 388 "movd 0(%1),%%mm4 \n\t" \
wolfSSL 15:117db924cf7c 389 "movd 0(%2),%%mm0 \n\t" \
wolfSSL 15:117db924cf7c 390 "paddq %%mm4,%%mm3 \n\t" \
wolfSSL 15:117db924cf7c 391 "pmuludq %%mm1,%%mm0 \n\t" \
wolfSSL 15:117db924cf7c 392 "movd 4(%2),%%mm5 \n\t" \
wolfSSL 15:117db924cf7c 393 "paddq %%mm0,%%mm3 \n\t" \
wolfSSL 15:117db924cf7c 394 "movd 4(%1),%%mm6 \n\t" \
wolfSSL 15:117db924cf7c 395 "movd %%mm3,0(%0) \n\t" \
wolfSSL 15:117db924cf7c 396 "psrlq $32, %%mm3 \n\t" \
wolfSSL 15:117db924cf7c 397 \
wolfSSL 15:117db924cf7c 398 "paddq %%mm6,%%mm3 \n\t" \
wolfSSL 15:117db924cf7c 399 "pmuludq %%mm1,%%mm5 \n\t" \
wolfSSL 15:117db924cf7c 400 "movd 8(%2),%%mm6 \n\t" \
wolfSSL 15:117db924cf7c 401 "paddq %%mm5,%%mm3 \n\t" \
wolfSSL 15:117db924cf7c 402 "movd 8(%1),%%mm7 \n\t" \
wolfSSL 15:117db924cf7c 403 "movd %%mm3,4(%0) \n\t" \
wolfSSL 15:117db924cf7c 404 "psrlq $32, %%mm3 \n\t" \
wolfSSL 15:117db924cf7c 405 \
wolfSSL 15:117db924cf7c 406 "paddq %%mm7,%%mm3 \n\t" \
wolfSSL 15:117db924cf7c 407 "pmuludq %%mm1,%%mm6 \n\t" \
wolfSSL 15:117db924cf7c 408 "movd 12(%2),%%mm7 \n\t" \
wolfSSL 15:117db924cf7c 409 "paddq %%mm6,%%mm3 \n\t" \
wolfSSL 15:117db924cf7c 410 "movd 12(%1),%%mm5 \n\t" \
wolfSSL 15:117db924cf7c 411 "movd %%mm3,8(%0) \n\t" \
wolfSSL 15:117db924cf7c 412 "psrlq $32, %%mm3 \n\t" \
wolfSSL 15:117db924cf7c 413 \
wolfSSL 15:117db924cf7c 414 "paddq %%mm5,%%mm3 \n\t" \
wolfSSL 15:117db924cf7c 415 "pmuludq %%mm1,%%mm7 \n\t" \
wolfSSL 15:117db924cf7c 416 "movd 16(%2),%%mm5 \n\t" \
wolfSSL 15:117db924cf7c 417 "paddq %%mm7,%%mm3 \n\t" \
wolfSSL 15:117db924cf7c 418 "movd 16(%1),%%mm6 \n\t" \
wolfSSL 15:117db924cf7c 419 "movd %%mm3,12(%0) \n\t" \
wolfSSL 15:117db924cf7c 420 "psrlq $32, %%mm3 \n\t" \
wolfSSL 15:117db924cf7c 421 \
wolfSSL 15:117db924cf7c 422 "paddq %%mm6,%%mm3 \n\t" \
wolfSSL 15:117db924cf7c 423 "pmuludq %%mm1,%%mm5 \n\t" \
wolfSSL 15:117db924cf7c 424 "movd 20(%2),%%mm6 \n\t" \
wolfSSL 15:117db924cf7c 425 "paddq %%mm5,%%mm3 \n\t" \
wolfSSL 15:117db924cf7c 426 "movd 20(%1),%%mm7 \n\t" \
wolfSSL 15:117db924cf7c 427 "movd %%mm3,16(%0) \n\t" \
wolfSSL 15:117db924cf7c 428 "psrlq $32, %%mm3 \n\t" \
wolfSSL 15:117db924cf7c 429 \
wolfSSL 15:117db924cf7c 430 "paddq %%mm7,%%mm3 \n\t" \
wolfSSL 15:117db924cf7c 431 "pmuludq %%mm1,%%mm6 \n\t" \
wolfSSL 15:117db924cf7c 432 "movd 24(%2),%%mm7 \n\t" \
wolfSSL 15:117db924cf7c 433 "paddq %%mm6,%%mm3 \n\t" \
wolfSSL 15:117db924cf7c 434 "movd 24(%1),%%mm5 \n\t" \
wolfSSL 15:117db924cf7c 435 "movd %%mm3,20(%0) \n\t" \
wolfSSL 15:117db924cf7c 436 "psrlq $32, %%mm3 \n\t" \
wolfSSL 15:117db924cf7c 437 \
wolfSSL 15:117db924cf7c 438 "paddq %%mm5,%%mm3 \n\t" \
wolfSSL 15:117db924cf7c 439 "pmuludq %%mm1,%%mm7 \n\t" \
wolfSSL 15:117db924cf7c 440 "movd 28(%2),%%mm5 \n\t" \
wolfSSL 15:117db924cf7c 441 "paddq %%mm7,%%mm3 \n\t" \
wolfSSL 15:117db924cf7c 442 "movd 28(%1),%%mm6 \n\t" \
wolfSSL 15:117db924cf7c 443 "movd %%mm3,24(%0) \n\t" \
wolfSSL 15:117db924cf7c 444 "psrlq $32, %%mm3 \n\t" \
wolfSSL 15:117db924cf7c 445 \
wolfSSL 15:117db924cf7c 446 "paddq %%mm6,%%mm3 \n\t" \
wolfSSL 15:117db924cf7c 447 "pmuludq %%mm1,%%mm5 \n\t" \
wolfSSL 15:117db924cf7c 448 "paddq %%mm5,%%mm3 \n\t" \
wolfSSL 15:117db924cf7c 449 "movd %%mm3,28(%0) \n\t" \
wolfSSL 15:117db924cf7c 450 "psrlq $32, %%mm3 \n\t" \
wolfSSL 15:117db924cf7c 451 :"=r"(_c) : "0"(_c), "r"(tmpm) );
wolfSSL 15:117db924cf7c 452
wolfSSL 15:117db924cf7c 453 /* TAO switched tmpm from "g" to "r" after gcc tried to index the indexed stack
wolfSSL 15:117db924cf7c 454 pointer */
wolfSSL 15:117db924cf7c 455
wolfSSL 15:117db924cf7c 456 #define LOOP_END \
wolfSSL 15:117db924cf7c 457 __asm__( "movd %%mm3,%0 \n" :"=r"(cy))
wolfSSL 15:117db924cf7c 458
wolfSSL 15:117db924cf7c 459 #define PROPCARRY \
wolfSSL 15:117db924cf7c 460 __asm__( \
wolfSSL 15:117db924cf7c 461 "addl %1,%0 \n\t" \
wolfSSL 15:117db924cf7c 462 "setb %%al \n\t" \
wolfSSL 15:117db924cf7c 463 "movzbl %%al,%1 \n\t" \
wolfSSL 15:117db924cf7c 464 :"=g"(_c[LO]), "=r"(cy) \
wolfSSL 15:117db924cf7c 465 :"0"(_c[LO]), "1"(cy) \
wolfSSL 15:117db924cf7c 466 : "%eax", "cc")
wolfSSL 15:117db924cf7c 467
wolfSSL 15:117db924cf7c 468 /******************************************************************/
wolfSSL 15:117db924cf7c 469 #elif defined(TFM_ARM)
wolfSSL 15:117db924cf7c 470 /* ARMv4 code */
wolfSSL 15:117db924cf7c 471
wolfSSL 15:117db924cf7c 472 #define MONT_START
wolfSSL 15:117db924cf7c 473 #define MONT_FINI
wolfSSL 15:117db924cf7c 474 #define LOOP_END
wolfSSL 15:117db924cf7c 475 #define LOOP_START \
wolfSSL 15:117db924cf7c 476 mu = c[x] * mp
wolfSSL 15:117db924cf7c 477
wolfSSL 15:117db924cf7c 478
wolfSSL 15:117db924cf7c 479 #ifdef __thumb__
wolfSSL 15:117db924cf7c 480
wolfSSL 15:117db924cf7c 481 #define INNERMUL \
wolfSSL 15:117db924cf7c 482 __asm__( \
wolfSSL 15:117db924cf7c 483 " LDR r0,%1 \n\t" \
wolfSSL 15:117db924cf7c 484 " ADDS r0,r0,%0 \n\t" \
wolfSSL 15:117db924cf7c 485 " ITE CS \n\t" \
wolfSSL 15:117db924cf7c 486 " MOVCS %0,#1 \n\t" \
wolfSSL 15:117db924cf7c 487 " MOVCC %0,#0 \n\t" \
wolfSSL 15:117db924cf7c 488 " UMLAL r0,%0,%3,%4 \n\t" \
wolfSSL 15:117db924cf7c 489 " STR r0,%1 \n\t" \
wolfSSL 15:117db924cf7c 490 :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"m"(_c[0]):"r0","cc");
wolfSSL 15:117db924cf7c 491
wolfSSL 15:117db924cf7c 492 #define PROPCARRY \
wolfSSL 15:117db924cf7c 493 __asm__( \
wolfSSL 15:117db924cf7c 494 " LDR r0,%1 \n\t" \
wolfSSL 15:117db924cf7c 495 " ADDS r0,r0,%0 \n\t" \
wolfSSL 15:117db924cf7c 496 " STR r0,%1 \n\t" \
wolfSSL 15:117db924cf7c 497 " ITE CS \n\t" \
wolfSSL 15:117db924cf7c 498 " MOVCS %0,#1 \n\t" \
wolfSSL 15:117db924cf7c 499 " MOVCC %0,#0 \n\t" \
wolfSSL 15:117db924cf7c 500 :"=r"(cy),"=m"(_c[0]):"0"(cy),"m"(_c[0]):"r0","cc");
wolfSSL 15:117db924cf7c 501
wolfSSL 15:117db924cf7c 502
wolfSSL 15:117db924cf7c 503 /* TAO thumb mode uses ite (if then else) to detect carry directly
wolfSSL 15:117db924cf7c 504 * fixed unmatched constraint warning by changing 1 to m */
wolfSSL 15:117db924cf7c 505
wolfSSL 15:117db924cf7c 506 #else /* __thumb__ */
wolfSSL 15:117db924cf7c 507
wolfSSL 15:117db924cf7c 508 #define INNERMUL \
wolfSSL 15:117db924cf7c 509 __asm__( \
wolfSSL 15:117db924cf7c 510 " LDR r0,%1 \n\t" \
wolfSSL 15:117db924cf7c 511 " ADDS r0,r0,%0 \n\t" \
wolfSSL 15:117db924cf7c 512 " MOVCS %0,#1 \n\t" \
wolfSSL 15:117db924cf7c 513 " MOVCC %0,#0 \n\t" \
wolfSSL 15:117db924cf7c 514 " UMLAL r0,%0,%3,%4 \n\t" \
wolfSSL 15:117db924cf7c 515 " STR r0,%1 \n\t" \
wolfSSL 15:117db924cf7c 516 :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","cc");
wolfSSL 15:117db924cf7c 517
wolfSSL 15:117db924cf7c 518 #define PROPCARRY \
wolfSSL 15:117db924cf7c 519 __asm__( \
wolfSSL 15:117db924cf7c 520 " LDR r0,%1 \n\t" \
wolfSSL 15:117db924cf7c 521 " ADDS r0,r0,%0 \n\t" \
wolfSSL 15:117db924cf7c 522 " STR r0,%1 \n\t" \
wolfSSL 15:117db924cf7c 523 " MOVCS %0,#1 \n\t" \
wolfSSL 15:117db924cf7c 524 " MOVCC %0,#0 \n\t" \
wolfSSL 15:117db924cf7c 525 :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r0","cc");
wolfSSL 15:117db924cf7c 526
wolfSSL 15:117db924cf7c 527 #endif /* __thumb__ */
wolfSSL 15:117db924cf7c 528
wolfSSL 15:117db924cf7c 529 #elif defined(TFM_PPC32)
wolfSSL 15:117db924cf7c 530
wolfSSL 15:117db924cf7c 531 /* PPC32 */
wolfSSL 15:117db924cf7c 532 #define MONT_START
wolfSSL 15:117db924cf7c 533 #define MONT_FINI
wolfSSL 15:117db924cf7c 534 #define LOOP_END
wolfSSL 15:117db924cf7c 535 #define LOOP_START \
wolfSSL 15:117db924cf7c 536 mu = c[x] * mp
wolfSSL 15:117db924cf7c 537
wolfSSL 15:117db924cf7c 538 #define INNERMUL \
wolfSSL 15:117db924cf7c 539 __asm__( \
wolfSSL 15:117db924cf7c 540 " mullw 16,%3,%4 \n\t" \
wolfSSL 15:117db924cf7c 541 " mulhwu 17,%3,%4 \n\t" \
wolfSSL 15:117db924cf7c 542 " addc 16,16,%2 \n\t" \
wolfSSL 15:117db924cf7c 543 " addze 17,17 \n\t" \
wolfSSL 15:117db924cf7c 544 " addc %1,16,%5 \n\t" \
wolfSSL 15:117db924cf7c 545 " addze %0,17 \n\t" \
wolfSSL 15:117db924cf7c 546 :"=r"(cy),"=r"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "cc"); ++tmpm;
wolfSSL 15:117db924cf7c 547
wolfSSL 15:117db924cf7c 548 #define PROPCARRY \
wolfSSL 15:117db924cf7c 549 __asm__( \
wolfSSL 15:117db924cf7c 550 " addc %1,%3,%2 \n\t" \
wolfSSL 15:117db924cf7c 551 " xor %0,%2,%2 \n\t" \
wolfSSL 15:117db924cf7c 552 " addze %0,%2 \n\t" \
wolfSSL 15:117db924cf7c 553 :"=r"(cy),"=r"(_c[0]):"0"(cy),"1"(_c[0]):"cc");
wolfSSL 15:117db924cf7c 554
wolfSSL 15:117db924cf7c 555 #elif defined(TFM_PPC64)
wolfSSL 15:117db924cf7c 556
wolfSSL 15:117db924cf7c 557 /* PPC64 */
wolfSSL 15:117db924cf7c 558 #define MONT_START
wolfSSL 15:117db924cf7c 559 #define MONT_FINI
wolfSSL 15:117db924cf7c 560 #define LOOP_END
wolfSSL 15:117db924cf7c 561 #define LOOP_START \
wolfSSL 15:117db924cf7c 562 mu = c[x] * mp
wolfSSL 15:117db924cf7c 563
wolfSSL 15:117db924cf7c 564 #define INNERMUL \
wolfSSL 15:117db924cf7c 565 __asm__( \
wolfSSL 15:117db924cf7c 566 " mulld r16,%3,%4 \n\t" \
wolfSSL 15:117db924cf7c 567 " mulhdu r17,%3,%4 \n\t" \
wolfSSL 15:117db924cf7c 568 " addc r16,16,%0 \n\t" \
wolfSSL 15:117db924cf7c 569 " addze r17,r17 \n\t" \
wolfSSL 15:117db924cf7c 570 " ldx r18,0,%1 \n\t" \
wolfSSL 15:117db924cf7c 571 " addc r16,r16,r18 \n\t" \
wolfSSL 15:117db924cf7c 572 " addze %0,r17 \n\t" \
wolfSSL 15:117db924cf7c 573 " sdx r16,0,%1 \n\t" \
wolfSSL 15:117db924cf7c 574 :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"r16", "r17", "r18","cc"); ++tmpm;
wolfSSL 15:117db924cf7c 575
wolfSSL 15:117db924cf7c 576 #define PROPCARRY \
wolfSSL 15:117db924cf7c 577 __asm__( \
wolfSSL 15:117db924cf7c 578 " ldx r16,0,%1 \n\t" \
wolfSSL 15:117db924cf7c 579 " addc r16,r16,%0 \n\t" \
wolfSSL 15:117db924cf7c 580 " sdx r16,0,%1 \n\t" \
wolfSSL 15:117db924cf7c 581 " xor %0,%0,%0 \n\t" \
wolfSSL 15:117db924cf7c 582 " addze %0,%0 \n\t" \
wolfSSL 15:117db924cf7c 583 :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r16","cc");
wolfSSL 15:117db924cf7c 584
wolfSSL 15:117db924cf7c 585 /******************************************************************/
wolfSSL 15:117db924cf7c 586
wolfSSL 15:117db924cf7c 587 #elif defined(TFM_AVR32)
wolfSSL 15:117db924cf7c 588
wolfSSL 15:117db924cf7c 589 /* AVR32 */
wolfSSL 15:117db924cf7c 590 #define MONT_START
wolfSSL 15:117db924cf7c 591 #define MONT_FINI
wolfSSL 15:117db924cf7c 592 #define LOOP_END
wolfSSL 15:117db924cf7c 593 #define LOOP_START \
wolfSSL 15:117db924cf7c 594 mu = c[x] * mp
wolfSSL 15:117db924cf7c 595
wolfSSL 15:117db924cf7c 596 #define INNERMUL \
wolfSSL 15:117db924cf7c 597 __asm__( \
wolfSSL 15:117db924cf7c 598 " ld.w r2,%1 \n\t" \
wolfSSL 15:117db924cf7c 599 " add r2,%0 \n\t" \
wolfSSL 15:117db924cf7c 600 " eor r3,r3 \n\t" \
wolfSSL 15:117db924cf7c 601 " acr r3 \n\t" \
wolfSSL 15:117db924cf7c 602 " macu.d r2,%3,%4 \n\t" \
wolfSSL 15:117db924cf7c 603 " st.w %1,r2 \n\t" \
wolfSSL 15:117db924cf7c 604 " mov %0,r3 \n\t" \
wolfSSL 15:117db924cf7c 605 :"=r"(cy),"=r"(_c):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c):"r2","r3");
wolfSSL 15:117db924cf7c 606
wolfSSL 15:117db924cf7c 607 #define PROPCARRY \
wolfSSL 15:117db924cf7c 608 __asm__( \
wolfSSL 15:117db924cf7c 609 " ld.w r2,%1 \n\t" \
wolfSSL 15:117db924cf7c 610 " add r2,%0 \n\t" \
wolfSSL 15:117db924cf7c 611 " st.w %1,r2 \n\t" \
wolfSSL 15:117db924cf7c 612 " eor %0,%0 \n\t" \
wolfSSL 15:117db924cf7c 613 " acr %0 \n\t" \
wolfSSL 15:117db924cf7c 614 :"=r"(cy),"=r"(&_c[0]):"0"(cy),"1"(&_c[0]):"r2","cc");
wolfSSL 15:117db924cf7c 615
wolfSSL 15:117db924cf7c 616 /******************************************************************/
wolfSSL 15:117db924cf7c 617 #elif defined(TFM_MIPS)
wolfSSL 15:117db924cf7c 618
wolfSSL 15:117db924cf7c 619 /* MIPS */
/*
 * Montgomery-reduction hooks for the TFM_MIPS build.
 * MONT_START, MONT_FINI and LOOP_END are defined empty on this target.
 */
wolfSSL 15:117db924cf7c 620 #define MONT_START
wolfSSL 15:117db924cf7c 621 #define MONT_FINI
wolfSSL 15:117db924cf7c 622 #define LOOP_END
/* Sets mu from the current digit c[x] and mp; c, x, mp and mu must all be in
 * scope where the macro is expanded. */
wolfSSL 15:117db924cf7c 623 #define LOOP_START \
wolfSSL 15:117db924cf7c 624 mu = c[x] * mp
wolfSSL 15:117db924cf7c 625
/*
 * One inner step of the reduction: multiplies mu by tmpm[0] ($12 = low word,
 * $13 = high word), adds cy and then _c[0] to the low word while folding each
 * carry (detected with sltu) into the high word, stores the low word to _c[0]
 * and leaves the high word in cy.
 * The macro expands to TWO statements: the asm and a separate "++tmpm;".
 * NOTE(review): cy and _c[0] are already read-write ("+r"/"+m"); the extra
 * inputs with an empty constraint string look redundant -- confirm before
 * touching them, since they also fix the operand numbering used by %3/%4.
 */
wolfSSL 15:117db924cf7c 626 #define INNERMUL \
wolfSSL 15:117db924cf7c 627 __asm__( \
wolfSSL 15:117db924cf7c 628 " multu %3,%4 \n\t" \
wolfSSL 15:117db924cf7c 629 " mflo $12 \n\t" \
wolfSSL 15:117db924cf7c 630 " mfhi $13 \n\t" \
wolfSSL 15:117db924cf7c 631 " addu $12,$12,%0 \n\t" \
wolfSSL 15:117db924cf7c 632 " sltu $10,$12,%0 \n\t" \
wolfSSL 15:117db924cf7c 633 " addu $13,$13,$10 \n\t" \
wolfSSL 15:117db924cf7c 634 " lw $10,%1 \n\t" \
wolfSSL 15:117db924cf7c 635 " addu $12,$12,$10 \n\t" \
wolfSSL 15:117db924cf7c 636 " sltu $10,$12,$10 \n\t" \
wolfSSL 15:117db924cf7c 637 " addu %0,$13,$10 \n\t" \
wolfSSL 15:117db924cf7c 638 " sw $12,%1 \n\t" \
wolfSSL 15:117db924cf7c 639 :"+r"(cy),"+m"(_c[0]):""(cy),"r"(mu),"r"(tmpm[0]),""(_c[0]):"$10","$12","$13"); ++tmpm;
wolfSSL 15:117db924cf7c 640
/*
 * Carry propagation: _c[0] += cy, then cy becomes 1 if the sum is smaller than
 * the old cy (i.e. the addition wrapped) and 0 otherwise.
 */
wolfSSL 15:117db924cf7c 641 #define PROPCARRY \
wolfSSL 15:117db924cf7c 642 __asm__( \
wolfSSL 15:117db924cf7c 643 " lw $10,%1 \n\t" \
wolfSSL 15:117db924cf7c 644 " addu $10,$10,%0 \n\t" \
wolfSSL 15:117db924cf7c 645 " sw $10,%1 \n\t" \
wolfSSL 15:117db924cf7c 646 " sltu %0,$10,%0 \n\t" \
wolfSSL 15:117db924cf7c 647 :"+r"(cy),"+m"(_c[0]):""(cy),""(_c[0]):"$10");
wolfSSL 15:117db924cf7c 648
wolfSSL 15:117db924cf7c 649 /******************************************************************/
wolfSSL 15:117db924cf7c 650 #else
wolfSSL 15:117db924cf7c 651
/* ISO C code */

/*
 * Portable Montgomery-reduction hooks, used when none of the TFM_<arch>
 * assembly variants is selected.  The macros operate on names that must be in
 * scope at the expansion site: c, x, mp, mu, _c, cy and tmpm, plus the types
 * fp_digit / fp_word and the constant DIGIT_BIT.
 */

/* No setup, teardown or per-iteration epilogue is needed in plain C. */
#define MONT_START
#define MONT_FINI
#define LOOP_END

/* mu = c[x] * mp, evaluated in the operands' own (single-digit) arithmetic. */
#define LOOP_START \
    mu = c[x] * mp

/*
 * One inner step: _c[0] + cy + mu * (*tmpm) is formed in the double-width
 * type fp_word.  The low digit is written back to _c[0], the bits above
 * DIGIT_BIT become the new cy, and tmpm is advanced by one digit.
 */
#define INNERMUL                                                      \
    do {                                                              \
        fp_word mont_acc = (fp_word)_c[0];                            \
        mont_acc += (fp_word)cy;                                      \
        mont_acc += ((fp_word)mu) * ((fp_word)*tmpm++);               \
        _c[0] = (fp_digit)mont_acc;                                   \
        cy    = (fp_digit)(mont_acc >> DIGIT_BIT);                    \
    } while (0)

/*
 * Carry propagation: add cy into _c[0]; cy then becomes 1 when the digit
 * wrapped around (the sum is smaller than the carry that was added), else 0.
 */
#define PROPCARRY                                                     \
    do {                                                              \
        _c[0] += cy;                                                  \
        cy = (_c[0] < cy);                                            \
    } while (0)
wolfSSL 15:117db924cf7c 669
wolfSSL 15:117db924cf7c 670 #endif
wolfSSL 15:117db924cf7c 671 /******************************************************************/
wolfSSL 15:117db924cf7c 672
wolfSSL 15:117db924cf7c 673
/* NOTE(review): LO is not referenced in this part of the file; presumably it is
 * consumed by Montgomery-reduction code elsewhere -- confirm before changing. */
wolfSSL 15:117db924cf7c 674 #define LO 0
wolfSSL 15:117db924cf7c 675 /* end fp_montgomery_reduce.c asm */
wolfSSL 15:117db924cf7c 676
wolfSSL 15:117db924cf7c 677
wolfSSL 15:117db924cf7c 678 /* start fp_sqr_comba.c asm */
wolfSSL 15:117db924cf7c 679 #if defined(TFM_X86)
wolfSSL 15:117db924cf7c 680
wolfSSL 15:117db924cf7c 681 /* x86-32 optimized */
wolfSSL 15:117db924cf7c 682
/*
 * Comba squaring primitives for 32-bit x86.  All macros act on the caller's
 * accumulator words c0, c1, c2 (and sc0, sc1, sc2 for the SQRADDSC / SQRADDAC /
 * SQRADDDB family), which must be in scope at the expansion site.
 * COMBA_START and COMBA_FINI are empty here; CLEAR_CARRY zeroes c0..c2;
 * COMBA_STORE / COMBA_STORE2 copy c0 / c1 into x; CARRY_FORWARD shifts the
 * accumulator down one word (c0 = c1, c1 = c2, c2 = 0).
 */
wolfSSL 15:117db924cf7c 683 #define COMBA_START
wolfSSL 15:117db924cf7c 684
wolfSSL 15:117db924cf7c 685 #define CLEAR_CARRY \
wolfSSL 15:117db924cf7c 686 c0 = c1 = c2 = 0;
wolfSSL 15:117db924cf7c 687
wolfSSL 15:117db924cf7c 688 #define COMBA_STORE(x) \
wolfSSL 15:117db924cf7c 689 x = c0;
wolfSSL 15:117db924cf7c 690
wolfSSL 15:117db924cf7c 691 #define COMBA_STORE2(x) \
wolfSSL 15:117db924cf7c 692 x = c1;
wolfSSL 15:117db924cf7c 693
wolfSSL 15:117db924cf7c 694 #define CARRY_FORWARD \
wolfSSL 15:117db924cf7c 695 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 15:117db924cf7c 696
wolfSSL 15:117db924cf7c 697 #define COMBA_FINI
wolfSSL 15:117db924cf7c 698
/* Adds i * i to the accumulator: the product is formed in edx:eax, eax goes to
 * c0, edx (plus carry) to c1, the final carry to c2.  j is not used. */
wolfSSL 15:117db924cf7c 699 #define SQRADD(i, j) \
wolfSSL 15:117db924cf7c 700 __asm__( \
wolfSSL 15:117db924cf7c 701 "movl %6,%%eax \n\t" \
wolfSSL 15:117db924cf7c 702 "mull %%eax \n\t" \
wolfSSL 15:117db924cf7c 703 "addl %%eax,%0 \n\t" \
wolfSSL 15:117db924cf7c 704 "adcl %%edx,%1 \n\t" \
wolfSSL 15:117db924cf7c 705 "adcl $0,%2 \n\t" \
wolfSSL 15:117db924cf7c 706 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","cc");
wolfSSL 15:117db924cf7c 707
/* Adds 2 * i * j to the accumulator by adding the product edx:eax twice. */
wolfSSL 15:117db924cf7c 708 #define SQRADD2(i, j) \
wolfSSL 15:117db924cf7c 709 __asm__( \
wolfSSL 15:117db924cf7c 710 "movl %6,%%eax \n\t" \
wolfSSL 15:117db924cf7c 711 "mull %7 \n\t" \
wolfSSL 15:117db924cf7c 712 "addl %%eax,%0 \n\t" \
wolfSSL 15:117db924cf7c 713 "adcl %%edx,%1 \n\t" \
wolfSSL 15:117db924cf7c 714 "adcl $0,%2 \n\t" \
wolfSSL 15:117db924cf7c 715 "addl %%eax,%0 \n\t" \
wolfSSL 15:117db924cf7c 716 "adcl %%edx,%1 \n\t" \
wolfSSL 15:117db924cf7c 717 "adcl $0,%2 \n\t" \
wolfSSL 15:117db924cf7c 718 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx", "cc");
wolfSSL 15:117db924cf7c 719
/* Starts the secondary accumulator: sc0 = low word of i * j, sc1 = high word,
 * sc2 = 0.  sc0..sc2 are outputs only. */
wolfSSL 15:117db924cf7c 720 #define SQRADDSC(i, j) \
wolfSSL 15:117db924cf7c 721 __asm__( \
wolfSSL 15:117db924cf7c 722 "movl %3,%%eax \n\t" \
wolfSSL 15:117db924cf7c 723 "mull %4 \n\t" \
wolfSSL 15:117db924cf7c 724 "movl %%eax,%0 \n\t" \
wolfSSL 15:117db924cf7c 725 "movl %%edx,%1 \n\t" \
wolfSSL 15:117db924cf7c 726 "xorl %2,%2 \n\t" \
wolfSSL 15:117db924cf7c 727 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "g"(j) :"%eax","%edx","cc");
wolfSSL 15:117db924cf7c 728
/* Adds i * j to the secondary accumulator sc0..sc2 (with carry into sc2). */
wolfSSL 15:117db924cf7c 729 #define SQRADDAC(i, j) \
wolfSSL 15:117db924cf7c 730 __asm__( \
wolfSSL 15:117db924cf7c 731 "movl %6,%%eax \n\t" \
wolfSSL 15:117db924cf7c 732 "mull %7 \n\t" \
wolfSSL 15:117db924cf7c 733 "addl %%eax,%0 \n\t" \
wolfSSL 15:117db924cf7c 734 "adcl %%edx,%1 \n\t" \
wolfSSL 15:117db924cf7c 735 "adcl $0,%2 \n\t" \
wolfSSL 15:117db924cf7c 736 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","cc");
wolfSSL 15:117db924cf7c 737
/* Adds the secondary accumulator to c0..c2 twice, i.e. c += 2 * sc. */
wolfSSL 15:117db924cf7c 738 #define SQRADDDB \
wolfSSL 15:117db924cf7c 739 __asm__( \
wolfSSL 15:117db924cf7c 740 "addl %6,%0 \n\t" \
wolfSSL 15:117db924cf7c 741 "adcl %7,%1 \n\t" \
wolfSSL 15:117db924cf7c 742 "adcl %8,%2 \n\t" \
wolfSSL 15:117db924cf7c 743 "addl %6,%0 \n\t" \
wolfSSL 15:117db924cf7c 744 "adcl %7,%1 \n\t" \
wolfSSL 15:117db924cf7c 745 "adcl %8,%2 \n\t" \
wolfSSL 15:117db924cf7c 746 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
wolfSSL 15:117db924cf7c 747
wolfSSL 15:117db924cf7c 748 #elif defined(TFM_X86_64)
wolfSSL 15:117db924cf7c 749 /* x86-64 optimized */
wolfSSL 15:117db924cf7c 750
wolfSSL 15:117db924cf7c 751 #define COMBA_START
wolfSSL 15:117db924cf7c 752
wolfSSL 15:117db924cf7c 753 #define CLEAR_CARRY \
wolfSSL 15:117db924cf7c 754 c0 = c1 = c2 = 0;
wolfSSL 15:117db924cf7c 755
wolfSSL 15:117db924cf7c 756 #define COMBA_STORE(x) \
wolfSSL 15:117db924cf7c 757 x = c0;
wolfSSL 15:117db924cf7c 758
wolfSSL 15:117db924cf7c 759 #define COMBA_STORE2(x) \
wolfSSL 15:117db924cf7c 760 x = c1;
wolfSSL 15:117db924cf7c 761
wolfSSL 15:117db924cf7c 762 #define CARRY_FORWARD \
wolfSSL 15:117db924cf7c 763 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 15:117db924cf7c 764
wolfSSL 15:117db924cf7c 765 #define COMBA_FINI
wolfSSL 15:117db924cf7c 766
wolfSSL 15:117db924cf7c 767 #define SQRADD(i, j) \
wolfSSL 15:117db924cf7c 768 __asm__( \
wolfSSL 15:117db924cf7c 769 "movq %6,%%rax \n\t" \
wolfSSL 15:117db924cf7c 770 "mulq %%rax \n\t" \
wolfSSL 15:117db924cf7c 771 "addq %%rax,%0 \n\t" \
wolfSSL 15:117db924cf7c 772 "adcq %%rdx,%1 \n\t" \
wolfSSL 15:117db924cf7c 773 "adcq $0,%2 \n\t" \
wolfSSL 15:117db924cf7c 774 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "x"(i) :"%rax","%rdx","cc");
wolfSSL 15:117db924cf7c 775
wolfSSL 15:117db924cf7c 776 #define SQRADD2(i, j) \
wolfSSL 15:117db924cf7c 777 __asm__( \
wolfSSL 15:117db924cf7c 778 "movq %6,%%rax \n\t" \
wolfSSL 15:117db924cf7c 779 "mulq %7 \n\t" \
wolfSSL 15:117db924cf7c 780 "addq %%rax,%0 \n\t" \
wolfSSL 15:117db924cf7c 781 "adcq %%rdx,%1 \n\t" \
wolfSSL 15:117db924cf7c 782 "adcq $0,%2 \n\t" \
wolfSSL 15:117db924cf7c 783 "addq %%rax,%0 \n\t" \
wolfSSL 15:117db924cf7c 784 "adcq %%rdx,%1 \n\t" \
wolfSSL 15:117db924cf7c 785 "adcq $0,%2 \n\t" \
wolfSSL 15:117db924cf7c 786 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc");
wolfSSL 15:117db924cf7c 787
wolfSSL 15:117db924cf7c 788 #define SQRADDSC(i, j) \
wolfSSL 15:117db924cf7c 789 __asm__( \
wolfSSL 15:117db924cf7c 790 "movq %3,%%rax \n\t" \
wolfSSL 15:117db924cf7c 791 "mulq %4 \n\t" \
wolfSSL 15:117db924cf7c 792 "movq %%rax,%0 \n\t" \
wolfSSL 15:117db924cf7c 793 "movq %%rdx,%1 \n\t" \
wolfSSL 15:117db924cf7c 794 "xorq %2,%2 \n\t" \
wolfSSL 15:117db924cf7c 795 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "g"(j) :"%rax","%rdx","cc");
wolfSSL 15:117db924cf7c 796
wolfSSL 15:117db924cf7c 797 #define SQRADDAC(i, j) \
wolfSSL 15:117db924cf7c 798 __asm__( \
wolfSSL 15:117db924cf7c 799 "movq %6,%%rax \n\t" \
wolfSSL 15:117db924cf7c 800 "mulq %7 \n\t" \
wolfSSL 15:117db924cf7c 801 "addq %%rax,%0 \n\t" \
wolfSSL 15:117db924cf7c 802 "adcq %%rdx,%1 \n\t" \
wolfSSL 15:117db924cf7c 803 "adcq $0,%2 \n\t" \
wolfSSL 15:117db924cf7c 804 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","cc");
wolfSSL 15:117db924cf7c 805
wolfSSL 15:117db924cf7c 806 #define SQRADDDB \
wolfSSL 15:117db924cf7c 807 __asm__( \
wolfSSL 15:117db924cf7c 808 "addq %6,%0 \n\t" \
wolfSSL 15:117db924cf7c 809 "adcq %7,%1 \n\t" \
wolfSSL 15:117db924cf7c 810 "adcq %8,%2 \n\t" \
wolfSSL 15:117db924cf7c 811 "addq %6,%0 \n\t" \
wolfSSL 15:117db924cf7c 812 "adcq %7,%1 \n\t" \
wolfSSL 15:117db924cf7c 813 "adcq %8,%2 \n\t" \
wolfSSL 15:117db924cf7c 814 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
wolfSSL 15:117db924cf7c 815
wolfSSL 15:117db924cf7c 816 #elif defined(TFM_SSE2)
wolfSSL 15:117db924cf7c 817
wolfSSL 15:117db924cf7c 818 /* SSE2 Optimized */
/*
 * Comba squaring primitives for the TFM_SSE2 build.  Products are formed with
 * pmuludq in %mm0 / %mm1 and moved to general registers 32 bits at a time.
 * All macros act on the caller's c0, c1, c2 (and sc0, sc1, sc2), which must be
 * in scope at the expansion site.  CLEAR_CARRY zeroes c0..c2; COMBA_STORE /
 * COMBA_STORE2 copy c0 / c1 into x; CARRY_FORWARD shifts the accumulator down
 * one word; COMBA_FINI issues "emms".
 * NOTE(review): the asm templates write %mm0/%mm1 but none of the clobber
 * lists names them -- confirm this is intentional.
 */
wolfSSL 15:117db924cf7c 819 #define COMBA_START
wolfSSL 15:117db924cf7c 820
wolfSSL 15:117db924cf7c 821 #define CLEAR_CARRY \
wolfSSL 15:117db924cf7c 822 c0 = c1 = c2 = 0;
wolfSSL 15:117db924cf7c 823
wolfSSL 15:117db924cf7c 824 #define COMBA_STORE(x) \
wolfSSL 15:117db924cf7c 825 x = c0;
wolfSSL 15:117db924cf7c 826
wolfSSL 15:117db924cf7c 827 #define COMBA_STORE2(x) \
wolfSSL 15:117db924cf7c 828 x = c1;
wolfSSL 15:117db924cf7c 829
wolfSSL 15:117db924cf7c 830 #define CARRY_FORWARD \
wolfSSL 15:117db924cf7c 831 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 15:117db924cf7c 832
wolfSSL 15:117db924cf7c 833 #define COMBA_FINI \
wolfSSL 15:117db924cf7c 834 __asm__("emms");
wolfSSL 15:117db924cf7c 835
/* Adds i * i to the accumulator: low product word to c0, high word (plus
 * carry) to c1, final carry to c2.  j is not used. */
wolfSSL 15:117db924cf7c 836 #define SQRADD(i, j) \
wolfSSL 15:117db924cf7c 837 __asm__( \
wolfSSL 15:117db924cf7c 838 "movd %6,%%mm0 \n\t" \
wolfSSL 15:117db924cf7c 839 "pmuludq %%mm0,%%mm0\n\t" \
wolfSSL 15:117db924cf7c 840 "movd %%mm0,%%eax \n\t" \
wolfSSL 15:117db924cf7c 841 "psrlq $32,%%mm0 \n\t" \
wolfSSL 15:117db924cf7c 842 "addl %%eax,%0 \n\t" \
wolfSSL 15:117db924cf7c 843 "movd %%mm0,%%eax \n\t" \
wolfSSL 15:117db924cf7c 844 "adcl %%eax,%1 \n\t" \
wolfSSL 15:117db924cf7c 845 "adcl $0,%2 \n\t" \
wolfSSL 15:117db924cf7c 846 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","cc");
wolfSSL 15:117db924cf7c 847
/* Adds 2 * i * j to the accumulator: the product is split into eax (low) and
 * edx (high) and added twice. */
wolfSSL 15:117db924cf7c 848 #define SQRADD2(i, j) \
wolfSSL 15:117db924cf7c 849 __asm__( \
wolfSSL 15:117db924cf7c 850 "movd %6,%%mm0 \n\t" \
wolfSSL 15:117db924cf7c 851 "movd %7,%%mm1 \n\t" \
wolfSSL 15:117db924cf7c 852 "pmuludq %%mm1,%%mm0\n\t" \
wolfSSL 15:117db924cf7c 853 "movd %%mm0,%%eax \n\t" \
wolfSSL 15:117db924cf7c 854 "psrlq $32,%%mm0 \n\t" \
wolfSSL 15:117db924cf7c 855 "movd %%mm0,%%edx \n\t" \
wolfSSL 15:117db924cf7c 856 "addl %%eax,%0 \n\t" \
wolfSSL 15:117db924cf7c 857 "adcl %%edx,%1 \n\t" \
wolfSSL 15:117db924cf7c 858 "adcl $0,%2 \n\t" \
wolfSSL 15:117db924cf7c 859 "addl %%eax,%0 \n\t" \
wolfSSL 15:117db924cf7c 860 "adcl %%edx,%1 \n\t" \
wolfSSL 15:117db924cf7c 861 "adcl $0,%2 \n\t" \
wolfSSL 15:117db924cf7c 862 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc");
wolfSSL 15:117db924cf7c 863
/* Starts the secondary accumulator: sc0 = low word of i * j, sc1 = high word,
 * sc2 = 0.  No clobber list is given although xorl is executed. */
wolfSSL 15:117db924cf7c 864 #define SQRADDSC(i, j) \
wolfSSL 15:117db924cf7c 865 __asm__( \
wolfSSL 15:117db924cf7c 866 "movd %3,%%mm0 \n\t" \
wolfSSL 15:117db924cf7c 867 "movd %4,%%mm1 \n\t" \
wolfSSL 15:117db924cf7c 868 "pmuludq %%mm1,%%mm0\n\t" \
wolfSSL 15:117db924cf7c 869 "movd %%mm0,%0 \n\t" \
wolfSSL 15:117db924cf7c 870 "psrlq $32,%%mm0 \n\t" \
wolfSSL 15:117db924cf7c 871 "movd %%mm0,%1 \n\t" \
wolfSSL 15:117db924cf7c 872 "xorl %2,%2 \n\t" \
wolfSSL 15:117db924cf7c 873 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "m"(i), "m"(j));
wolfSSL 15:117db924cf7c 874
wolfSSL 15:117db924cf7c 875 /* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */
wolfSSL 15:117db924cf7c 876
/* Adds i * j to the secondary accumulator sc0..sc2 (with carry into sc2). */
wolfSSL 15:117db924cf7c 877 #define SQRADDAC(i, j) \
wolfSSL 15:117db924cf7c 878 __asm__( \
wolfSSL 15:117db924cf7c 879 "movd %6,%%mm0 \n\t" \
wolfSSL 15:117db924cf7c 880 "movd %7,%%mm1 \n\t" \
wolfSSL 15:117db924cf7c 881 "pmuludq %%mm1,%%mm0\n\t" \
wolfSSL 15:117db924cf7c 882 "movd %%mm0,%%eax \n\t" \
wolfSSL 15:117db924cf7c 883 "psrlq $32,%%mm0 \n\t" \
wolfSSL 15:117db924cf7c 884 "movd %%mm0,%%edx \n\t" \
wolfSSL 15:117db924cf7c 885 "addl %%eax,%0 \n\t" \
wolfSSL 15:117db924cf7c 886 "adcl %%edx,%1 \n\t" \
wolfSSL 15:117db924cf7c 887 "adcl $0,%2 \n\t" \
wolfSSL 15:117db924cf7c 888 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) :"%eax","%edx","cc");
wolfSSL 15:117db924cf7c 889
/* Adds the secondary accumulator to c0..c2 twice, i.e. c += 2 * sc. */
wolfSSL 15:117db924cf7c 890 #define SQRADDDB \
wolfSSL 15:117db924cf7c 891 __asm__( \
wolfSSL 15:117db924cf7c 892 "addl %6,%0 \n\t" \
wolfSSL 15:117db924cf7c 893 "adcl %7,%1 \n\t" \
wolfSSL 15:117db924cf7c 894 "adcl %8,%2 \n\t" \
wolfSSL 15:117db924cf7c 895 "addl %6,%0 \n\t" \
wolfSSL 15:117db924cf7c 896 "adcl %7,%1 \n\t" \
wolfSSL 15:117db924cf7c 897 "adcl %8,%2 \n\t" \
wolfSSL 15:117db924cf7c 898 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
wolfSSL 15:117db924cf7c 899
wolfSSL 15:117db924cf7c 900 #elif defined(TFM_ARM)
wolfSSL 15:117db924cf7c 901
wolfSSL 15:117db924cf7c 902 /* ARM code */
wolfSSL 15:117db924cf7c 903
/*
 * Comba squaring primitives for the TFM_ARM build.  All macros act on the
 * caller's c0, c1, c2 (and sc0, sc1, sc2), which must be in scope at the
 * expansion site.  COMBA_START and COMBA_FINI are empty; CLEAR_CARRY zeroes
 * c0..c2; COMBA_STORE / COMBA_STORE2 copy c0 / c1 into x; CARRY_FORWARD shifts
 * the accumulator down one word.  r0 and r1 are used as scratch registers.
 */
wolfSSL 15:117db924cf7c 904 #define COMBA_START
wolfSSL 15:117db924cf7c 905
wolfSSL 15:117db924cf7c 906 #define CLEAR_CARRY \
wolfSSL 15:117db924cf7c 907 c0 = c1 = c2 = 0;
wolfSSL 15:117db924cf7c 908
wolfSSL 15:117db924cf7c 909 #define COMBA_STORE(x) \
wolfSSL 15:117db924cf7c 910 x = c0;
wolfSSL 15:117db924cf7c 911
wolfSSL 15:117db924cf7c 912 #define COMBA_STORE2(x) \
wolfSSL 15:117db924cf7c 913 x = c1;
wolfSSL 15:117db924cf7c 914
wolfSSL 15:117db924cf7c 915 #define CARRY_FORWARD \
wolfSSL 15:117db924cf7c 916 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 15:117db924cf7c 917
wolfSSL 15:117db924cf7c 918 #define COMBA_FINI
wolfSSL 15:117db924cf7c 919
/* In this variant only i is multiplied (i * i); j is not referenced.  The
 * product is added into c0 and c1, and the carry into c2. */
wolfSSL 15:117db924cf7c 920 /* multiplies point i and j, updates carry "c1" and digit c2 */
wolfSSL 15:117db924cf7c 921 #define SQRADD(i, j) \
wolfSSL 15:117db924cf7c 922 __asm__( \
wolfSSL 15:117db924cf7c 923 " UMULL r0,r1,%6,%6 \n\t" \
wolfSSL 15:117db924cf7c 924 " ADDS %0,%0,r0 \n\t" \
wolfSSL 15:117db924cf7c 925 " ADCS %1,%1,r1 \n\t" \
wolfSSL 15:117db924cf7c 926 " ADC %2,%2,#0 \n\t" \
wolfSSL 15:117db924cf7c 927 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "cc");
wolfSSL 15:117db924cf7c 928
/* The product i * j (r1:r0) is added to the accumulator twice. */
wolfSSL 15:117db924cf7c 929 /* for squaring some of the terms are doubled... */
wolfSSL 15:117db924cf7c 930 #define SQRADD2(i, j) \
wolfSSL 15:117db924cf7c 931 __asm__( \
wolfSSL 15:117db924cf7c 932 " UMULL r0,r1,%6,%7 \n\t" \
wolfSSL 15:117db924cf7c 933 " ADDS %0,%0,r0 \n\t" \
wolfSSL 15:117db924cf7c 934 " ADCS %1,%1,r1 \n\t" \
wolfSSL 15:117db924cf7c 935 " ADC %2,%2,#0 \n\t" \
wolfSSL 15:117db924cf7c 936 " ADDS %0,%0,r0 \n\t" \
wolfSSL 15:117db924cf7c 937 " ADCS %1,%1,r1 \n\t" \
wolfSSL 15:117db924cf7c 938 " ADC %2,%2,#0 \n\t" \
wolfSSL 15:117db924cf7c 939 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc");
wolfSSL 15:117db924cf7c 940
/* Starts the secondary accumulator: sc1:sc0 = i * j, and sc2 is zeroed by
 * subtracting it from itself.  sc0..sc2 are outputs only. */
wolfSSL 15:117db924cf7c 941 #define SQRADDSC(i, j) \
wolfSSL 15:117db924cf7c 942 __asm__( \
wolfSSL 15:117db924cf7c 943 " UMULL %0,%1,%3,%4 \n\t" \
wolfSSL 15:117db924cf7c 944 " SUB %2,%2,%2 \n\t" \
wolfSSL 15:117db924cf7c 945 :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "r"(i), "r"(j) : "cc");
wolfSSL 15:117db924cf7c 946
wolfSSL 15:117db924cf7c 947 /* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */
wolfSSL 15:117db924cf7c 948
/* Adds i * j to the secondary accumulator sc0..sc2 (with carry into sc2). */
wolfSSL 15:117db924cf7c 949 #define SQRADDAC(i, j) \
wolfSSL 15:117db924cf7c 950 __asm__( \
wolfSSL 15:117db924cf7c 951 " UMULL r0,r1,%6,%7 \n\t" \
wolfSSL 15:117db924cf7c 952 " ADDS %0,%0,r0 \n\t" \
wolfSSL 15:117db924cf7c 953 " ADCS %1,%1,r1 \n\t" \
wolfSSL 15:117db924cf7c 954 " ADC %2,%2,#0 \n\t" \
wolfSSL 15:117db924cf7c 955 :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "cc");
wolfSSL 15:117db924cf7c 956
/* Adds the secondary accumulator to c0..c2 twice, i.e. c += 2 * sc.  Here the
 * sc inputs come first in the operand list, so they are %3..%5. */
wolfSSL 15:117db924cf7c 957 #define SQRADDDB \
wolfSSL 15:117db924cf7c 958 __asm__( \
wolfSSL 15:117db924cf7c 959 " ADDS %0,%0,%3 \n\t" \
wolfSSL 15:117db924cf7c 960 " ADCS %1,%1,%4 \n\t" \
wolfSSL 15:117db924cf7c 961 " ADC %2,%2,%5 \n\t" \
wolfSSL 15:117db924cf7c 962 " ADDS %0,%0,%3 \n\t" \
wolfSSL 15:117db924cf7c 963 " ADCS %1,%1,%4 \n\t" \
wolfSSL 15:117db924cf7c 964 " ADC %2,%2,%5 \n\t" \
wolfSSL 15:117db924cf7c 965 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
wolfSSL 15:117db924cf7c 966
wolfSSL 15:117db924cf7c 967 #elif defined(TFM_PPC32)
wolfSSL 15:117db924cf7c 968
wolfSSL 15:117db924cf7c 969 /* PPC32 */
wolfSSL 15:117db924cf7c 970
/*
 * Comba squaring primitives for the TFM_PPC32 build.  All macros act on the
 * caller's c0, c1, c2 (and sc0, sc1, sc2), which must be in scope at the
 * expansion site.  COMBA_START and COMBA_FINI are empty; CLEAR_CARRY zeroes
 * c0..c2; COMBA_STORE / COMBA_STORE2 copy c0 / c1 into x; CARRY_FORWARD shifts
 * the accumulator down one word.  Registers 16 and 17 are used as scratch.
 */
wolfSSL 15:117db924cf7c 971 #define COMBA_START
wolfSSL 15:117db924cf7c 972
wolfSSL 15:117db924cf7c 973 #define CLEAR_CARRY \
wolfSSL 15:117db924cf7c 974 c0 = c1 = c2 = 0;
wolfSSL 15:117db924cf7c 975
wolfSSL 15:117db924cf7c 976 #define COMBA_STORE(x) \
wolfSSL 15:117db924cf7c 977 x = c0;
wolfSSL 15:117db924cf7c 978
wolfSSL 15:117db924cf7c 979 #define COMBA_STORE2(x) \
wolfSSL 15:117db924cf7c 980 x = c1;
wolfSSL 15:117db924cf7c 981
wolfSSL 15:117db924cf7c 982 #define CARRY_FORWARD \
wolfSSL 15:117db924cf7c 983 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 15:117db924cf7c 984
wolfSSL 15:117db924cf7c 985 #define COMBA_FINI
wolfSSL 15:117db924cf7c 986
/* In this variant only i is multiplied (i * i); j is not referenced.  The low
 * product word is added to c0, the high word to c1, the carry to c2. */
wolfSSL 15:117db924cf7c 987 /* multiplies point i and j, updates carry "c1" and digit c2 */
wolfSSL 15:117db924cf7c 988 #define SQRADD(i, j) \
wolfSSL 15:117db924cf7c 989 __asm__( \
wolfSSL 15:117db924cf7c 990 " mullw 16,%6,%6 \n\t" \
wolfSSL 15:117db924cf7c 991 " addc %0,%0,16 \n\t" \
wolfSSL 15:117db924cf7c 992 " mulhwu 16,%6,%6 \n\t" \
wolfSSL 15:117db924cf7c 993 " adde %1,%1,16 \n\t" \
wolfSSL 15:117db924cf7c 994 " addze %2,%2 \n\t" \
wolfSSL 15:117db924cf7c 995 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","cc");
wolfSSL 15:117db924cf7c 996
/* The product i * j (low word in 16, high word in 17) is added twice. */
wolfSSL 15:117db924cf7c 997 /* for squaring some of the terms are doubled... */
wolfSSL 15:117db924cf7c 998 #define SQRADD2(i, j) \
wolfSSL 15:117db924cf7c 999 __asm__( \
wolfSSL 15:117db924cf7c 1000 " mullw 16,%6,%7 \n\t" \
wolfSSL 15:117db924cf7c 1001 " mulhwu 17,%6,%7 \n\t" \
wolfSSL 15:117db924cf7c 1002 " addc %0,%0,16 \n\t" \
wolfSSL 15:117db924cf7c 1003 " adde %1,%1,17 \n\t" \
wolfSSL 15:117db924cf7c 1004 " addze %2,%2 \n\t" \
wolfSSL 15:117db924cf7c 1005 " addc %0,%0,16 \n\t" \
wolfSSL 15:117db924cf7c 1006 " adde %1,%1,17 \n\t" \
wolfSSL 15:117db924cf7c 1007 " addze %2,%2 \n\t" \
wolfSSL 15:117db924cf7c 1008 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","cc");
wolfSSL 15:117db924cf7c 1009
/* Starts the secondary accumulator: sc0 = low word of i * j, sc1 = high word,
 * sc2 = 0.  sc0..sc2 are still listed as tied inputs here even though all
 * three are overwritten, so their previous values are passed in but unused. */
wolfSSL 15:117db924cf7c 1010 #define SQRADDSC(i, j) \
wolfSSL 15:117db924cf7c 1011 __asm__( \
wolfSSL 15:117db924cf7c 1012 " mullw %0,%6,%7 \n\t" \
wolfSSL 15:117db924cf7c 1013 " mulhwu %1,%6,%7 \n\t" \
wolfSSL 15:117db924cf7c 1014 " xor %2,%2,%2 \n\t" \
wolfSSL 15:117db924cf7c 1015 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc");
wolfSSL 15:117db924cf7c 1016
/* Adds i * j to the secondary accumulator sc0..sc2 (with carry into sc2). */
wolfSSL 15:117db924cf7c 1017 #define SQRADDAC(i, j) \
wolfSSL 15:117db924cf7c 1018 __asm__( \
wolfSSL 15:117db924cf7c 1019 " mullw 16,%6,%7 \n\t" \
wolfSSL 15:117db924cf7c 1020 " addc %0,%0,16 \n\t" \
wolfSSL 15:117db924cf7c 1021 " mulhwu 16,%6,%7 \n\t" \
wolfSSL 15:117db924cf7c 1022 " adde %1,%1,16 \n\t" \
wolfSSL 15:117db924cf7c 1023 " addze %2,%2 \n\t" \
wolfSSL 15:117db924cf7c 1024 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "cc");
wolfSSL 15:117db924cf7c 1025
/* Adds the secondary accumulator to c0..c2 twice, i.e. c += 2 * sc.  The sc
 * inputs come first in the operand list, so they are %3..%5. */
wolfSSL 15:117db924cf7c 1026 #define SQRADDDB \
wolfSSL 15:117db924cf7c 1027 __asm__( \
wolfSSL 15:117db924cf7c 1028 " addc %0,%0,%3 \n\t" \
wolfSSL 15:117db924cf7c 1029 " adde %1,%1,%4 \n\t" \
wolfSSL 15:117db924cf7c 1030 " adde %2,%2,%5 \n\t" \
wolfSSL 15:117db924cf7c 1031 " addc %0,%0,%3 \n\t" \
wolfSSL 15:117db924cf7c 1032 " adde %1,%1,%4 \n\t" \
wolfSSL 15:117db924cf7c 1033 " adde %2,%2,%5 \n\t" \
wolfSSL 15:117db924cf7c 1034 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
wolfSSL 15:117db924cf7c 1035
wolfSSL 15:117db924cf7c 1036 #elif defined(TFM_PPC64)
wolfSSL 15:117db924cf7c 1037 /* PPC64 */
wolfSSL 15:117db924cf7c 1038
/*
 * Comba squaring primitives for the TFM_PPC64 build; same structure as the
 * PPC32 variant but using the doubleword multiplies mulld / mulhdu.  All
 * macros act on the caller's c0, c1, c2 (and sc0, sc1, sc2), which must be in
 * scope at the expansion site.  COMBA_START and COMBA_FINI are empty;
 * CLEAR_CARRY zeroes c0..c2; COMBA_STORE / COMBA_STORE2 copy c0 / c1 into x;
 * CARRY_FORWARD shifts the accumulator down one word.  r16 and r17 are used as
 * scratch registers.
 */
wolfSSL 15:117db924cf7c 1039 #define COMBA_START
wolfSSL 15:117db924cf7c 1040
wolfSSL 15:117db924cf7c 1041 #define CLEAR_CARRY \
wolfSSL 15:117db924cf7c 1042 c0 = c1 = c2 = 0;
wolfSSL 15:117db924cf7c 1043
wolfSSL 15:117db924cf7c 1044 #define COMBA_STORE(x) \
wolfSSL 15:117db924cf7c 1045 x = c0;
wolfSSL 15:117db924cf7c 1046
wolfSSL 15:117db924cf7c 1047 #define COMBA_STORE2(x) \
wolfSSL 15:117db924cf7c 1048 x = c1;
wolfSSL 15:117db924cf7c 1049
wolfSSL 15:117db924cf7c 1050 #define CARRY_FORWARD \
wolfSSL 15:117db924cf7c 1051 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 15:117db924cf7c 1052
wolfSSL 15:117db924cf7c 1053 #define COMBA_FINI
wolfSSL 15:117db924cf7c 1054
/* In this variant only i is multiplied (i * i); j is not referenced.  The low
 * product word is added to c0, the high word to c1, the carry to c2. */
wolfSSL 15:117db924cf7c 1055 /* multiplies point i and j, updates carry "c1" and digit c2 */
wolfSSL 15:117db924cf7c 1056 #define SQRADD(i, j) \
wolfSSL 15:117db924cf7c 1057 __asm__( \
wolfSSL 15:117db924cf7c 1058 " mulld r16,%6,%6 \n\t" \
wolfSSL 15:117db924cf7c 1059 " addc %0,%0,r16 \n\t" \
wolfSSL 15:117db924cf7c 1060 " mulhdu r16,%6,%6 \n\t" \
wolfSSL 15:117db924cf7c 1061 " adde %1,%1,r16 \n\t" \
wolfSSL 15:117db924cf7c 1062 " addze %2,%2 \n\t" \
wolfSSL 15:117db924cf7c 1063 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r16","cc");
wolfSSL 15:117db924cf7c 1064
/* The product i * j (low word in r16, high word in r17) is added twice. */
wolfSSL 15:117db924cf7c 1065 /* for squaring some of the terms are doubled... */
wolfSSL 15:117db924cf7c 1066 #define SQRADD2(i, j) \
wolfSSL 15:117db924cf7c 1067 __asm__( \
wolfSSL 15:117db924cf7c 1068 " mulld r16,%6,%7 \n\t" \
wolfSSL 15:117db924cf7c 1069 " mulhdu r17,%6,%7 \n\t" \
wolfSSL 15:117db924cf7c 1070 " addc %0,%0,r16 \n\t" \
wolfSSL 15:117db924cf7c 1071 " adde %1,%1,r17 \n\t" \
wolfSSL 15:117db924cf7c 1072 " addze %2,%2 \n\t" \
wolfSSL 15:117db924cf7c 1073 " addc %0,%0,r16 \n\t" \
wolfSSL 15:117db924cf7c 1074 " adde %1,%1,r17 \n\t" \
wolfSSL 15:117db924cf7c 1075 " addze %2,%2 \n\t" \
wolfSSL 15:117db924cf7c 1076 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16", "r17","cc");
wolfSSL 15:117db924cf7c 1077
/* Starts the secondary accumulator: sc0 = low word of i * j, sc1 = high word,
 * sc2 = 0.  sc0..sc2 are still listed as tied inputs here even though all
 * three are overwritten, so their previous values are passed in but unused. */
wolfSSL 15:117db924cf7c 1078 #define SQRADDSC(i, j) \
wolfSSL 15:117db924cf7c 1079 __asm__( \
wolfSSL 15:117db924cf7c 1080 " mulld %0,%6,%7 \n\t" \
wolfSSL 15:117db924cf7c 1081 " mulhdu %1,%6,%7 \n\t" \
wolfSSL 15:117db924cf7c 1082 " xor %2,%2,%2 \n\t" \
wolfSSL 15:117db924cf7c 1083 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc");
wolfSSL 15:117db924cf7c 1084
/* Adds i * j to the secondary accumulator sc0..sc2 (with carry into sc2). */
wolfSSL 15:117db924cf7c 1085 #define SQRADDAC(i, j) \
wolfSSL 15:117db924cf7c 1086 __asm__( \
wolfSSL 15:117db924cf7c 1087 " mulld r16,%6,%7 \n\t" \
wolfSSL 15:117db924cf7c 1088 " addc %0,%0,r16 \n\t" \
wolfSSL 15:117db924cf7c 1089 " mulhdu r16,%6,%7 \n\t" \
wolfSSL 15:117db924cf7c 1090 " adde %1,%1,r16 \n\t" \
wolfSSL 15:117db924cf7c 1091 " addze %2,%2 \n\t" \
wolfSSL 15:117db924cf7c 1092 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r16", "cc");
wolfSSL 15:117db924cf7c 1093
/* Adds the secondary accumulator to c0..c2 twice, i.e. c += 2 * sc.  The sc
 * inputs come first in the operand list, so they are %3..%5. */
wolfSSL 15:117db924cf7c 1094 #define SQRADDDB \
wolfSSL 15:117db924cf7c 1095 __asm__( \
wolfSSL 15:117db924cf7c 1096 " addc %0,%0,%3 \n\t" \
wolfSSL 15:117db924cf7c 1097 " adde %1,%1,%4 \n\t" \
wolfSSL 15:117db924cf7c 1098 " adde %2,%2,%5 \n\t" \
wolfSSL 15:117db924cf7c 1099 " addc %0,%0,%3 \n\t" \
wolfSSL 15:117db924cf7c 1100 " adde %1,%1,%4 \n\t" \
wolfSSL 15:117db924cf7c 1101 " adde %2,%2,%5 \n\t" \
wolfSSL 15:117db924cf7c 1102 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
wolfSSL 15:117db924cf7c 1103
wolfSSL 15:117db924cf7c 1104
wolfSSL 15:117db924cf7c 1105 #elif defined(TFM_AVR32)
wolfSSL 15:117db924cf7c 1106
/* AVR32 */

/*
 * Comba squaring primitives for the TFM_AVR32 build.  All macros act on the
 * caller's three-word column accumulator c0 (low), c1, c2 (high) and, for the
 * SQRADDSC / SQRADDAC / SQRADDDB family, on the secondary accumulator
 * sc0, sc1, sc2.  Those names must be in scope at the expansion site.
 * r2 and r3 are used as scratch for the product of mulu.d (presumably the
 * r3:r2 pair holds the 64-bit result -- confirm against the AVR32 ISA manual).
 */

/* Nothing to set up before a squaring run on this target. */
#define COMBA_START

/* Zero the whole column accumulator. */
#define CLEAR_CARRY \
    c0 = c1 = c2 = 0;

/* Copy the low accumulator word into x. */
#define COMBA_STORE(x) \
    x = c0;

/* Copy the middle accumulator word into x. */
#define COMBA_STORE2(x) \
    x = c1;

/* Advance to the next column: shift the accumulator down by one word. */
#define CARRY_FORWARD \
    do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Nothing to tear down after a squaring run on this target. */
#define COMBA_FINI

/* c2:c1:c0 += i * i; j is accepted for signature parity and is not used. */
#define SQRADD(i, j)                                                          \
    __asm__(                                                                  \
        " mulu.d r2,%6,%6 \n\t"                                               \
        " add %0,%0,r2 \n\t"                                                  \
        " adc %1,%1,r3 \n\t"                                                  \
        " acr %2 \n\t"                                                        \
        :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r2","r3");

/*
 * For squaring some of the terms are doubled: c2:c1:c0 += 2 * i * j, done by
 * adding the product twice.
 * "acr" takes a single register operand, exactly as in SQRADD and SQRADDAC;
 * the stray trailing comma that used to follow %2 here left the operand list
 * malformed and has been removed.
 */
#define SQRADD2(i, j)                                                         \
    __asm__(                                                                  \
        " mulu.d r2,%6,%7 \n\t"                                               \
        " add %0,%0,r2 \n\t"                                                  \
        " adc %1,%1,r3 \n\t"                                                  \
        " acr %2 \n\t"                                                        \
        " add %0,%0,r2 \n\t"                                                  \
        " adc %1,%1,r3 \n\t"                                                  \
        " acr %2 \n\t"                                                        \
        :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2", "r3");

/*
 * Start the secondary accumulator: sc0 / sc1 receive the two product words of
 * i * j and sc2 is cleared.  sc0..sc2 are still listed as tied inputs, which
 * keeps i and j at operand numbers %6 and %7.
 */
#define SQRADDSC(i, j)                                                        \
    __asm__(                                                                  \
        " mulu.d r2,%6,%7 \n\t"                                               \
        " mov %0,r2 \n\t"                                                     \
        " mov %1,r3 \n\t"                                                     \
        " eor %2,%2 \n\t"                                                     \
        :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "r2", "r3");

/* sc2:sc1:sc0 += i * j. */
#define SQRADDAC(i, j)                                                        \
    __asm__(                                                                  \
        " mulu.d r2,%6,%7 \n\t"                                               \
        " add %0,%0,r2 \n\t"                                                  \
        " adc %1,%1,r3 \n\t"                                                  \
        " acr %2 \n\t"                                                        \
        :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r2", "r3");

/*
 * c2:c1:c0 += 2 * (sc2:sc1:sc0), done by adding the secondary accumulator
 * twice.  The sc inputs come first in the operand list, so they are %3..%5.
 */
#define SQRADDDB                                                              \
    __asm__(                                                                  \
        " add %0,%0,%3 \n\t"                                                  \
        " adc %1,%1,%4 \n\t"                                                  \
        " adc %2,%2,%5 \n\t"                                                  \
        " add %0,%0,%3 \n\t"                                                  \
        " adc %1,%1,%4 \n\t"                                                  \
        " adc %2,%2,%5 \n\t"                                                  \
        :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
wolfSSL 15:117db924cf7c 1171
wolfSSL 15:117db924cf7c 1172 #elif defined(TFM_MIPS)
wolfSSL 15:117db924cf7c 1173
wolfSSL 15:117db924cf7c 1174 /* MIPS */
wolfSSL 15:117db924cf7c 1175 #define COMBA_START
wolfSSL 15:117db924cf7c 1176
wolfSSL 15:117db924cf7c 1177 #define CLEAR_CARRY \
wolfSSL 15:117db924cf7c 1178 c0 = c1 = c2 = 0;
wolfSSL 15:117db924cf7c 1179
wolfSSL 15:117db924cf7c 1180 #define COMBA_STORE(x) \
wolfSSL 15:117db924cf7c 1181 x = c0;
wolfSSL 15:117db924cf7c 1182
wolfSSL 15:117db924cf7c 1183 #define COMBA_STORE2(x) \
wolfSSL 15:117db924cf7c 1184 x = c1;
wolfSSL 15:117db924cf7c 1185
wolfSSL 15:117db924cf7c 1186 #define CARRY_FORWARD \
wolfSSL 15:117db924cf7c 1187 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 15:117db924cf7c 1188
wolfSSL 15:117db924cf7c 1189 #define COMBA_FINI
wolfSSL 15:117db924cf7c 1190
wolfSSL 15:117db924cf7c 1191 /* multiplies point i and j, updates carry "c1" and digit c2 */
wolfSSL 15:117db924cf7c 1192 #define SQRADD(i, j) \
wolfSSL 15:117db924cf7c 1193 __asm__( \
wolfSSL 15:117db924cf7c 1194 " multu %6,%6 \n\t" \
wolfSSL 15:117db924cf7c 1195 " mflo $12 \n\t" \
wolfSSL 15:117db924cf7c 1196 " mfhi $13 \n\t" \
wolfSSL 15:117db924cf7c 1197 " addu %0,%0,$12 \n\t" \
wolfSSL 15:117db924cf7c 1198 " sltu $12,%0,$12 \n\t" \
wolfSSL 15:117db924cf7c 1199 " addu %1,%1,$13 \n\t" \
wolfSSL 15:117db924cf7c 1200 " sltu $13,%1,$13 \n\t" \
wolfSSL 15:117db924cf7c 1201 " addu %1,%1,$12 \n\t" \
wolfSSL 15:117db924cf7c 1202 " sltu $12,%1,$12 \n\t" \
wolfSSL 15:117db924cf7c 1203 " addu %2,%2,$13 \n\t" \
wolfSSL 15:117db924cf7c 1204 " addu %2,%2,$12 \n\t" \
wolfSSL 15:117db924cf7c 1205 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"$12","$13");
wolfSSL 15:117db924cf7c 1206
wolfSSL 15:117db924cf7c 1207 /* for squaring some of the terms are doubled... */
wolfSSL 15:117db924cf7c 1208 #define SQRADD2(i, j) \
wolfSSL 15:117db924cf7c 1209 __asm__( \
wolfSSL 15:117db924cf7c 1210 " multu %6,%7 \n\t" \
wolfSSL 15:117db924cf7c 1211 " mflo $12 \n\t" \
wolfSSL 15:117db924cf7c 1212 " mfhi $13 \n\t" \
wolfSSL 15:117db924cf7c 1213 \
wolfSSL 15:117db924cf7c 1214 " addu %0,%0,$12 \n\t" \
wolfSSL 15:117db924cf7c 1215 " sltu $14,%0,$12 \n\t" \
wolfSSL 15:117db924cf7c 1216 " addu %1,%1,$13 \n\t" \
wolfSSL 15:117db924cf7c 1217 " sltu $15,%1,$13 \n\t" \
wolfSSL 15:117db924cf7c 1218 " addu %1,%1,$14 \n\t" \
wolfSSL 15:117db924cf7c 1219 " sltu $14,%1,$14 \n\t" \
wolfSSL 15:117db924cf7c 1220 " addu %2,%2,$15 \n\t" \
wolfSSL 15:117db924cf7c 1221 " addu %2,%2,$14 \n\t" \
wolfSSL 15:117db924cf7c 1222 \
wolfSSL 15:117db924cf7c 1223 " addu %0,%0,$12 \n\t" \
wolfSSL 15:117db924cf7c 1224 " sltu $14,%0,$12 \n\t" \
wolfSSL 15:117db924cf7c 1225 " addu %1,%1,$13 \n\t" \
wolfSSL 15:117db924cf7c 1226 " sltu $15,%1,$13 \n\t" \
wolfSSL 15:117db924cf7c 1227 " addu %1,%1,$14 \n\t" \
wolfSSL 15:117db924cf7c 1228 " sltu $14,%1,$14 \n\t" \
wolfSSL 15:117db924cf7c 1229 " addu %2,%2,$15 \n\t" \
wolfSSL 15:117db924cf7c 1230 " addu %2,%2,$14 \n\t" \
wolfSSL 15:117db924cf7c 1231 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12", "$13", "$14", "$15");
wolfSSL 15:117db924cf7c 1232
wolfSSL 15:117db924cf7c 1233 #define SQRADDSC(i, j) \
wolfSSL 15:117db924cf7c 1234 __asm__( \
wolfSSL 15:117db924cf7c 1235 " multu %6,%7 \n\t" \
wolfSSL 15:117db924cf7c 1236 " mflo %0 \n\t" \
wolfSSL 15:117db924cf7c 1237 " mfhi %1 \n\t" \
wolfSSL 15:117db924cf7c 1238 " xor %2,%2,%2 \n\t" \
wolfSSL 15:117db924cf7c 1239 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc");
wolfSSL 15:117db924cf7c 1240 /* SQRADDAC(i, j): sc2:sc1:sc0 += i * j. Each carry is recovered with sltu (sum < addend) in scratch $12/$13; $14 is listed as clobbered but the template never uses it. */
wolfSSL 15:117db924cf7c 1241 #define SQRADDAC(i, j) \
wolfSSL 15:117db924cf7c 1242 __asm__( \
wolfSSL 15:117db924cf7c 1243 " multu %6,%7 \n\t" \
wolfSSL 15:117db924cf7c 1244 " mflo $12 \n\t" \
wolfSSL 15:117db924cf7c 1245 " mfhi $13 \n\t" \
wolfSSL 15:117db924cf7c 1246 " addu %0,%0,$12 \n\t" \
wolfSSL 15:117db924cf7c 1247 " sltu $12,%0,$12 \n\t" \
wolfSSL 15:117db924cf7c 1248 " addu %1,%1,$13 \n\t" \
wolfSSL 15:117db924cf7c 1249 " sltu $13,%1,$13 \n\t" \
wolfSSL 15:117db924cf7c 1250 " addu %1,%1,$12 \n\t" \
wolfSSL 15:117db924cf7c 1251 " sltu $12,%1,$12 \n\t" \
wolfSSL 15:117db924cf7c 1252 " addu %2,%2,$13 \n\t" \
wolfSSL 15:117db924cf7c 1253 " addu %2,%2,$12 \n\t" \
wolfSSL 15:117db924cf7c 1254 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"$12", "$13", "$14");
wolfSSL 15:117db924cf7c 1255 /* SQRADDDB: c2:c1:c0 += 2 * (sc2:sc1:sc0), written as two identical add-with-carry passes (%3..%5 are sc0..sc2; $10/$11 hold the carries). */
wolfSSL 15:117db924cf7c 1256 #define SQRADDDB \
wolfSSL 15:117db924cf7c 1257 __asm__( \
wolfSSL 15:117db924cf7c 1258 " addu %0,%0,%3 \n\t" \
wolfSSL 15:117db924cf7c 1259 " sltu $10,%0,%3 \n\t" \
wolfSSL 15:117db924cf7c 1260 " addu %1,%1,$10 \n\t" \
wolfSSL 15:117db924cf7c 1261 " sltu $10,%1,$10 \n\t" \
wolfSSL 15:117db924cf7c 1262 " addu %1,%1,%4 \n\t" \
wolfSSL 15:117db924cf7c 1263 " sltu $11,%1,%4 \n\t" \
wolfSSL 15:117db924cf7c 1264 " addu %2,%2,$10 \n\t" \
wolfSSL 15:117db924cf7c 1265 " addu %2,%2,$11 \n\t" \
wolfSSL 15:117db924cf7c 1266 " addu %2,%2,%5 \n\t" \
wolfSSL 15:117db924cf7c 1267 \
wolfSSL 15:117db924cf7c 1268 " addu %0,%0,%3 \n\t" \
wolfSSL 15:117db924cf7c 1269 " sltu $10,%0,%3 \n\t" \
wolfSSL 15:117db924cf7c 1270 " addu %1,%1,$10 \n\t" \
wolfSSL 15:117db924cf7c 1271 " sltu $10,%1,$10 \n\t" \
wolfSSL 15:117db924cf7c 1272 " addu %1,%1,%4 \n\t" \
wolfSSL 15:117db924cf7c 1273 " sltu $11,%1,%4 \n\t" \
wolfSSL 15:117db924cf7c 1274 " addu %2,%2,$10 \n\t" \
wolfSSL 15:117db924cf7c 1275 " addu %2,%2,$11 \n\t" \
wolfSSL 15:117db924cf7c 1276 " addu %2,%2,%5 \n\t" \
wolfSSL 15:117db924cf7c 1277 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "$10", "$11");
wolfSSL 15:117db924cf7c 1278
wolfSSL 15:117db924cf7c 1279 #else
wolfSSL 15:117db924cf7c 1280 /* TFM_ISO is defined only on this fallback path; presumably the including code tests it (e.g. to declare the `tt` temporary that SQRADD2 uses) -- TODO confirm. */
wolfSSL 15:117db924cf7c 1281 #define TFM_ISO
wolfSSL 15:117db924cf7c 1282
wolfSSL 15:117db924cf7c 1283 /* ISO C portable code. c0/c1/c2 form a three-word accumulator (c0 lowest): CLEAR_CARRY zeroes it, CARRY_FORWARD shifts it down one word, COMBA_STORE/COMBA_STORE2 copy out c0/c1; COMBA_START/COMBA_FINI are empty. */
wolfSSL 15:117db924cf7c 1284
wolfSSL 15:117db924cf7c 1285 #define COMBA_START
wolfSSL 15:117db924cf7c 1286
wolfSSL 15:117db924cf7c 1287 #define CLEAR_CARRY \
wolfSSL 15:117db924cf7c 1288 c0 = c1 = c2 = 0;
wolfSSL 15:117db924cf7c 1289
wolfSSL 15:117db924cf7c 1290 #define COMBA_STORE(x) \
wolfSSL 15:117db924cf7c 1291 x = c0;
wolfSSL 15:117db924cf7c 1292
wolfSSL 15:117db924cf7c 1293 #define COMBA_STORE2(x) \
wolfSSL 15:117db924cf7c 1294 x = c1;
wolfSSL 15:117db924cf7c 1295
wolfSSL 15:117db924cf7c 1296 #define CARRY_FORWARD \
wolfSSL 15:117db924cf7c 1297 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 15:117db924cf7c 1298
wolfSSL 15:117db924cf7c 1299 #define COMBA_FINI
wolfSSL 15:117db924cf7c 1300 /* SQRADD(i, j): c2:c1:c0 += i * j. Sums are formed in an fp_word; c0 and c1 keep the fp_digit truncation and the bits above DIGIT_BIT carry into the next word. */
wolfSSL 15:117db924cf7c 1301 /* i and j are cast without being parenthesized, so pass simple operands rather than compound expressions */
wolfSSL 15:117db924cf7c 1302 #define SQRADD(i, j) \
wolfSSL 15:117db924cf7c 1303 do { fp_word t; \
wolfSSL 15:117db924cf7c 1304 t = c0 + ((fp_word)i) * ((fp_word)j); c0 = (fp_digit)t; \
wolfSSL 15:117db924cf7c 1305 t = c1 + (t >> DIGIT_BIT); c1 = (fp_digit)t; \
wolfSSL 15:117db924cf7c 1306 c2 +=(fp_digit) (t >> DIGIT_BIT); \
wolfSSL 15:117db924cf7c 1307 } while (0);
wolfSSL 15:117db924cf7c 1308
wolfSSL 15:117db924cf7c 1309 /* SQRADD2(i, j): c2:c1:c0 += 2 * i * j, done by adding the product t to the accumulator twice. */
wolfSSL 15:117db924cf7c 1310 /* for squaring some of the terms are doubled... `tt` is not declared here: it must already exist in the expanding scope (presumably an fp_word -- confirm against the caller) */
wolfSSL 15:117db924cf7c 1311 #define SQRADD2(i, j) \
wolfSSL 15:117db924cf7c 1312 do { fp_word t; \
wolfSSL 15:117db924cf7c 1313 t = ((fp_word)i) * ((fp_word)j); \
wolfSSL 15:117db924cf7c 1314 tt = (fp_word)c0 + t; c0 = (fp_digit)tt; \
wolfSSL 15:117db924cf7c 1315 tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = (fp_digit)tt; \
wolfSSL 15:117db924cf7c 1316 c2 +=(fp_digit)(tt >> DIGIT_BIT); \
wolfSSL 15:117db924cf7c 1317 tt = (fp_word)c0 + t; c0 = (fp_digit)tt; \
wolfSSL 15:117db924cf7c 1318 tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = (fp_digit)tt; \
wolfSSL 15:117db924cf7c 1319 c2 +=(fp_digit)(tt >> DIGIT_BIT); \
wolfSSL 15:117db924cf7c 1320 } while (0);
wolfSSL 15:117db924cf7c 1321 /* SQRADDSC(i, j): start the secondary accumulator from one product: sc0 = fp_digit truncation of i * j, sc1 = the bits above DIGIT_BIT, sc2 = 0. */
wolfSSL 15:117db924cf7c 1322 #define SQRADDSC(i, j) \
wolfSSL 15:117db924cf7c 1323 do { fp_word t; \
wolfSSL 15:117db924cf7c 1324 t = ((fp_word)i) * ((fp_word)j); \
wolfSSL 15:117db924cf7c 1325 sc0 = (fp_digit)t; sc1 = (t >> DIGIT_BIT); sc2 = 0; \
wolfSSL 15:117db924cf7c 1326 } while (0);
wolfSSL 15:117db924cf7c 1327 /* SQRADDAC(i, j): sc2:sc1:sc0 += i * j, with the same carry propagation as SQRADD but on the secondary accumulator. */
wolfSSL 15:117db924cf7c 1328 #define SQRADDAC(i, j) \
wolfSSL 15:117db924cf7c 1329 do { fp_word t; \
wolfSSL 15:117db924cf7c 1330 t = sc0 + ((fp_word)i) * ((fp_word)j); sc0 = (fp_digit)t; \
wolfSSL 15:117db924cf7c 1331 t = sc1 + (t >> DIGIT_BIT); sc1 = (fp_digit)t; \
wolfSSL 15:117db924cf7c 1332 sc2 += (fp_digit)(t >> DIGIT_BIT); \
wolfSSL 15:117db924cf7c 1333 } while (0);
wolfSSL 15:117db924cf7c 1334 /* SQRADDDB: c2:c1:c0 += 2 * (sc2:sc1:sc0); each sc word is added twice and the bits above DIGIT_BIT carry into the next word. */
wolfSSL 15:117db924cf7c 1335 #define SQRADDDB \
wolfSSL 15:117db924cf7c 1336 do { fp_word t; \
wolfSSL 15:117db924cf7c 1337 t = ((fp_word)sc0) + ((fp_word)sc0) + c0; c0 = (fp_digit)t; \
wolfSSL 15:117db924cf7c 1338 t = ((fp_word)sc1) + ((fp_word)sc1) + c1 + (t >> DIGIT_BIT); \
wolfSSL 15:117db924cf7c 1339 c1 = (fp_digit)t; \
wolfSSL 15:117db924cf7c 1340 c2 = c2 + (fp_digit)(((fp_word)sc2) + ((fp_word)sc2) + (t >> DIGIT_BIT)); \
wolfSSL 15:117db924cf7c 1341 } while (0);
wolfSSL 15:117db924cf7c 1342
wolfSSL 15:117db924cf7c 1343 #endif
wolfSSL 15:117db924cf7c 1344
wolfSSL 15:117db924cf7c 1345 #ifdef TFM_SMALL_SET
wolfSSL 15:117db924cf7c 1346 #include "fp_sqr_comba_small_set.i"
wolfSSL 15:117db924cf7c 1347 #endif
wolfSSL 15:117db924cf7c 1348
wolfSSL 15:117db924cf7c 1349 #if defined(TFM_SQR3) && FP_SIZE >= 6
wolfSSL 15:117db924cf7c 1350 #include "fp_sqr_comba_3.i"
wolfSSL 15:117db924cf7c 1351 #endif
wolfSSL 15:117db924cf7c 1352 #if defined(TFM_SQR4) && FP_SIZE >= 8
wolfSSL 15:117db924cf7c 1353 #include "fp_sqr_comba_4.i"
wolfSSL 15:117db924cf7c 1354 #endif
wolfSSL 15:117db924cf7c 1355 #if defined(TFM_SQR6) && FP_SIZE >= 12
wolfSSL 15:117db924cf7c 1356 #include "fp_sqr_comba_6.i"
wolfSSL 15:117db924cf7c 1357 #endif
wolfSSL 15:117db924cf7c 1358 #if defined(TFM_SQR7) && FP_SIZE >= 14
wolfSSL 15:117db924cf7c 1359 #include "fp_sqr_comba_7.i"
wolfSSL 15:117db924cf7c 1360 #endif
wolfSSL 15:117db924cf7c 1361 #if defined(TFM_SQR8) && FP_SIZE >= 16
wolfSSL 15:117db924cf7c 1362 #include "fp_sqr_comba_8.i"
wolfSSL 15:117db924cf7c 1363 #endif
wolfSSL 15:117db924cf7c 1364 #if defined(TFM_SQR9) && FP_SIZE >= 18
wolfSSL 15:117db924cf7c 1365 #include "fp_sqr_comba_9.i"
wolfSSL 15:117db924cf7c 1366 #endif
wolfSSL 15:117db924cf7c 1367 #if defined(TFM_SQR12) && FP_SIZE >= 24
wolfSSL 15:117db924cf7c 1368 #include "fp_sqr_comba_12.i"
wolfSSL 15:117db924cf7c 1369 #endif
wolfSSL 15:117db924cf7c 1370 #if defined(TFM_SQR17) && FP_SIZE >= 34
wolfSSL 15:117db924cf7c 1371 #include "fp_sqr_comba_17.i"
wolfSSL 15:117db924cf7c 1372 #endif
wolfSSL 15:117db924cf7c 1373 #if defined(TFM_SQR20) && FP_SIZE >= 40
wolfSSL 15:117db924cf7c 1374 #include "fp_sqr_comba_20.i"
wolfSSL 15:117db924cf7c 1375 #endif
wolfSSL 15:117db924cf7c 1376 #if defined(TFM_SQR24) && FP_SIZE >= 48
wolfSSL 15:117db924cf7c 1377 #include "fp_sqr_comba_24.i"
wolfSSL 15:117db924cf7c 1378 #endif
wolfSSL 15:117db924cf7c 1379 #if defined(TFM_SQR28) && FP_SIZE >= 56
wolfSSL 15:117db924cf7c 1380 #include "fp_sqr_comba_28.i"
wolfSSL 15:117db924cf7c 1381 #endif
wolfSSL 15:117db924cf7c 1382 #if defined(TFM_SQR32) && FP_SIZE >= 64
wolfSSL 15:117db924cf7c 1383 #include "fp_sqr_comba_32.i"
wolfSSL 15:117db924cf7c 1384 #endif
wolfSSL 15:117db924cf7c 1385 #if defined(TFM_SQR48) && FP_SIZE >= 96
wolfSSL 15:117db924cf7c 1386 #include "fp_sqr_comba_48.i"
wolfSSL 15:117db924cf7c 1387 #endif
wolfSSL 15:117db924cf7c 1388 #if defined(TFM_SQR64) && FP_SIZE >= 128
wolfSSL 15:117db924cf7c 1389 #include "fp_sqr_comba_64.i"
wolfSSL 15:117db924cf7c 1390 #endif
wolfSSL 15:117db924cf7c 1391 /* end fp_sqr_comba.c asm */
wolfSSL 15:117db924cf7c 1392
wolfSSL 15:117db924cf7c 1393 /* start fp_mul_comba.c asm */
wolfSSL 15:117db924cf7c 1394 /* these are the combas. Worship them. */
wolfSSL 15:117db924cf7c 1395 #if defined(TFM_X86)
wolfSSL 15:117db924cf7c 1396 /* Generic x86 optimized code (32-bit mull); c0/c1/c2 form a three-word accumulator with c0 lowest */
wolfSSL 15:117db924cf7c 1397
wolfSSL 15:117db924cf7c 1398 /* anything you need at the start (nothing on this target) */
wolfSSL 15:117db924cf7c 1399 #define COMBA_START
wolfSSL 15:117db924cf7c 1400
wolfSSL 15:117db924cf7c 1401 /* clear the chaining variables */
wolfSSL 15:117db924cf7c 1402 #define COMBA_CLEAR \
wolfSSL 15:117db924cf7c 1403 c0 = c1 = c2 = 0;
wolfSSL 15:117db924cf7c 1404
wolfSSL 15:117db924cf7c 1405 /* forward the carry to the next digit: shift the accumulator down one word */
wolfSSL 15:117db924cf7c 1406 #define COMBA_FORWARD \
wolfSSL 15:117db924cf7c 1407 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 15:117db924cf7c 1408
wolfSSL 15:117db924cf7c 1409 /* store the first sum (c0) */
wolfSSL 15:117db924cf7c 1410 #define COMBA_STORE(x) \
wolfSSL 15:117db924cf7c 1411 x = c0;
wolfSSL 15:117db924cf7c 1412
wolfSSL 15:117db924cf7c 1413 /* store the second sum [carry] (c1) */
wolfSSL 15:117db924cf7c 1414 #define COMBA_STORE2(x) \
wolfSSL 15:117db924cf7c 1415 x = c1;
wolfSSL 15:117db924cf7c 1416
wolfSSL 15:117db924cf7c 1417 /* anything you need at the end (nothing on this target) */
wolfSSL 15:117db924cf7c 1418 #define COMBA_FINI
wolfSSL 15:117db924cf7c 1419
wolfSSL 15:117db924cf7c 1420 /* MULADD(i, j): c2:c1:c0 += i * j. i and j are memory operands; the mull product in edx:eax is folded in with addl/adcl/adcl. */
wolfSSL 15:117db924cf7c 1421 #define MULADD(i, j) \
wolfSSL 15:117db924cf7c 1422 __asm__( \
wolfSSL 15:117db924cf7c 1423 "movl %6,%%eax \n\t" \
wolfSSL 15:117db924cf7c 1424 "mull %7 \n\t" \
wolfSSL 15:117db924cf7c 1425 "addl %%eax,%0 \n\t" \
wolfSSL 15:117db924cf7c 1426 "adcl %%edx,%1 \n\t" \
wolfSSL 15:117db924cf7c 1427 "adcl $0,%2 \n\t" \
wolfSSL 15:117db924cf7c 1428 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc");
wolfSSL 15:117db924cf7c 1429
wolfSSL 15:117db924cf7c 1430 #elif defined(TFM_X86_64)
wolfSSL 15:117db924cf7c 1431 /* x86-64 optimized (64-bit mulq); c0/c1/c2 form a three-word accumulator with c0 lowest */
wolfSSL 15:117db924cf7c 1432
wolfSSL 15:117db924cf7c 1433 /* anything you need at the start (nothing on this target) */
wolfSSL 15:117db924cf7c 1434 #define COMBA_START
wolfSSL 15:117db924cf7c 1435
wolfSSL 15:117db924cf7c 1436 /* clear the chaining variables */
wolfSSL 15:117db924cf7c 1437 #define COMBA_CLEAR \
wolfSSL 15:117db924cf7c 1438 c0 = c1 = c2 = 0;
wolfSSL 15:117db924cf7c 1439
wolfSSL 15:117db924cf7c 1440 /* forward the carry to the next digit: shift the accumulator down one word */
wolfSSL 15:117db924cf7c 1441 #define COMBA_FORWARD \
wolfSSL 15:117db924cf7c 1442 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 15:117db924cf7c 1443
wolfSSL 15:117db924cf7c 1444 /* store the first sum (c0) */
wolfSSL 15:117db924cf7c 1445 #define COMBA_STORE(x) \
wolfSSL 15:117db924cf7c 1446 x = c0;
wolfSSL 15:117db924cf7c 1447
wolfSSL 15:117db924cf7c 1448 /* store the second sum [carry] (c1) */
wolfSSL 15:117db924cf7c 1449 #define COMBA_STORE2(x) \
wolfSSL 15:117db924cf7c 1450 x = c1;
wolfSSL 15:117db924cf7c 1451
wolfSSL 15:117db924cf7c 1452 /* anything you need at the end (nothing on this target) */
wolfSSL 15:117db924cf7c 1453 #define COMBA_FINI
wolfSSL 15:117db924cf7c 1454
wolfSSL 15:117db924cf7c 1455 /* MULADD(i, j): c2:c1:c0 += i * j. The mulq product in rdx:rax is folded in with addq/adcq/adcq; rax and rdx are scratch. */
wolfSSL 15:117db924cf7c 1456 #define MULADD(i, j) \
wolfSSL 15:117db924cf7c 1457 __asm__ ( \
wolfSSL 15:117db924cf7c 1458 "movq %6,%%rax \n\t" \
wolfSSL 15:117db924cf7c 1459 "mulq %7 \n\t" \
wolfSSL 15:117db924cf7c 1460 "addq %%rax,%0 \n\t" \
wolfSSL 15:117db924cf7c 1461 "adcq %%rdx,%1 \n\t" \
wolfSSL 15:117db924cf7c 1462 "adcq $0,%2 \n\t" \
wolfSSL 15:117db924cf7c 1463 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc");
wolfSSL 15:117db924cf7c 1464
wolfSSL 15:117db924cf7c 1465 /* MULADD_BODY(a, b, c): c->dp[iz..iz+5] += a->dp[ix] * b->dp[iy..iy+3]; ix, iy and iz come from the expanding scope. The asm loads and stores through bp/cp, which the operand list does not describe, so it declares "memory" (and "cc" for the flags). */
wolfSSL 15:117db924cf7c 1466 #if defined(HAVE_INTEL_MULX)
wolfSSL 15:117db924cf7c 1467 #define MULADD_BODY(a,b,c) \
wolfSSL 15:117db924cf7c 1468 __asm__ volatile( \
wolfSSL 15:117db924cf7c 1469 "movq %[a0],%%rdx\n\t" \
wolfSSL 15:117db924cf7c 1470 "xorq %%rcx, %%rcx\n\t" \
wolfSSL 15:117db924cf7c 1471 "movq 0(%[cp]),%%r8\n\t" \
wolfSSL 15:117db924cf7c 1472 "movq 8(%[cp]),%%r9\n\t" \
wolfSSL 15:117db924cf7c 1473 "movq 16(%[cp]),%%r10\n\t" \
wolfSSL 15:117db924cf7c 1474 "movq 24(%[cp]),%%r11\n\t" \
wolfSSL 15:117db924cf7c 1475 "movq 32(%[cp]),%%r12\n\t" \
wolfSSL 15:117db924cf7c 1476 "movq 40(%[cp]),%%r13\n\t" \
wolfSSL 15:117db924cf7c 1477 /* low product halves ride the OF chain (adox), high halves the CF chain (adcx); the xorq above cleared both flags */ \
wolfSSL 15:117db924cf7c 1478 "mulx (%[bp]),%%rax, %%rbx\n\t" \
wolfSSL 15:117db924cf7c 1479 "adoxq %%rax, %%r8\n\t" \
wolfSSL 15:117db924cf7c 1480 "mulx 8(%[bp]),%%rax, %%rcx\n\t" \
wolfSSL 15:117db924cf7c 1481 "adcxq %%rbx, %%r9\n\t" \
wolfSSL 15:117db924cf7c 1482 "adoxq %%rax, %%r9\n\t" \
wolfSSL 15:117db924cf7c 1483 "mulx 16(%[bp]),%%rax, %%rbx\n\t" \
wolfSSL 15:117db924cf7c 1484 "adcxq %%rcx, %%r10\n\t" \
wolfSSL 15:117db924cf7c 1485 "adoxq %%rax, %%r10\n\t" \
wolfSSL 15:117db924cf7c 1486 "mulx 24(%[bp]),%%rax, %%rcx\n\t" \
wolfSSL 15:117db924cf7c 1487 "adcxq %%rbx, %%r11\n\t" \
wolfSSL 15:117db924cf7c 1488 "adoxq %%rax, %%r11\n\t" \
wolfSSL 15:117db924cf7c 1489 "adcxq %%rcx, %%r12\n\t" \
wolfSSL 15:117db924cf7c 1490 "mov $0, %%rdx\n\t" \
wolfSSL 15:117db924cf7c 1491 "adox %%rdx, %%r12\n\t" \
wolfSSL 15:117db924cf7c 1492 "adcx %%rdx, %%r13\n\t" \
wolfSSL 15:117db924cf7c 1493 /* NOTE(review): an OF carry out of the last adox into r12 is never added to r13 -- confirm it cannot be set here */ \
wolfSSL 15:117db924cf7c 1494 "movq %%r8, 0(%[cp])\n\t" \
wolfSSL 15:117db924cf7c 1495 "movq %%r9, 8(%[cp])\n\t" \
wolfSSL 15:117db924cf7c 1496 "movq %%r10, 16(%[cp])\n\t" \
wolfSSL 15:117db924cf7c 1497 "movq %%r11, 24(%[cp])\n\t" \
wolfSSL 15:117db924cf7c 1498 "movq %%r12, 32(%[cp])\n\t" \
wolfSSL 15:117db924cf7c 1499 "movq %%r13, 40(%[cp])\n\t" \
wolfSSL 15:117db924cf7c 1500 : \
wolfSSL 15:117db924cf7c 1501 : [a0] "r" (a->dp[ix]), [bp] "r" (&(b->dp[iy])), \
wolfSSL 15:117db924cf7c 1502 [cp] "r" (&(c->dp[iz])) \
wolfSSL 15:117db924cf7c 1503 : "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", \
wolfSSL 15:117db924cf7c 1504 "%rdx", "%rax", "%rcx", "%rbx", "memory", "cc" \
wolfSSL 15:117db924cf7c 1505 )
wolfSSL 15:117db924cf7c 1506 /* TFM_INTEL_MUL_COMBA(a, b, c): zero c->dp[0..pa-1], then run MULADD_BODY for each digit of a against b four digits at a time. Expands to bare statements using pa, ix, iy, iz from the caller; digits past b->used are read when b->used is not a multiple of 4 (presumably zero -- confirm). */
wolfSSL 15:117db924cf7c 1507 #define TFM_INTEL_MUL_COMBA(a, b, c) \
wolfSSL 15:117db924cf7c 1508 for (iz=0; iz<pa; iz++) c->dp[iz] = 0; \
wolfSSL 15:117db924cf7c 1509 for (ix=0; ix<a->used; ix++) { \
wolfSSL 15:117db924cf7c 1510 for (iy=0; iy<b->used; iy+=4) { \
wolfSSL 15:117db924cf7c 1511 iz = ix + iy; \
wolfSSL 15:117db924cf7c 1512 MULADD_BODY(a, b, c); \
wolfSSL 15:117db924cf7c 1513 } \
wolfSSL 15:117db924cf7c 1514 }
wolfSSL 15:117db924cf7c 1515 #endif
wolfSSL 15:117db924cf7c 1516
wolfSSL 15:117db924cf7c 1517 #elif defined(TFM_SSE2)
wolfSSL 15:117db924cf7c 1518 /* use SSE2 optimizations (pmuludq on MMX registers); c0/c1/c2 form a three-word accumulator with c0 lowest */
wolfSSL 15:117db924cf7c 1519
wolfSSL 15:117db924cf7c 1520 /* anything you need at the start (nothing on this target) */
wolfSSL 15:117db924cf7c 1521 #define COMBA_START
wolfSSL 15:117db924cf7c 1522
wolfSSL 15:117db924cf7c 1523 /* clear the chaining variables */
wolfSSL 15:117db924cf7c 1524 #define COMBA_CLEAR \
wolfSSL 15:117db924cf7c 1525 c0 = c1 = c2 = 0;
wolfSSL 15:117db924cf7c 1526
wolfSSL 15:117db924cf7c 1527 /* forward the carry to the next digit: shift the accumulator down one word */
wolfSSL 15:117db924cf7c 1528 #define COMBA_FORWARD \
wolfSSL 15:117db924cf7c 1529 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 15:117db924cf7c 1530
wolfSSL 15:117db924cf7c 1531 /* store the first sum (c0) */
wolfSSL 15:117db924cf7c 1532 #define COMBA_STORE(x) \
wolfSSL 15:117db924cf7c 1533 x = c0;
wolfSSL 15:117db924cf7c 1534
wolfSSL 15:117db924cf7c 1535 /* store the second sum [carry] (c1) */
wolfSSL 15:117db924cf7c 1536 #define COMBA_STORE2(x) \
wolfSSL 15:117db924cf7c 1537 x = c1;
wolfSSL 15:117db924cf7c 1538
wolfSSL 15:117db924cf7c 1539 /* anything you need at the end: emms, issued because MULADD uses the MMX registers */
wolfSSL 15:117db924cf7c 1540 #define COMBA_FINI \
wolfSSL 15:117db924cf7c 1541 __asm__("emms");
wolfSSL 15:117db924cf7c 1542 /* MULADD(i, j): c2:c1:c0 += i * j. pmuludq forms the 64-bit product in mm0; its low and high 32 bits go through eax into addl/adcl, then adcl $0 into c2. */
wolfSSL 15:117db924cf7c 1543 /* NOTE(review): mm0 and mm1 are overwritten but do not appear in the clobber list -- confirm this is acceptable for the callers */
wolfSSL 15:117db924cf7c 1544 #define MULADD(i, j) \
wolfSSL 15:117db924cf7c 1545 __asm__( \
wolfSSL 15:117db924cf7c 1546 "movd %6,%%mm0 \n\t" \
wolfSSL 15:117db924cf7c 1547 "movd %7,%%mm1 \n\t" \
wolfSSL 15:117db924cf7c 1548 "pmuludq %%mm1,%%mm0\n\t" \
wolfSSL 15:117db924cf7c 1549 "movd %%mm0,%%eax \n\t" \
wolfSSL 15:117db924cf7c 1550 "psrlq $32,%%mm0 \n\t" \
wolfSSL 15:117db924cf7c 1551 "addl %%eax,%0 \n\t" \
wolfSSL 15:117db924cf7c 1552 "movd %%mm0,%%eax \n\t" \
wolfSSL 15:117db924cf7c 1553 "adcl %%eax,%1 \n\t" \
wolfSSL 15:117db924cf7c 1554 "adcl $0,%2 \n\t" \
wolfSSL 15:117db924cf7c 1555 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","cc");
wolfSSL 15:117db924cf7c 1556
wolfSSL 15:117db924cf7c 1557 #elif defined(TFM_ARM)
wolfSSL 15:117db924cf7c 1558 /* ARM code. c0/c1/c2 form a three-word accumulator (c0 lowest): COMBA_CLEAR zeroes it, COMBA_FORWARD shifts it down one word, COMBA_STORE/COMBA_STORE2 copy out c0/c1; COMBA_START/COMBA_FINI are empty. */
wolfSSL 15:117db924cf7c 1559
wolfSSL 15:117db924cf7c 1560 #define COMBA_START
wolfSSL 15:117db924cf7c 1561
wolfSSL 15:117db924cf7c 1562 #define COMBA_CLEAR \
wolfSSL 15:117db924cf7c 1563 c0 = c1 = c2 = 0;
wolfSSL 15:117db924cf7c 1564
wolfSSL 15:117db924cf7c 1565 #define COMBA_FORWARD \
wolfSSL 15:117db924cf7c 1566 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 15:117db924cf7c 1567
wolfSSL 15:117db924cf7c 1568 #define COMBA_STORE(x) \
wolfSSL 15:117db924cf7c 1569 x = c0;
wolfSSL 15:117db924cf7c 1570
wolfSSL 15:117db924cf7c 1571 #define COMBA_STORE2(x) \
wolfSSL 15:117db924cf7c 1572 x = c1;
wolfSSL 15:117db924cf7c 1573
wolfSSL 15:117db924cf7c 1574 #define COMBA_FINI
wolfSSL 15:117db924cf7c 1575 /* MULADD(i, j): c2:c1:c0 += i * j. UMULL puts the product in r0 (low) / r1 (high); ADDS/ADCS/ADC propagate the carry. r0 and r1 are scratch. */
wolfSSL 15:117db924cf7c 1576 #define MULADD(i, j) \
wolfSSL 15:117db924cf7c 1577 __asm__( \
wolfSSL 15:117db924cf7c 1578 " UMULL r0,r1,%6,%7 \n\t" \
wolfSSL 15:117db924cf7c 1579 " ADDS %0,%0,r0 \n\t" \
wolfSSL 15:117db924cf7c 1580 " ADCS %1,%1,r1 \n\t" \
wolfSSL 15:117db924cf7c 1581 " ADC %2,%2,#0 \n\t" \
wolfSSL 15:117db924cf7c 1582 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc");
wolfSSL 15:117db924cf7c 1583
wolfSSL 15:117db924cf7c 1584 #elif defined(TFM_PPC32)
wolfSSL 15:117db924cf7c 1585 /* For 32-bit PPC. c0/c1/c2 form a three-word accumulator (c0 lowest): COMBA_CLEAR zeroes it, COMBA_FORWARD shifts it down one word, COMBA_STORE/COMBA_STORE2 copy out c0/c1; COMBA_START/COMBA_FINI are empty. */
wolfSSL 15:117db924cf7c 1586
wolfSSL 15:117db924cf7c 1587 #define COMBA_START
wolfSSL 15:117db924cf7c 1588
wolfSSL 15:117db924cf7c 1589 #define COMBA_CLEAR \
wolfSSL 15:117db924cf7c 1590 c0 = c1 = c2 = 0;
wolfSSL 15:117db924cf7c 1591
wolfSSL 15:117db924cf7c 1592 #define COMBA_FORWARD \
wolfSSL 15:117db924cf7c 1593 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 15:117db924cf7c 1594
wolfSSL 15:117db924cf7c 1595 #define COMBA_STORE(x) \
wolfSSL 15:117db924cf7c 1596 x = c0;
wolfSSL 15:117db924cf7c 1597
wolfSSL 15:117db924cf7c 1598 #define COMBA_STORE2(x) \
wolfSSL 15:117db924cf7c 1599 x = c1;
wolfSSL 15:117db924cf7c 1600
wolfSSL 15:117db924cf7c 1601 #define COMBA_FINI
wolfSSL 15:117db924cf7c 1602 /* MULADD(i, j): c2:c1:c0 += i * j. mullw/mulhwu put the low/high product word in scratch register 16; addc/adde/addze carry between the three adds, so mulhwu must not disturb the carry. */
wolfSSL 15:117db924cf7c 1603 /* untested: will mulhwu change the flags? Docs say no */
wolfSSL 15:117db924cf7c 1604 #define MULADD(i, j) \
wolfSSL 15:117db924cf7c 1605 __asm__( \
wolfSSL 15:117db924cf7c 1606 " mullw 16,%6,%7 \n\t" \
wolfSSL 15:117db924cf7c 1607 " addc %0,%0,16 \n\t" \
wolfSSL 15:117db924cf7c 1608 " mulhwu 16,%6,%7 \n\t" \
wolfSSL 15:117db924cf7c 1609 " adde %1,%1,16 \n\t" \
wolfSSL 15:117db924cf7c 1610 " addze %2,%2 \n\t" \
wolfSSL 15:117db924cf7c 1611 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");
wolfSSL 15:117db924cf7c 1612
wolfSSL 15:117db924cf7c 1613 #elif defined(TFM_PPC64)
wolfSSL 15:117db924cf7c 1614 /* For 64-bit PPC. c0/c1/c2 form a three-word accumulator (c0 lowest): COMBA_CLEAR zeroes it, COMBA_FORWARD shifts it down one word, COMBA_STORE/COMBA_STORE2 copy out c0/c1; COMBA_START/COMBA_FINI are empty. */
wolfSSL 15:117db924cf7c 1615
wolfSSL 15:117db924cf7c 1616 #define COMBA_START
wolfSSL 15:117db924cf7c 1617
wolfSSL 15:117db924cf7c 1618 #define COMBA_CLEAR \
wolfSSL 15:117db924cf7c 1619 c0 = c1 = c2 = 0;
wolfSSL 15:117db924cf7c 1620
wolfSSL 15:117db924cf7c 1621 #define COMBA_FORWARD \
wolfSSL 15:117db924cf7c 1622 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 15:117db924cf7c 1623
wolfSSL 15:117db924cf7c 1624 #define COMBA_STORE(x) \
wolfSSL 15:117db924cf7c 1625 x = c0;
wolfSSL 15:117db924cf7c 1626
wolfSSL 15:117db924cf7c 1627 #define COMBA_STORE2(x) \
wolfSSL 15:117db924cf7c 1628 x = c1;
wolfSSL 15:117db924cf7c 1629
wolfSSL 15:117db924cf7c 1630 #define COMBA_FINI
wolfSSL 15:117db924cf7c 1631 /* MULADD(i, j): c2:c1:c0 += i * j. mulld/mulhdu put the low/high product doubleword in scratch register 16; addc/adde/addze carry between the three adds. The register is spelled as a bare number throughout, as in the 32-bit PPC macro. */
wolfSSL 15:117db924cf7c 1632 /* untested: will mulhdu change the flags? Docs say no */
wolfSSL 15:117db924cf7c 1633 #define MULADD(i, j) \
wolfSSL 15:117db924cf7c 1634 __asm__( \
wolfSSL 15:117db924cf7c 1635 " mulld 16,%6,%7 \n\t" \
wolfSSL 15:117db924cf7c 1636 " addc %0,%0,16 \n\t" \
wolfSSL 15:117db924cf7c 1637 " mulhdu 16,%6,%7 \n\t" \
wolfSSL 15:117db924cf7c 1638 " adde %1,%1,16 \n\t" \
wolfSSL 15:117db924cf7c 1639 " addze %2,%2 \n\t" \
wolfSSL 15:117db924cf7c 1640 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");
wolfSSL 15:117db924cf7c 1641
wolfSSL 15:117db924cf7c 1642 #elif defined(TFM_AVR32)
wolfSSL 15:117db924cf7c 1643
wolfSSL 15:117db924cf7c 1644 /* AVR32 code. c0/c1/c2 form a three-word accumulator (c0 lowest): COMBA_CLEAR zeroes it, COMBA_FORWARD shifts it down one word, COMBA_STORE/COMBA_STORE2 copy out c0/c1; COMBA_START/COMBA_FINI are empty. */
wolfSSL 15:117db924cf7c 1645
wolfSSL 15:117db924cf7c 1646 #define COMBA_START
wolfSSL 15:117db924cf7c 1647
wolfSSL 15:117db924cf7c 1648 #define COMBA_CLEAR \
wolfSSL 15:117db924cf7c 1649 c0 = c1 = c2 = 0;
wolfSSL 15:117db924cf7c 1650
wolfSSL 15:117db924cf7c 1651 #define COMBA_FORWARD \
wolfSSL 15:117db924cf7c 1652 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 15:117db924cf7c 1653
wolfSSL 15:117db924cf7c 1654 #define COMBA_STORE(x) \
wolfSSL 15:117db924cf7c 1655 x = c0;
wolfSSL 15:117db924cf7c 1656
wolfSSL 15:117db924cf7c 1657 #define COMBA_STORE2(x) \
wolfSSL 15:117db924cf7c 1658 x = c1;
wolfSSL 15:117db924cf7c 1659
wolfSSL 15:117db924cf7c 1660 #define COMBA_FINI
wolfSSL 15:117db924cf7c 1661 /* MULADD(i, j): c2:c1:c0 += i * j. The mulu.d product is taken from r2 (added to c0) and r3 (added with carry to c1); acr then adds the remaining carry to c2. r2 and r3 are scratch. */
wolfSSL 15:117db924cf7c 1662 #define MULADD(i, j) \
wolfSSL 15:117db924cf7c 1663 __asm__( \
wolfSSL 15:117db924cf7c 1664 " mulu.d r2,%6,%7 \n\t"\
wolfSSL 15:117db924cf7c 1665 " add %0,r2 \n\t"\
wolfSSL 15:117db924cf7c 1666 " adc %1,%1,r3 \n\t"\
wolfSSL 15:117db924cf7c 1667 " acr %2 \n\t"\
wolfSSL 15:117db924cf7c 1668 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2","r3");
wolfSSL 15:117db924cf7c 1669
wolfSSL 15:117db924cf7c 1670 #elif defined(TFM_MIPS)
wolfSSL 15:117db924cf7c 1671
wolfSSL 15:117db924cf7c 1672 /* MIPS. c0/c1/c2 form a three-word accumulator (c0 lowest): COMBA_CLEAR zeroes it, COMBA_FORWARD shifts it down one word, COMBA_STORE/COMBA_STORE2 copy out c0/c1; COMBA_START/COMBA_FINI are empty. */
wolfSSL 15:117db924cf7c 1673 #define COMBA_START
wolfSSL 15:117db924cf7c 1674
wolfSSL 15:117db924cf7c 1675 #define COMBA_CLEAR \
wolfSSL 15:117db924cf7c 1676 c0 = c1 = c2 = 0;
wolfSSL 15:117db924cf7c 1677
wolfSSL 15:117db924cf7c 1678 #define COMBA_FORWARD \
wolfSSL 15:117db924cf7c 1679 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 15:117db924cf7c 1680
wolfSSL 15:117db924cf7c 1681 #define COMBA_STORE(x) \
wolfSSL 15:117db924cf7c 1682 x = c0;
wolfSSL 15:117db924cf7c 1683
wolfSSL 15:117db924cf7c 1684 #define COMBA_STORE2(x) \
wolfSSL 15:117db924cf7c 1685 x = c1;
wolfSSL 15:117db924cf7c 1686
wolfSSL 15:117db924cf7c 1687 #define COMBA_FINI
wolfSSL 15:117db924cf7c 1688 /* MULADD(i, j): c2:c1:c0 += i * j. The product is read from LO/HI into $12/$13; each carry is recovered with sltu (sum < addend) and added to the next word. */
wolfSSL 15:117db924cf7c 1689 #define MULADD(i, j) \
wolfSSL 15:117db924cf7c 1690 __asm__( \
wolfSSL 15:117db924cf7c 1691 " multu %6,%7 \n\t" \
wolfSSL 15:117db924cf7c 1692 " mflo $12 \n\t" \
wolfSSL 15:117db924cf7c 1693 " mfhi $13 \n\t" \
wolfSSL 15:117db924cf7c 1694 " addu %0,%0,$12 \n\t" \
wolfSSL 15:117db924cf7c 1695 " sltu $12,%0,$12 \n\t" \
wolfSSL 15:117db924cf7c 1696 " addu %1,%1,$13 \n\t" \
wolfSSL 15:117db924cf7c 1697 " sltu $13,%1,$13 \n\t" \
wolfSSL 15:117db924cf7c 1698 " addu %1,%1,$12 \n\t" \
wolfSSL 15:117db924cf7c 1699 " sltu $12,%1,$12 \n\t" \
wolfSSL 15:117db924cf7c 1700 " addu %2,%2,$13 \n\t" \
wolfSSL 15:117db924cf7c 1701 " addu %2,%2,$12 \n\t" \
wolfSSL 15:117db924cf7c 1702 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12","$13");
wolfSSL 15:117db924cf7c 1703
wolfSSL 15:117db924cf7c 1704 #else
wolfSSL 15:117db924cf7c 1705 /* ISO C code. c0/c1/c2 form a three-word accumulator (c0 lowest): COMBA_CLEAR zeroes it, COMBA_FORWARD shifts it down one word, COMBA_STORE/COMBA_STORE2 copy out c0/c1; COMBA_START/COMBA_FINI are empty. */
wolfSSL 15:117db924cf7c 1706
wolfSSL 15:117db924cf7c 1707 #define COMBA_START
wolfSSL 15:117db924cf7c 1708
wolfSSL 15:117db924cf7c 1709 #define COMBA_CLEAR \
wolfSSL 15:117db924cf7c 1710 c0 = c1 = c2 = 0;
wolfSSL 15:117db924cf7c 1711
wolfSSL 15:117db924cf7c 1712 #define COMBA_FORWARD \
wolfSSL 15:117db924cf7c 1713 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 15:117db924cf7c 1714
wolfSSL 15:117db924cf7c 1715 #define COMBA_STORE(x) \
wolfSSL 15:117db924cf7c 1716 x = c0;
wolfSSL 15:117db924cf7c 1717
wolfSSL 15:117db924cf7c 1718 #define COMBA_STORE2(x) \
wolfSSL 15:117db924cf7c 1719 x = c1;
wolfSSL 15:117db924cf7c 1720
wolfSSL 15:117db924cf7c 1721 #define COMBA_FINI
wolfSSL 15:117db924cf7c 1722 /* MULADD(i, j): c2:c1:c0 += i * j. Sums are formed in an fp_word; c0 and c1 keep the fp_digit truncation and the bits above DIGIT_BIT carry into the next word. i and j are cast without being parenthesized. */
wolfSSL 15:117db924cf7c 1723 #define MULADD(i, j) \
wolfSSL 15:117db924cf7c 1724 do { fp_word t; \
wolfSSL 15:117db924cf7c 1725 t = (fp_word)c0 + ((fp_word)i) * ((fp_word)j); \
wolfSSL 15:117db924cf7c 1726 c0 = (fp_digit)t; \
wolfSSL 15:117db924cf7c 1727 t = (fp_word)c1 + (t >> DIGIT_BIT); \
wolfSSL 15:117db924cf7c 1728 c1 = (fp_digit)t; \
wolfSSL 15:117db924cf7c 1729 c2 += (fp_digit)(t >> DIGIT_BIT); \
wolfSSL 15:117db924cf7c 1730 } while (0);
wolfSSL 15:117db924cf7c 1731
wolfSSL 15:117db924cf7c 1732 #endif
wolfSSL 15:117db924cf7c 1733
wolfSSL 15:117db924cf7c 1734
wolfSSL 15:117db924cf7c 1735 #ifdef TFM_SMALL_SET
wolfSSL 15:117db924cf7c 1736 #include "fp_mul_comba_small_set.i"
wolfSSL 15:117db924cf7c 1737 #endif
wolfSSL 15:117db924cf7c 1738
wolfSSL 15:117db924cf7c 1739 #if defined(TFM_MUL3) && FP_SIZE >= 6
wolfSSL 15:117db924cf7c 1740 #include "fp_mul_comba_3.i"
wolfSSL 15:117db924cf7c 1741 #endif
wolfSSL 15:117db924cf7c 1742 #if defined(TFM_MUL4) && FP_SIZE >= 8
wolfSSL 15:117db924cf7c 1743 #include "fp_mul_comba_4.i"
wolfSSL 15:117db924cf7c 1744 #endif
wolfSSL 15:117db924cf7c 1745 #if defined(TFM_MUL6) && FP_SIZE >= 12
wolfSSL 15:117db924cf7c 1746 #include "fp_mul_comba_6.i"
wolfSSL 15:117db924cf7c 1747 #endif
wolfSSL 15:117db924cf7c 1748 #if defined(TFM_MUL7) && FP_SIZE >= 14
wolfSSL 15:117db924cf7c 1749 #include "fp_mul_comba_7.i"
wolfSSL 15:117db924cf7c 1750 #endif
wolfSSL 15:117db924cf7c 1751 #if defined(TFM_MUL8) && FP_SIZE >= 16
wolfSSL 15:117db924cf7c 1752 #include "fp_mul_comba_8.i"
wolfSSL 15:117db924cf7c 1753 #endif
wolfSSL 15:117db924cf7c 1754 #if defined(TFM_MUL9) && FP_SIZE >= 18
wolfSSL 15:117db924cf7c 1755 #include "fp_mul_comba_9.i"
wolfSSL 15:117db924cf7c 1756 #endif
wolfSSL 15:117db924cf7c 1757 #if defined(TFM_MUL12) && FP_SIZE >= 24
wolfSSL 15:117db924cf7c 1758 #include "fp_mul_comba_12.i"
wolfSSL 15:117db924cf7c 1759 #endif
wolfSSL 15:117db924cf7c 1760 #if defined(TFM_MUL17) && FP_SIZE >= 34
wolfSSL 15:117db924cf7c 1761 #include "fp_mul_comba_17.i"
wolfSSL 15:117db924cf7c 1762 #endif
wolfSSL 15:117db924cf7c 1763 #if defined(TFM_MUL20) && FP_SIZE >= 40
wolfSSL 15:117db924cf7c 1764 #include "fp_mul_comba_20.i"
wolfSSL 15:117db924cf7c 1765 #endif
wolfSSL 15:117db924cf7c 1766 #if defined(TFM_MUL24) && FP_SIZE >= 48
wolfSSL 15:117db924cf7c 1767 #include "fp_mul_comba_24.i"
wolfSSL 15:117db924cf7c 1768 #endif
wolfSSL 15:117db924cf7c 1769 #if defined(TFM_MUL28) && FP_SIZE >= 56
wolfSSL 15:117db924cf7c 1770 #include "fp_mul_comba_28.i"
wolfSSL 15:117db924cf7c 1771 #endif
wolfSSL 15:117db924cf7c 1772 #if defined(TFM_MUL32) && FP_SIZE >= 64
wolfSSL 15:117db924cf7c 1773 #include "fp_mul_comba_32.i"
wolfSSL 15:117db924cf7c 1774 #endif
wolfSSL 15:117db924cf7c 1775 #if defined(TFM_MUL48) && FP_SIZE >= 96
wolfSSL 15:117db924cf7c 1776 #include "fp_mul_comba_48.i"
wolfSSL 15:117db924cf7c 1777 #endif
wolfSSL 15:117db924cf7c 1778 #if defined(TFM_MUL64) && FP_SIZE >= 128
wolfSSL 15:117db924cf7c 1779 #include "fp_mul_comba_64.i"
wolfSSL 15:117db924cf7c 1780 #endif
wolfSSL 15:117db924cf7c 1781
wolfSSL 15:117db924cf7c 1782 /* end fp_mul_comba.c asm */
wolfSSL 15:117db924cf7c 1783
wolfSSL 15:117db924cf7c 1784