LCOV - lcov.info - src/common/crypto

LCOV - code coverage report

Current view:	top level - src/common - crypto_p256.c (source / functions)		Coverage	Total	Hit
Test:	lcov.info	Lines:	96.1 %	180	173
Test Date:	2026-03-12 12:01:18	Functions:	100.0 %	24	24
Legend:	Lines: hit not hit \| Branches: + taken - not taken # not executed	Branches:	84.8 %	46	39

             Branch data     Line data    Source code

       1                 :             : /*
       2                 :             :  * Support for EC point decompression on secp256r1 curve.
       3                 :             :  *
       4                 :             :  * Copyright The Mbed TLS Contributors (for all math functions)
       5                 :             :  * Author: Manuel Pégourié-Gonnard.
       6                 :             :  *
       7                 :             :  * Copyright BayLibre SAS (for crypto_p256_uncompress_point())
       8                 :             :  *
       9                 :             :  * SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
      10                 :             :  */
      11                 :             : 
      12                 :             : #include <string.h>
      13                 :             : #include <psa/crypto.h>
      14                 :             : #include <edhoc/buffer_sizes.h>
      15                 :             : #include "crypto_p256.h"
      16                 :             : 
      17                 :             : 
      18                 :             : /**********************************************************************
      19                 :             :  *
      20                 :             :  * Operations on fixed-width unsigned integers
      21                 :             :  *
      22                 :             :  * Represented using 32-bit limbs, least significant limb first.
      23                 :             :  * That is: x = x[0] + 2^32 x[1] + ... + 2^224 x[7] for 256-bit.
      24                 :             :  *
      25                 :             :  **********************************************************************/
      26                 :             : 
      27                 :             : /*
      28                 :             :  * 256-bit set to 32-bit value
      29                 :             :  *
      30                 :             :  * in: x in [0, 2^32)
      31                 :             :  * out: z = x
      32                 :             :  */
      33                 :          12 : static void u256_set32(uint32_t z[8], uint32_t x)
      34                 :             : {
      35                 :          12 :         z[0] = x;
      36         [ +  + ]:          96 :         for (unsigned i = 1; i < 8; i++) {
      37                 :          84 :                 z[i] = 0;
      38                 :             :         }
      39                 :          12 : }
      40                 :             : 
      41                 :             : /*
      42                 :             :  * 256-bit addition
      43                 :             :  *
      44                 :             :  * in: x, y in [0, 2^256)
      45                 :             :  * out: z = (x + y) mod 2^256
      46                 :             :  *      c = (x + y) div 2^256
      47                 :             :  * That is, z + c * 2^256 = x + y
      48                 :             :  *
      49                 :             :  * Note: as a memory area, z must be either equal to x or y, or not overlap.
      50                 :             :  */
      51                 :          32 : static uint32_t u256_add(uint32_t z[8],
      52                 :             :                                                  const uint32_t x[8], const uint32_t y[8])
      53                 :             : {
      54                 :          32 :         uint32_t carry = 0;
      55                 :             : 
      56         [ +  + ]:         288 :         for (unsigned i = 0; i < 8; i++) {
      57                 :         256 :                 uint64_t sum = (uint64_t) carry + x[i] + y[i];
      58                 :         256 :                 z[i] = (uint32_t) sum;
      59                 :         256 :                 carry = (uint32_t) (sum >> 32);
      60                 :             :         }
      61                 :             : 
      62                 :          32 :         return carry;
      63                 :             : }
      64                 :             : 
      65                 :             : /*
      66                 :             :  * 256-bit subtraction
      67                 :             :  *
      68                 :             :  * in: x, y in [0, 2^256)
      69                 :             :  * out: z = (x - y) mod 2^256
      70                 :             :  *      c = 0 if x >=y, 1 otherwise
      71                 :             :  * That is, z = c * 2^256 + x - y
      72                 :             :  *
      73                 :             :  * Note: as a memory area, z must be either equal to x or y, or not overlap.
      74                 :             :  */
      75                 :        1244 : static uint32_t u256_sub(uint32_t z[8],
      76                 :             :                                                  const uint32_t x[8], const uint32_t y[8])
      77                 :             : {
      78                 :        1244 :         uint32_t carry = 0;
      79                 :             : 
      80         [ +  + ]:       11196 :         for (unsigned i = 0; i < 8; i++) {
      81                 :        9952 :                 uint64_t diff = (uint64_t) x[i] - y[i] - carry;
      82                 :        9952 :                 z[i] = (uint32_t) diff;
      83                 :        9952 :                 carry = -(uint32_t) (diff >> 32);
      84                 :             :         }
      85                 :             : 
      86                 :        1244 :         return carry;
      87                 :             : }
      88                 :             : 
      89                 :             : /*
      90                 :             :  * 256-bit conditional assignment
      91                 :             :  *
      92                 :             :  * in: x in [0, 2^256)
      93                 :             :  *     c in [0, 1]
      94                 :             :  * out: z = x if c == 1, z unchanged otherwise
      95                 :             :  *
      96                 :             :  * Note: as a memory area, z must be either equal to x, or not overlap.
      97                 :             :  */
      98                 :        1244 : static void u256_cmov(uint32_t z[8], const uint32_t x[8], uint32_t c)
      99                 :             : {
     100                 :        1244 :         const uint32_t x_mask = -c;
     101         [ +  + ]:       11196 :         for (unsigned i = 0; i < 8; i++) {
     102                 :        9952 :                 z[i] = (z[i] & ~x_mask) | (x[i] & x_mask);
     103                 :             :         }
     104                 :        1244 : }
     105                 :             : 
     106                 :             : /*
     107                 :             :  * 256-bit compare for equality
     108                 :             :  *
     109                 :             :  * in: x in [0, 2^256)
     110                 :             :  *     y in [0, 2^256)
     111                 :             :  * out: 0 if x == y, unspecified non-zero otherwise
     112                 :             :  */
     113                 :           4 : static uint32_t u256_diff(const uint32_t x[8], const uint32_t y[8])
     114                 :             : {
     115                 :           4 :         uint32_t diff = 0;
     116         [ +  + ]:          36 :         for (unsigned i = 0; i < 8; i++) {
     117                 :          32 :                 diff |= x[i] ^ y[i];
     118                 :             :         }
     119                 :           4 :         return diff;
     120                 :             : }
     121                 :             : 
     122                 :             : /*
     123                 :             :  * 32 x 32 -> 64-bit multiply-and-accumulate
     124                 :             :  *
     125                 :             :  * in: x, y, z, t in [0, 2^32)
     126                 :             :  * out: x * y + z + t in [0, 2^64)
     127                 :             :  *
     128                 :             :  * Note: this computation cannot overflow.
     129                 :             :  *
     130                 :             :  * Note: this function has two pure-C implementations (depending on whether
     131                 :             :  * MUL64_IS_CONSTANT_TIME), and possibly optimised asm implementations.
     132                 :             :  * Start with the potential asm definitions, and use the C definition only if
     133                 :             :  * we no have no asm for the current toolchain & CPU.
     134                 :             :  */
     135                 :             : static uint64_t u32_muladd64(uint32_t x, uint32_t y, uint32_t z, uint32_t t);
     136                 :             : 
     137                 :             : /* This macro is used to mark whether an asm implentation is found */
     138                 :             : #undef MULADD64_ASM
     139                 :             : /* This macro is used to mark whether the implementation has a small
     140                 :             :  * code size (ie, it can be inlined even in an unrolled loop) */
     141                 :             : #undef MULADD64_SMALL
     142                 :             : 
     143                 :             : /*
     144                 :             :  * Currently assembly optimisations are only supported with GCC/Clang for
     145                 :             :  * Arm's Cortex-A and Cortex-M lines of CPUs, which start with the v6-M and
     146                 :             :  * v7-M architectures. __ARM_ARCH_PROFILE is not defined for v6 and earlier.
     147                 :             :  * Thumb and 32-bit assembly is supported; aarch64 is not supported.
     148                 :             :  */
     149                 :             : #if defined(__GNUC__) &&\
     150                 :             :         defined(__ARM_ARCH) && __ARM_ARCH >= 6 && defined(__ARM_ARCH_PROFILE) && \
     151                 :             :         ( __ARM_ARCH_PROFILE == 77 || __ARM_ARCH_PROFILE == 65 ) /* 'M' or 'A' */ && \
     152                 :             :         !defined(__aarch64__)
     153                 :             : 
     154                 :             : /*
     155                 :             :  * This set of CPUs is conveniently partitioned as follows:
     156                 :             :  *
     157                 :             :  * 1. Cores that have the DSP extension, which includes a 1-cycle UMAAL
     158                 :             :  *    instruction: M4, M7, M33, all A-class cores.
     159                 :             :  * 2. Cores that don't have the DSP extension, and also lack a constant-time
     160                 :             :  *    64-bit multiplication instruction:
     161                 :             :  *    - M0, M0+, M23: 32-bit multiplication only;
     162                 :             :  *    - M3: 64-bit multiplication is not constant-time.
     163                 :             :  */
     164                 :             : #if defined(__ARM_FEATURE_DSP)
     165                 :             : 
     166                 :             : static uint64_t u32_muladd64(uint32_t x, uint32_t y, uint32_t z, uint32_t t)
     167                 :             : {
     168                 :             :         __asm__(
     169                 :             :                 /* UMAAL <RdLo>, <RdHi>, <Rn>, <Rm> */
     170                 :             :                 "umaal   %[z], %[t], %[x], %[y]"
     171                 :             :                 : [z] "+l" (z), [t] "+l" (t)
     172                 :             :                 : [x] "l" (x), [y] "l" (y)
     173                 :             :         );
     174                 :             :         return ((uint64_t) t << 32) | z;
     175                 :             : }
     176                 :             : #define MULADD64_ASM
     177                 :             : #define MULADD64_SMALL
     178                 :             : 
     179                 :             : #else /* __ARM_FEATURE_DSP */
     180                 :             : 
     181                 :             : /*
     182                 :             :  * This implementation only uses 16x16->32 bit multiplication.
     183                 :             :  *
     184                 :             :  * It decomposes the multiplicands as:
     185                 :             :  *      x = xh:xl = 2^16 * xh + xl
     186                 :             :  *      y = yh:yl = 2^16 * yh + yl
     187                 :             :  * and computes their product as:
     188                 :             :  *      x*y = xl*yl + 2**16 (xh*yl + yl*yh) + 2**32 xh*yh
     189                 :             :  * then adds z and t to the result.
     190                 :             :  */
     191                 :             : static uint64_t u32_muladd64(uint32_t x, uint32_t y, uint32_t z, uint32_t t)
     192                 :             : {
     193                 :             :         /* First compute x*y, using 3 temporary registers */
     194                 :             :         uint32_t tmp1, tmp2, tmp3;
     195                 :             :         __asm__(
     196                 :             :                 ".syntax unified\n\t"
     197                 :             :                 /* start by splitting the inputs into halves */
     198                 :             :                 "lsrs    %[u], %[x], #16\n\t"
     199                 :             :                 "lsrs    %[v], %[y], #16\n\t"
     200                 :             :                 "uxth    %[x], %[x]\n\t"
     201                 :             :                 "uxth    %[y], %[y]\n\t"
     202                 :             :                 /* now we have %[x], %[y], %[u], %[v] = xl, yl, xh, yh */
     203                 :             :                 /* let's compute the 4 products we can form with those */
     204                 :             :                 "movs    %[w], %[v]\n\t"
     205                 :             :                 "muls    %[w], %[u]\n\t"
     206                 :             :                 "muls    %[v], %[x]\n\t"
     207                 :             :                 "muls    %[x], %[y]\n\t"
     208                 :             :                 "muls    %[y], %[u]\n\t"
     209                 :             :                 /* now we have %[x], %[y], %[v], %[w] = xl*yl, xh*yl, xl*yh, xh*yh */
     210                 :             :                 /* let's split and add the first middle product */
     211                 :             :                 "lsls    %[u], %[y], #16\n\t"
     212                 :             :                 "lsrs    %[y], %[y], #16\n\t"
     213                 :             :                 "adds    %[x], %[u]\n\t"
     214                 :             :                 "adcs    %[y], %[w]\n\t"
     215                 :             :                 /* let's finish with the second middle product */
     216                 :             :                 "lsls    %[u], %[v], #16\n\t"
     217                 :             :                 "lsrs    %[v], %[v], #16\n\t"
     218                 :             :                 "adds    %[x], %[u]\n\t"
     219                 :             :                 "adcs    %[y], %[v]\n\t"
     220                 :             :                 : [x] "+l" (x), [y] "+l" (y),
     221                 :             :                   [u] "=&l" (tmp1), [v] "=&l" (tmp2), [w] "=&l" (tmp3)
     222                 :             :                 : /* no read-only inputs */
     223                 :             :                 : "cc"
     224                 :             :         );
     225                 :             :         (void) tmp1;
     226                 :             :         (void) tmp2;
     227                 :             :         (void) tmp3;
     228                 :             : 
     229                 :             :         /* Add z and t, using one temporary register */
     230                 :             :         __asm__(
     231                 :             :                 ".syntax unified\n\t"
     232                 :             :                 "movs    %[u], #0\n\t"
     233                 :             :                 "adds    %[x], %[z]\n\t"
     234                 :             :                 "adcs    %[y], %[u]\n\t"
     235                 :             :                 "adds    %[x], %[t]\n\t"
     236                 :             :                 "adcs    %[y], %[u]\n\t"
     237                 :             :                 : [x] "+l" (x), [y] "+l" (y), [u] "=&l" (tmp1)
     238                 :             :                 : [z] "l" (z), [t] "l" (t)
     239                 :             :                 : "cc"
     240                 :             :         );
     241                 :             :         (void) tmp1;
     242                 :             : 
     243                 :             :         return ((uint64_t) y << 32) | x;
     244                 :             : }
     245                 :             : #define MULADD64_ASM
     246                 :             : 
     247                 :             : #endif /* __ARM_FEATURE_DSP */
     248                 :             : 
     249                 :             : #endif /* GCC/Clang with Cortex-M/A CPU */
     250                 :             : 
     251                 :             : #if !defined(MULADD64_ASM)
     252                 :             : #if defined(MUL64_IS_CONSTANT_TIME)
     253                 :             : static uint64_t u32_muladd64(uint32_t x, uint32_t y, uint32_t z, uint32_t t)
     254                 :             : {
     255                 :             :         return (uint64_t) x * y + z + t;
     256                 :             : }
     257                 :             : #define MULADD64_SMALL
     258                 :             : #else
     259                 :      153600 : static uint64_t u32_muladd64(uint32_t x, uint32_t y, uint32_t z, uint32_t t)
     260                 :             : {
     261                 :             :         /* x = xl + 2**16 xh, y = yl + 2**16 yh */
     262                 :      153600 :         const uint16_t xl = (uint16_t) x;
     263                 :      153600 :         const uint16_t yl = (uint16_t) y;
     264                 :      153600 :         const uint16_t xh = (uint16_t)(x >> 16);
     265                 :      153600 :         const uint16_t yh = (uint16_t)(y >> 16);
     266                 :             : 
     267                 :             :         /* x*y = xl*yl + 2**16 (xh*yl + yl*yh) + 2**32 xh*yh
     268                 :             :          *     = lo    + 2**16 (m1    + m2   ) + 2**32 hi    */
     269                 :      153600 :         const uint32_t lo = (uint32_t) xl * yl;
     270                 :      153600 :         const uint32_t m1 = (uint32_t) xh * yl;
     271                 :      153600 :         const uint32_t m2 = (uint32_t) xl * yh;
     272                 :      153600 :         const uint32_t hi = (uint32_t) xh * yh;
     273                 :             : 
     274                 :      153600 :         uint64_t acc = lo + ((uint64_t) (hi + (m1 >> 16) + (m2 >> 16)) << 32);
     275                 :      153600 :         acc += m1 << 16;
     276                 :      153600 :         acc += m2 << 16;
     277                 :      153600 :         acc += z;
     278                 :      153600 :         acc += t;
     279                 :             : 
     280                 :      153600 :         return acc;
     281                 :             : }
     282                 :             : #endif /* MUL64_IS_CONSTANT_TIME */
     283                 :             : #endif /* MULADD64_ASM */
     284                 :             : 
     285                 :             : /*
     286                 :             :  * 288 + 32 x 256 -> 288-bit multiply and add
     287                 :             :  *
     288                 :             :  * in: x in [0, 2^32)
     289                 :             :  *     y in [0, 2^256)
     290                 :             :  *     z in [0, 2^288)
     291                 :             :  * out: z_out = z_in + x * y mod 2^288
     292                 :             :  *      c     = z_in + x * y div 2^288
     293                 :             :  * That is, z_out + c * 2^288 = z_in + x * y
     294                 :             :  *
     295                 :             :  * Note: as a memory area, z must be either equal to y, or not overlap.
     296                 :             :  *
     297                 :             :  * This is a helper for Montgomery multiplication.
     298                 :             :  */
     299                 :       19200 : static uint32_t u288_muladd(uint32_t z[9], uint32_t x, const uint32_t y[8])
     300                 :             : {
     301                 :       19200 :         uint32_t carry = 0;
     302                 :             : 
     303                 :             : #define U288_MULADD_STEP(i) \
     304                 :             :         do { \
     305                 :             :                 uint64_t prod = u32_muladd64(x, y[i], z[i], carry); \
     306                 :             :                 z[i] = (uint32_t) prod; \
     307                 :             :                 carry = (uint32_t) (prod >> 32); \
     308                 :             :         } while( 0 )
     309                 :             : 
     310                 :             : #if defined(MULADD64_SMALL)
     311                 :             :         U288_MULADD_STEP(0);
     312                 :             :         U288_MULADD_STEP(1);
     313                 :             :         U288_MULADD_STEP(2);
     314                 :             :         U288_MULADD_STEP(3);
     315                 :             :         U288_MULADD_STEP(4);
     316                 :             :         U288_MULADD_STEP(5);
     317                 :             :         U288_MULADD_STEP(6);
     318                 :             :         U288_MULADD_STEP(7);
     319                 :             : #else
     320         [ +  + ]:      172800 :         for (unsigned i = 0; i < 8; i++) {
     321                 :      153600 :                 U288_MULADD_STEP(i);
     322                 :             :         }
     323                 :             : #endif
     324                 :             : 
     325                 :       19200 :         uint64_t sum = (uint64_t) z[8] + carry;
     326                 :       19200 :         z[8] = (uint32_t) sum;
     327                 :       19200 :         carry = (uint32_t) (sum >> 32);
     328                 :             : 
     329                 :       19200 :         return carry;
     330                 :             : }
     331                 :             : 
     332                 :             : /*
     333                 :             :  * 288-bit in-place right shift by 32 bits
     334                 :             :  *
     335                 :             :  * in: z in [0, 2^288)
     336                 :             :  *     c in [0, 2^32)
     337                 :             :  * out: z_out = z_in div 2^32 + c * 2^256
     338                 :             :  *            = (z_in + c * 2^288) div 2^32
     339                 :             :  *
     340                 :             :  * This is a helper for Montgomery multiplication.
     341                 :             :  */
     342                 :        9600 : static void u288_rshift32(uint32_t z[9], uint32_t c)
     343                 :             : {
     344         [ +  + ]:       86400 :         for (unsigned i = 0; i < 8; i++) {
     345                 :       76800 :                 z[i] = z[i + 1];
     346                 :             :         }
     347                 :        9600 :         z[8] = c;
     348                 :        9600 : }
     349                 :             : 
     350                 :             : /*
     351                 :             :  * 256-bit import from big-endian bytes
     352                 :             :  *
     353                 :             :  * in: p = p0, ..., p31
     354                 :             :  * out: z = p0 * 2^248 + p1 * 2^240 + ... + p30 * 2^8 + p31
     355                 :             :  */
     356                 :          12 : static void u256_from_bytes(uint32_t z[8], const uint8_t p[32])
     357                 :             : {
     358         [ +  + ]:         108 :         for (unsigned i = 0; i < 8; i++) {
     359                 :          96 :                 unsigned j = 4 * (7 - i);
     360                 :          96 :                 z[i] = ((uint32_t) p[j + 0] << 24) |
     361                 :          96 :                            ((uint32_t) p[j + 1] << 16) |
     362                 :          96 :                            ((uint32_t) p[j + 2] <<  8) |
     363                 :          96 :                            ((uint32_t) p[j + 3] <<  0);
     364                 :             :         }
     365                 :          12 : }
     366                 :             : 
     367                 :             : /*
     368                 :             :  * 256-bit export to big-endian bytes
     369                 :             :  *
     370                 :             :  * in: z in [0, 2^256)
     371                 :             :  * out: p = p0, ..., p31 such that
     372                 :             :  *      z = p0 * 2^248 + p1 * 2^240 + ... + p30 * 2^8 + p31
     373                 :             :  */
     374                 :           4 : static void u256_to_bytes(uint8_t p[32], const uint32_t z[8])
     375                 :             : {
     376         [ +  + ]:          36 :         for (unsigned i = 0; i < 8; i++) {
     377                 :          32 :                 unsigned j = 4 * (7 - i);
     378                 :          32 :                 p[j + 0] = (uint8_t) (z[i] >> 24);
     379                 :          32 :                 p[j + 1] = (uint8_t) (z[i] >> 16);
     380                 :          32 :                 p[j + 2] = (uint8_t) (z[i] >>  8);
     381                 :          32 :                 p[j + 3] = (uint8_t) (z[i] >>  0);
     382                 :             :         }
     383                 :           4 : }
     384                 :             : 
     385                 :             : /**********************************************************************
     386                 :             :  *
     387                 :             :  * Operations modulo a 256-bit prime m
     388                 :             :  *
     389                 :             :  * These are done in the Montgomery domain, that is x is represented by
     390                 :             :  *  x * 2^256 mod m
     391                 :             :  * Numbers need to be converted to that domain before computations,
     392                 :             :  * and back from it afterwards.
     393                 :             :  *
     394                 :             :  * Inversion is computed using Fermat's little theorem.
     395                 :             :  *
     396                 :             :  * Assumptions on m:
     397                 :             :  * - Montgomery operations require that m is odd.
     398                 :             :  * - Fermat's little theorem require it to be a prime.
     399                 :             :  * - m256_inv() further requires that m % 2^32 >= 2.
     400                 :             :  * - m256_inv() also assumes that the value of m is not a secret.
     401                 :             :  *
     402                 :             :  * In practice operations are done modulo the curve's p and n,
     403                 :             :  * both of which satisfy those assumptions.
     404                 :             :  *
     405                 :             :  **********************************************************************/
     406                 :             : 
     407                 :             : /*
     408                 :             :  * Data associated to a modulus for Montgomery operations.
     409                 :             :  *
     410                 :             :  * m in [0, 2^256) - the modulus itself, must be odd
     411                 :             :  * R2 = 2^512 mod m
     412                 :             :  * ni = -m^-1 mod 2^32
     413                 :             :  */
     414                 :             : typedef struct {
     415                 :             :         uint32_t m[8];
     416                 :             :         uint32_t R2[8];
     417                 :             :         uint32_t ni;
     418                 :             : }
     419                 :             : m256_mod;
     420                 :             : 
     421                 :             : /*
     422                 :             :  * Data for Montgomery operations modulo the curve's p
     423                 :             :  */
     424                 :             : static const m256_mod p256_p = {
     425                 :             :         {   /* the curve's p */
     426                 :             :                 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000,
     427                 :             :                 0x00000000, 0x00000000, 0x00000001, 0xFFFFFFFF,
     428                 :             :         },
     429                 :             :         {   /* 2^512 mod p */
     430                 :             :                 0x00000003, 0x00000000, 0xffffffff, 0xfffffffb,
     431                 :             :                 0xfffffffe, 0xffffffff, 0xfffffffd, 0x00000004,
     432                 :             :         },
     433                 :             :         0x00000001, /* -p^-1 mod 2^32 */
     434                 :             : };
     435                 :             : 
     436                 :             : /*
     437                 :             :  * Modular addition
     438                 :             :  *
     439                 :             :  * in: x, y in [0, m)
     440                 :             :  *     mod must point to a valid m256_mod structure
     441                 :             :  * out: z = (x + y) mod m, in [0, m)
     442                 :             :  *
     443                 :             :  * Note: as a memory area, z must be either equal to x or y, or not overlap.
     444                 :             :  */
     445                 :           8 : static void m256_add(uint32_t z[8],
     446                 :             :                                          const uint32_t x[8], const uint32_t y[8],
     447                 :             :                                          const m256_mod *mod)
     448                 :             : {
     449                 :             :         uint32_t r[8];
     450                 :           8 :         uint32_t carry_add = u256_add(z, x, y);
     451                 :           8 :         uint32_t carry_sub = u256_sub(r, z, mod->m);
     452                 :             :         /* Need to subract m if:
     453                 :             :          *      x+y >= 2^256 > m (that is, carry_add == 1)
     454                 :             :          *   OR z >= m (that is, carry_sub == 0) */
     455                 :           8 :         uint32_t use_sub = carry_add | (1 - carry_sub);
     456                 :           8 :         u256_cmov(z, r, use_sub);
     457                 :           8 : }
     458                 :             : 
     459                 :             : /*
     460                 :             :  * Modular addition mod p
     461                 :             :  *
     462                 :             :  * in: x, y in [0, p)
     463                 :             :  * out: z = (x + y) mod p, in [0, p)
     464                 :             :  *
     465                 :             :  * Note: as a memory area, z must be either equal to x or y, or not overlap.
     466                 :             :  */
     467                 :           8 : static void m256_add_p(uint32_t z[8],
     468                 :             :                                            const uint32_t x[8], const uint32_t y[8])
     469                 :             : {
     470                 :           8 :         m256_add(z, x, y, &p256_p);
     471                 :           8 : }
     472                 :             : 
     473                 :             : /*
     474                 :             :  * Modular subtraction
     475                 :             :  *
     476                 :             :  * in: x, y in [0, m)
     477                 :             :  *     mod must point to a valid m256_mod structure
     478                 :             :  * out: z = (x - y) mod m, in [0, m)
     479                 :             :  *
     480                 :             :  * Note: as a memory area, z must be either equal to x or y, or not overlap.
     481                 :             :  */
     482                 :          24 : static void m256_sub(uint32_t z[8],
     483                 :             :                                          const uint32_t x[8], const uint32_t y[8],
     484                 :             :                                          const m256_mod *mod)
     485                 :             : {
     486                 :             :         uint32_t r[8];
     487                 :          24 :         uint32_t carry = u256_sub(z, x, y);
     488                 :          24 :         (void) u256_add(r, z, mod->m);
     489                 :             :         /* Need to add m if and only if x < y, that is carry == 1.
     490                 :             :          * In that case z is in [2^256 - m + 1, 2^256 - 1], so the
     491                 :             :          * addition will have a carry as well, which cancels out. */
     492                 :          24 :         u256_cmov(z, r, carry);
     493                 :          24 : }
     494                 :             : 
     495                 :             : /*
     496                 :             :  * Modular subtraction mod p
     497                 :             :  *
     498                 :             :  * in: x, y in [0, p)
     499                 :             :  * out: z = (x + y) mod p, in [0, p)
     500                 :             :  *
     501                 :             :  * Note: as a memory area, z must be either equal to x or y, or not overlap.
     502                 :             :  */
     503                 :          24 : static void m256_sub_p(uint32_t z[8],
     504                 :             :                                            const uint32_t x[8], const uint32_t y[8])
     505                 :             : {
     506                 :          24 :         m256_sub(z, x, y, &p256_p);
     507                 :          24 : }
     508                 :             : 
     509                 :             : /*
     510                 :             :  * Montgomery modular multiplication
     511                 :             :  *
     512                 :             :  * in: x, y in [0, m)
     513                 :             :  *     mod must point to a valid m256_mod structure
     514                 :             :  * out: z = (x * y) / 2^256 mod m, in [0, m)
     515                 :             :  *
     516                 :             :  * Note: as a memory area, z may overlap with x or y.
     517                 :             :  */
     518                 :        1200 : static void m256_mul(uint32_t z[8],
     519                 :             :                                          const uint32_t x[8], const uint32_t y[8],
     520                 :             :                                          const m256_mod *mod)
     521                 :             : {
     522                 :             :         /*
     523                 :             :          * Algorithm 14.36 in Handbook of Applied Cryptography with:
     524                 :             :          * b = 2^32, n = 8, R = 2^256
     525                 :             :          */
     526                 :        1200 :         uint32_t m_prime = mod->ni;
     527                 :             :         uint32_t a[9];
     528                 :             : 
     529         [ +  + ]:       12000 :         for (unsigned i = 0; i < 9; i++) {
     530                 :       10800 :                 a[i] = 0;
     531                 :             :         }
     532                 :             : 
     533         [ +  + ]:       10800 :         for (unsigned i = 0; i < 8; i++) {
     534                 :             :                 /* the "mod 2^32" is implicit from the type */
     535                 :        9600 :                 uint32_t u = (a[0] + x[i] * y[0]) * m_prime;
     536                 :             : 
     537                 :             :                 /* a = (a + x[i] * y + u * m) div b */
     538                 :        9600 :                 uint32_t c = u288_muladd(a, x[i], y);
     539                 :        9600 :                 c += u288_muladd(a, u, mod->m);
     540                 :        9600 :                 u288_rshift32(a, c);
     541                 :             :         }
     542                 :             : 
     543                 :             :         /* a = a > m ? a - m : a */
     544                 :        1200 :         uint32_t carry_add = a[8];  // 0 or 1 since a < 2m, see HAC Note 14.37
     545                 :        1200 :         uint32_t carry_sub = u256_sub(z, a, mod->m);
     546                 :        1200 :         uint32_t use_sub = carry_add | (1 - carry_sub);     // see m256_add()
     547                 :        1200 :         u256_cmov(z, a, 1 - use_sub);
     548                 :        1200 : }
     549                 :             : 
     550                 :             : /*
     551                 :             :  * Montgomery modular multiplication modulo p.
     552                 :             :  *
     553                 :             :  * in: x, y in [0, p)
     554                 :             :  * out: z = (x * y) / 2^256 mod p, in [0, p)
     555                 :             :  *
     556                 :             :  * Note: as a memory area, z may overlap with x or y.
     557                 :             :  */
     558                 :        1180 : static void m256_mul_p(uint32_t z[8],
     559                 :             :                                            const uint32_t x[8], const uint32_t y[8])
     560                 :             : {
     561                 :        1180 :         m256_mul(z, x, y, &p256_p);
     562                 :        1180 : }
     563                 :             : 
     564                 :             : /*
     565                 :             :  * In-place conversion to Montgomery form
     566                 :             :  *
     567                 :             :  * in: z in [0, m)
     568                 :             :  *     mod must point to a valid m256_mod structure
     569                 :             :  * out: z_out = z_in * 2^256 mod m, in [0, m)
     570                 :             :  */
     571                 :          16 : static void m256_prep(uint32_t z[8], const m256_mod *mod)
     572                 :             : {
     573                 :          16 :         m256_mul(z, z, mod->R2, mod);
     574                 :          16 : }
     575                 :             : 
     576                 :             : /*
     577                 :             :  * In-place conversion from Montgomery form
     578                 :             :  *
     579                 :             :  * in: z in [0, m)
     580                 :             :  *     mod must point to a valid m256_mod structure
     581                 :             :  * out: z_out = z_in / 2^256 mod m, in [0, m)
     582                 :             :  * That is, z_in was z_actual * 2^256 mod m, and z_out is z_actual
     583                 :             :  */
     584                 :           4 : static void m256_done(uint32_t z[8], const m256_mod *mod)
     585                 :             : {
     586                 :             :         uint32_t one[8];
     587                 :           4 :         u256_set32(one, 1);
     588                 :           4 :         m256_mul(z, z, one, mod);
     589                 :           4 : }
     590                 :             : 
     591                 :             : /*
     592                 :             :  * Set to 32-bit value
     593                 :             :  *
     594                 :             :  * in: x in [0, 2^32)
     595                 :             :  *     mod must point to a valid m256_mod structure
     596                 :             :  * out: z = x * 2^256 mod m, in [0, m)
     597                 :             :  * That is, z is set to the image of x in the Montgomery domain.
     598                 :             :  */
     599                 :           4 : static void m256_set32(uint32_t z[8], uint32_t x, const m256_mod *mod)
     600                 :             : {
     601                 :           4 :         u256_set32(z, x);
     602                 :           4 :         m256_prep(z, mod);
     603                 :           4 : }
     604                 :             : 
     605                 :             : /*
     606                 :             :  * Import modular integer from bytes to Montgomery domain
     607                 :             :  *
     608                 :             :  * in: p = p0, ..., p32
     609                 :             :  *     mod must point to a valid m256_mod structure
     610                 :             :  * out: z = (p0 * 2^248 + ... + p31) * 2^256 mod m, in [0, m)
     611                 :             :  *      return 0 if the number was already in [0, m), or -1.
     612                 :             :  *      z may be incorrect and must be discared when -1 is returned.
     613                 :             :  */
     614                 :          12 : static int m256_from_bytes(uint32_t z[8],
     615                 :             :                                                    const uint8_t p[32], const m256_mod *mod)
     616                 :             : {
     617                 :          12 :         u256_from_bytes(z, p);
     618                 :             : 
     619                 :             :         uint32_t t[8];
     620                 :          12 :         uint32_t lt_m = u256_sub(t, z, mod->m);
     621         [ -  + ]:          12 :         if (lt_m != 1)
     622                 :           0 :                 return -1;
     623                 :             : 
     624                 :          12 :         m256_prep(z, mod);
     625                 :          12 :         return 0;
     626                 :             : }
     627                 :             : 
     628                 :             : /*
     629                 :             :  * Export modular integer from Montgomery domain to bytes
     630                 :             :  *
     631                 :             :  * in: z in [0, 2^256)
     632                 :             :  *     mod must point to a valid m256_mod structure
     633                 :             :  * out: p = p0, ..., p31 such that
     634                 :             :  *      z = (p0 * 2^248 + ... + p31) * 2^256 mod m
     635                 :             :  */
     636                 :           4 : static void m256_to_bytes(uint8_t p[32],
     637                 :             :                                                   const uint32_t z[8], const m256_mod *mod)
     638                 :             : {
     639                 :             :         uint32_t zi[8];
     640                 :           4 :         u256_cmov(zi, z, 1);
     641                 :           4 :         m256_done(zi, mod);
     642                 :             : 
     643                 :           4 :         u256_to_bytes(p, zi);
     644                 :           4 : }
     645                 :             : 
     646                 :             : /**********************************************************************
     647                 :             :  *
     648                 :             :  * Operations on curve points
     649                 :             :  *
     650                 :             :  * Points are represented in two coordinates system:
     651                 :             :  *  - affine (x, y) - extended to represent 0 (see below)
     652                 :             :  *  - jacobian (x:y:z)
     653                 :             :  * In either case, coordinates are integers modulo p256_p and
     654                 :             :  * are always represented in the Montgomery domain.
     655                 :             :  *
     656                 :             :  * For background on jacobian coordinates, see for example [GECC] 3.2.2:
     657                 :             :  * - conversions go (x, y) -> (x:y:1) and (x:y:z) -> (x/z^2, y/z^3)
     658                 :             :  * - the curve equation becomes y^2 = x^3 - 3 x z^4 + b z^6
     659                 :             :  * - 0 (aka the origin aka point at infinity) is (x:y:0) with y^2 = x^3.
     660                 :             :  * - point negation goes -(x:y:z) = (x:-y:z)
     661                 :             :  *
     662                 :             :  * Normally 0 (the point at infinity) can't be represented in affine
     663                 :             :  * coordinates. However we extend affine coordinates with the convention that
     664                 :             :  * (0, 0) (which is normally not a point on the curve) is interpreted as 0.
     665                 :             :  *
     666                 :             :  * References:
     667                 :             :  * - [GECC]: Guide to Elliptic Curve Cryptography; Hankerson, Menezes,
     668                 :             :  *   Vanstone; Springer, 2004.
     669                 :             :  * - [CMO98]: Efficient Elliptic Curve Exponentiation Using Mixed Coordinates;
     670                 :             :  *   Cohen, Miyaji, Ono; Springer, ASIACRYPT 1998.
     671                 :             :  *   https://link.springer.com/content/pdf/10.1007/3-540-49649-1_6.pdf
     672                 :             :  * - [RCB15]: Complete addition formulas for prime order elliptic curves;
     673                 :             :  *   Renes, Costello, Batina; IACR e-print 2015-1060.
     674                 :             :  *   https://eprint.iacr.org/2015/1060.pdf
     675                 :             :  *
     676                 :             :  **********************************************************************/
     677                 :             : 
     678                 :             : /*
     679                 :             :  * The curve's b parameter in the Short Weierstrass equation
     680                 :             :  *  y^2 = x^3 - 3*x + b
     681                 :             :  * Compared to the standard, this is converted to the Montgomery domain.
     682                 :             :  */
     683                 :             : static const uint32_t p256_b[8] = { /* b * 2^256 mod p */
     684                 :             :         0x29c4bddf, 0xd89cdf62, 0x78843090, 0xacf005cd,
     685                 :             :         0xf7212ed6, 0xe5a220ab, 0x04874834, 0xdc30061d,
     686                 :             : };
     687                 :             : 
     688                 :             : /*
     689                 :             :  * Point-on-curve check - do the coordinates satisfy the curve's equation?
     690                 :             :  *
     691                 :             :  * in: x, y in [0, p)   (Montgomery domain)
     692                 :             :  * out: 0 if the point lies on the curve and is not 0,
     693                 :             :  *      unspecified non-zero otherwise
     694                 :             :  */
     695                 :           4 : static uint32_t point_check(const uint32_t x[8], const uint32_t y[8])
     696                 :             : {
     697                 :             :         uint32_t lhs[8], rhs[8];
     698                 :             : 
     699                 :             :         /* lhs = y^2 */
     700                 :           4 :         m256_mul_p(lhs, y, y);
     701                 :             : 
     702                 :             :         /* rhs = x^3 - 3x + b */
     703                 :           4 :         m256_mul_p(rhs, x,   x);      /* x^2 */
     704                 :           4 :         m256_mul_p(rhs, rhs, x);      /* x^3 */
     705         [ +  + ]:          16 :         for (unsigned i = 0; i < 3; i++)
     706                 :          12 :                 m256_sub_p(rhs, rhs, x);  /* x^3 - 3x */
     707                 :           4 :         m256_add_p(rhs, rhs, p256_b); /* x^3 - 3x + b */
     708                 :             : 
     709                 :           4 :         return u256_diff(lhs, rhs);
     710                 :             : }
     711                 :             : 
     712                 :             : /*
     713                 :             :  * Import curve point from bytes
     714                 :             :  *
     715                 :             :  * in: p = (x, y) concatenated, fixed-width 256-bit big-endian integers
     716                 :             :  * out: x, y in Mongomery domain
     717                 :             :  *      return 0 if x and y are both in [0, p)
     718                 :             :  *                  and (x, y) is on the curve and not 0
     719                 :             :  *             unspecified non-zero otherwise.
     720                 :             :  *      x and y are unspecified and must be discarded if returning non-zero.
     721                 :             :  */
     722                 :           4 : static int point_from_bytes(uint32_t x[8], uint32_t y[8], const uint8_t p[64])
     723                 :             : {
     724                 :             :         int ret;
     725                 :             : 
     726                 :           4 :         ret = m256_from_bytes(x, p, &p256_p);
     727         [ -  + ]:           4 :         if (ret != 0)
     728                 :           0 :                 return ret;
     729                 :             : 
     730                 :           4 :         ret = m256_from_bytes(y, p + 32, &p256_p);
     731         [ -  + ]:           4 :         if (ret != 0)
     732                 :           0 :                 return ret;
     733                 :             : 
     734                 :           4 :         return (int) point_check(x, y);
     735                 :             : }
     736                 :             : 
     737                 :           4 : int crypto_p256_uncompress_point(const uint8_t *input, size_t ilen,
     738                 :             :                                  uint8_t *output, size_t *olen, size_t osize)
     739                 :             : {
     740                 :             :         uint32_t x[8], r[8], y2[8];
     741                 :             :         int ret;
     742                 :             : 
     743         [ -  + ]:           4 :         if (ilen != 32) {
     744                 :           0 :                 return PSA_ERROR_INVALID_ARGUMENT;
     745                 :             :         }
     746                 :             : 
     747         [ -  + ]:           4 :         if (osize < 65) {
     748                 :           0 :                 return PSA_ERROR_INVALID_ARGUMENT;
     749                 :             :         }
     750                 :             : 
     751                 :             :         // output will consist of 0x04|X|Y
     752                 :           4 :         *olen = 65;
     753                 :           4 :         output[0] = 0x04;
     754                 :             : 
     755                 :             :         // x <= input
     756         [ -  + ]:           4 :         if (m256_from_bytes(x, input, &p256_p) != 0) {
     757                 :           0 :                 return PSA_ERROR_INVALID_ARGUMENT;
     758                 :             :         }
     759                 :             : 
     760                 :             :         /* r = x^3 - 3x + b */
     761                 :           4 :         m256_mul_p(r, x, x); /* x^2 */
     762                 :           4 :         m256_mul_p(r, r, x); /* x^3 */
     763         [ +  + ]:          16 :         for (unsigned int i = 0; i < 3; i++)
     764                 :          12 :                 m256_sub_p(r, r, x); /* x^3 - 3x */
     765                 :           4 :         m256_add_p(r, r, p256_b); /* x^3 - 3x + b */
     766                 :             :         /* y^2 = r */
     767                 :           4 :         u256_cmov(y2, r, 1);
     768                 :             : 
     769                 :             :         /* exp = (p + 1)/4 = (p + 1) >> 2 */
     770                 :           4 :         uint32_t exp[8] = {
     771                 :             :                 0x00000000, 0x00000000, 0x40000000, 0x00000000,
     772                 :             :                 0x00000000, 0x40000000, 0xC0000000, 0x3FFFFFFF
     773                 :             :         };
     774                 :             : 
     775                 :             :         /* Binary exponentiation */
     776                 :           4 :         m256_set32(r, 1, &p256_p);
     777         [ +  + ]:          36 :         for (uint32_t i = 0; i < 8; i++) {
     778                 :          32 :                 uint32_t exp_limb = exp[i];
     779         [ +  + ]:        1056 :                 for (uint32_t j = 0; j < 32; j++) {
     780         [ +  + ]:        1024 :                         if (exp_limb & 1) {
     781                 :         136 :                                 m256_mul_p(r, r, y2);
     782                 :             :                         }
     783                 :        1024 :                         m256_mul_p(y2, y2, y2);
     784                 :        1024 :                         exp_limb >>= 1;
     785                 :             :                 }
     786                 :             :         }
     787                 :             : 
     788                 :             :         uint32_t y[8], zero[8];
     789                 :           4 :         u256_set32(zero, 0);
     790                 :             :         /* y = r */
     791                 :           4 :         u256_cmov(y, r, 1);
     792                 :             : 
     793                 :             :         uint8_t y_raw[32];
     794                 :           4 :         m256_to_bytes(y_raw, y, &p256_p);
     795                 :             : 
     796                 :             :         uint8_t p[64];
     797                 :             :         uint32_t x_check[8], y_check[8];
     798                 :           4 :         memcpy(p, input, 32);
     799                 :           4 :         memcpy(p + 32, y_raw, 32);
     800                 :           4 :         ret = point_from_bytes(x_check, y_check, p);
     801         [ -  + ]:           4 :         if (ret != 0) {
     802                 :           0 :                 return PSA_ERROR_INVALID_ARGUMENT;
     803                 :             :         }
     804                 :             : 
     805                 :           4 :         memcpy(output + 1, p, 64);
     806                 :             : 
     807                 :           4 :         return 0;
     808                 :             : }

Generated by: LCOV version 2.0-1