bignum Wdiff usr/src/common/bignum/mont_mulf.c

Print this page

6799218 RSA using Solaris Kernel Crypto framework lagging behind OpenSSL
5016936 bignumimpl:big_mul: potential memory leak
6810280 panic from bignum module: vmem_xalloc(): size == 0

Split	Close
Expand all
Collapse all

          --- old/usr/src/common/bignum/mont_mulf.c
          +++ new/usr/src/common/bignum/mont_mulf.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5      - * Common Development and Distribution License, Version 1.0 only
   6      - * (the "License").  You may not use this file except in compliance
   7      - * with the License.
        5 + * Common Development and Distribution License (the "License").
        6 + * You may not use this file except in compliance with the License.
   8    7   *
   9    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10    9   * or http://www.opensolaris.org/os/licensing.
  11   10   * See the License for the specific language governing permissions
  12   11   * and limitations under the License.
  13   12   *
  14   13   * When distributing Covered Code, include this CDDL HEADER in each
  15   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16   15   * If applicable, add the following below this CDDL HEADER, with the
  17   16   * fields enclosed by brackets "[]" replaced with your own identifying
  18   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  19   18   *
  20   19   * CDDL HEADER END
  21   20   */
  22   21  /*
  23      - * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
       22 + * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24   23   * Use is subject to license terms.
  25   24   */
  26   25  
  27      -#pragma ident   "%Z%%M% %I%     %E% SMI"
  28      -
  29   26  /*
  30   27   * If compiled without -DRF_INLINE_MACROS, this file needs -lm at link time.
  31   28   * If compiled with -DRF_INLINE_MACROS, it needs conv.il at compile time
  32      - * (i.e. cc <compileer_flags> -DRF_INLINE_MACROS conv.il mont_mulf.c )
       29 + * (i.e. cc <compiler_flags> -DRF_INLINE_MACROS conv.il mont_mulf.c )
  33   30   */
  34   31  
  35   32  #include <sys/types.h>
  36   33  #include <math.h>
  37   34  
  38   35  static const double TwoTo16 = 65536.0;
  39   36  static const double TwoToMinus16 = 1.0/65536.0;
  40   37  static const double Zero = 0.0;
  41   38  static const double TwoTo32 = 65536.0 * 65536.0;
  42   39  static const double TwoToMinus32 = 1.0 / (65536.0 * 65536.0);

  43   40  
  44   41  #ifdef RF_INLINE_MACROS
  45   42  
  46   43  double upper32(double);
  47   44  double lower32(double, double);
  48   45  double mod(double, double, double);
  49   46  
  50   47  #else
  51   48  
  52   49  static double
  53   50  upper32(double x)
  54   51  {
  55   52          return (floor(x * TwoToMinus32));
  56   53  }
  57   54  
  58   55  
  59   56  static double
  60   57  lower32(double x, double y)
  61   58  {
  62   59          return (x - TwoTo32 * floor(x * TwoToMinus32));
  63   60  }
  64   61  
  65   62  static double
  66   63  mod(double x, double oneoverm, double m)
  67   64  {
  68   65          return (x - m * floor(x * oneoverm));
  69   66  }
  70   67  
  71   68  #endif
  72   69  
  73   70  
  74   71  static void
  75   72  cleanup(double *dt, int from, int tlen)
  76   73  {
  77   74          int i;
  78   75          double tmp, tmp1, x, x1;
  79   76  
  80   77          tmp = tmp1 = Zero;
  81   78  
  82   79          for (i = 2 * from; i < 2 * tlen; i += 2) {
  83   80                  x = dt[i];
  84   81                  x1 = dt[i + 1];
  85   82                  dt[i] = lower32(x, Zero) + tmp;
  86   83                  dt[i + 1] = lower32(x1, Zero) + tmp1;

↓ open down ↓

44 lines elided

↑ open up ↑

  87   84                  tmp = upper32(x);
  88   85                  tmp1 = upper32(x1);
  89   86          }
  90   87  }
  91   88  
  92   89  
  93   90  void
  94   91  conv_d16_to_i32(uint32_t *i32, double *d16, int64_t *tmp, int ilen)
  95   92  {
  96   93          int i;
  97      -        int64_t t, t1,          /* using int64_t and not uint64_t */
  98      -                a, b, c, d;     /* because more efficient code is */
       94 +        int64_t t, t1,          /* Using int64_t and not uint64_t */
       95 +            a, b, c, d;         /* because more efficient code is */
  99   96                                  /* generated this way, and there  */
 100      -                                /* is no overflow  */
       97 +                                /* is no overflow.  */
 101   98          t1 = 0;
 102   99          a = (int64_t)d16[0];
 103  100          b = (int64_t)d16[1];
 104  101          for (i = 0; i < ilen - 1; i++) {
 105  102                  c = (int64_t)d16[2 * i + 2];
 106  103                  t1 += a & 0xffffffff;
 107  104                  t = (a >> 32);
 108  105                  d = (int64_t)d16[2 * i + 3];
 109  106                  t1 += (b & 0xffff) << 16;
 110  107                  t += (b >> 16) + (t1 >> 32);

 111  108                  i32[i] = t1 & 0xffffffff;
 112  109                  t1 = t;
 113  110                  a = c;
 114  111                  b = d;
 115  112          }
 116  113          t1 += a & 0xffffffff;
 117  114          t = (a >> 32);
 118  115          t1 += (b & 0xffff) << 16;
 119  116          i32[i] = t1 & 0xffffffff;
 120  117  }
 121  118  
 122  119  void
 123  120  conv_i32_to_d32(double *d32, uint32_t *i32, int len)
 124  121  {
 125  122          int i;
 126  123  
 127  124  #pragma pipeloop(0)
 128  125          for (i = 0; i < len; i++)
 129  126                  d32[i] = (double)(i32[i]);
 130  127  }
 131  128  
 132  129  
 133  130  void
 134  131  conv_i32_to_d16(double *d16, uint32_t *i32, int len)
 135  132  {
 136  133          int i;
 137  134          uint32_t a;
 138  135  
 139  136  #pragma pipeloop(0)
 140  137          for (i = 0; i < len; i++) {
 141  138                  a = i32[i];
 142  139                  d16[2 * i] = (double)(a & 0xffff);
 143  140                  d16[2 * i + 1] = (double)(a >> 16);
 144  141          }
 145  142  }
 146  143  
 147  144  #ifdef RF_INLINE_MACROS
 148  145  
 149  146  void
 150  147  i16_to_d16_and_d32x4(const double *,    /* 1/(2^16) */
 151  148                          const double *, /* 2^16 */
 152  149                          const double *, /* 0 */
 153  150                          double *,       /* result16 */
 154  151                          double *,       /* result32 */
 155  152                          float *);       /* source - should be unsigned int* */
 156  153                                          /* converted to float* */
 157  154  
 158  155  #else
 159  156  
 160  157  
 161  158  static void
 162  159  i16_to_d16_and_d32x4(const double *dummy1,      /* 1/(2^16) */
 163  160                          const double *dummy2,   /* 2^16 */
 164  161                          const double *dummy3,   /* 0 */
 165  162                          double *result16,
 166  163                          double *result32,
 167  164                          float *src)     /* source - should be unsigned int* */
 168  165                                          /* converted to float* */
 169  166  {
 170  167          uint32_t *i32;
 171  168          uint32_t a, b, c, d;
 172  169  
 173  170          i32 = (uint32_t *)src;
 174  171          a = i32[0];
 175  172          b = i32[1];
 176  173          c = i32[2];
 177  174          d = i32[3];
 178  175          result16[0] = (double)(a & 0xffff);
 179  176          result16[1] = (double)(a >> 16);
 180  177          result32[0] = (double)a;
 181  178          result16[2] = (double)(b & 0xffff);
 182  179          result16[3] = (double)(b >> 16);
 183  180          result32[1] = (double)b;
 184  181          result16[4] = (double)(c & 0xffff);
 185  182          result16[5] = (double)(c >> 16);
 186  183          result32[2] = (double)c;
 187  184          result16[6] = (double)(d & 0xffff);
 188  185          result16[7] = (double)(d >> 16);
 189  186          result32[3] = (double)d;
 190  187  }
 191  188  
 192  189  #endif
 193  190

↓ open down ↓

83 lines elided

↑ open up ↑

 194  191  
 195  192  void
 196  193  conv_i32_to_d32_and_d16(double *d32, double *d16, uint32_t *i32, int len)
 197  194  {
 198  195          int i;
 199  196          uint32_t a;
 200  197  
 201  198  #pragma pipeloop(0)
 202  199          for (i = 0; i < len - 3; i += 4) {
 203  200                  i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
 204      -                                        &(d16[2*i]), &(d32[i]),
 205      -                                        (float *)(&(i32[i])));
      201 +                    &(d16[2*i]), &(d32[i]), (float *)(&(i32[i])));
 206  202          }
 207  203          for (; i < len; i++) {
 208  204                  a = i32[i];
 209  205                  d32[i] = (double)(i32[i]);
 210  206                  d16[2 * i] = (double)(a & 0xffff);
 211  207                  d16[2 * i + 1] = (double)(a >> 16);
 212  208          }
 213  209  }
 214  210  
 215  211

 216  212  static void
 217  213  adjust_montf_result(uint32_t *i32, uint32_t *nint, int len)
 218  214  {
 219  215          int64_t acc;
 220  216          int i;
 221  217  
 222  218          if (i32[len] > 0)
 223  219                  i = -1;
 224  220          else {
 225  221                  for (i = len - 1; i >= 0; i--) {
 226  222                          if (i32[i] != nint[i]) break;
 227  223                  }
 228  224          }
 229  225          if ((i < 0) || (i32[i] > nint[i])) {
 230  226                  acc = 0;
 231  227                  for (i = 0; i < len; i++) {
 232  228                          acc = acc + (uint64_t)(i32[i]) - (uint64_t)(nint[i]);
 233  229                          i32[i] = acc & 0xffffffff;
 234  230                          acc = acc >> 32;
 235  231                  }
 236  232          }
 237  233  }
 238  234  
 239  235  
 240  236  /*
 241  237   * The lengths of the input arrays must be at least the following:
 242  238   * result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen].
 243  239   * All of them must be distinct from one another (no two may be the same array).
 244  240   */
 245  241  void mont_mulf_noconv(uint32_t *result,
 246  242                          double *dm1, double *dm2, double *dt,
 247  243                          double *dn, uint32_t *nint,
 248  244                          int nlen, double dn0)
 249  245  {
 250  246          int i, j, jj;
 251  247          double digit, m2j, a, b;
 252  248          double *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
 253  249  
 254  250          pdm1 = &(dm1[0]);
 255  251          pdm2 = &(dm2[0]);
 256  252          pdn = &(dn[0]);
 257  253          pdm2[2 * nlen] = Zero;
 258  254  
 259  255          if (nlen != 16) {
 260  256                  for (i = 0; i < 4 * nlen + 2; i++)
 261  257                          dt[i] = Zero;
 262  258                  a = dt[0] = pdm1[0] * pdm2[0];
 263  259                  digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16);
 264  260  
 265  261                  pdtj = &(dt[0]);
 266  262                  for (j = jj = 0; j < 2 * nlen; j++, jj++, pdtj++) {
 267  263                          m2j = pdm2[j];
 268  264                          a = pdtj[0] + pdn[0] * digit;
 269  265                          b = pdtj[1] + pdm1[0] * pdm2[j + 1] + a * TwoToMinus16;
 270  266                          pdtj[1] = b;
 271  267

↓ open down ↓

56 lines elided

↑ open up ↑

 272  268  #pragma pipeloop(0)
 273  269                          for (i = 1; i < nlen; i++) {
 274  270                                  pdtj[2 * i] += pdm1[i] * m2j + pdn[i] * digit;
 275  271                          }
 276  272                          if (jj == 30) {
 277  273                                  cleanup(dt, j / 2 + 1, 2 * nlen + 1);
 278  274                                  jj = 0;
 279  275                          }
 280  276  
 281  277                          digit = mod(lower32(b, Zero) * dn0,
 282      -                                    TwoToMinus16, TwoTo16);
      278 +                            TwoToMinus16, TwoTo16);
 283  279                  }
 284  280          } else {
 285  281                  a = dt[0] = pdm1[0] * pdm2[0];
 286  282  
 287  283                  dt[65] = dt[64] = dt[63] = dt[62] = dt[61] = dt[60] =
 288      -                        dt[59] = dt[58] = dt[57] = dt[56] = dt[55] =
 289      -                        dt[54] = dt[53] = dt[52] = dt[51] = dt[50] =
 290      -                        dt[49] = dt[48] = dt[47] = dt[46] = dt[45] =
 291      -                        dt[44] = dt[43] = dt[42] = dt[41] = dt[40] =
 292      -                        dt[39] = dt[38] = dt[37] = dt[36] = dt[35] =
 293      -                        dt[34] = dt[33] = dt[32] = dt[31] = dt[30] =
 294      -                        dt[29] = dt[28] = dt[27] = dt[26] = dt[25] =
 295      -                        dt[24] = dt[23] = dt[22] = dt[21] = dt[20] =
 296      -                        dt[19] = dt[18] = dt[17] = dt[16] = dt[15] =
 297      -                        dt[14] = dt[13] = dt[12] = dt[11] = dt[10] =
 298      -                        dt[9] = dt[8] = dt[7] = dt[6] = dt[5] = dt[4] =
 299      -                        dt[3] = dt[2] = dt[1] = Zero;
      284 +                    dt[59] = dt[58] = dt[57] = dt[56] = dt[55] =
      285 +                    dt[54] = dt[53] = dt[52] = dt[51] = dt[50] =
      286 +                    dt[49] = dt[48] = dt[47] = dt[46] = dt[45] =
      287 +                    dt[44] = dt[43] = dt[42] = dt[41] = dt[40] =
      288 +                    dt[39] = dt[38] = dt[37] = dt[36] = dt[35] =
      289 +                    dt[34] = dt[33] = dt[32] = dt[31] = dt[30] =
      290 +                    dt[29] = dt[28] = dt[27] = dt[26] = dt[25] =
      291 +                    dt[24] = dt[23] = dt[22] = dt[21] = dt[20] =
      292 +                    dt[19] = dt[18] = dt[17] = dt[16] = dt[15] =
      293 +                    dt[14] = dt[13] = dt[12] = dt[11] = dt[10] =
      294 +                    dt[9] = dt[8] = dt[7] = dt[6] = dt[5] = dt[4] =
      295 +                    dt[3] = dt[2] = dt[1] = Zero;
 300  296  
 301  297                  pdn_0 = pdn[0];
 302  298                  pdm1_0 = pdm1[0];
 303  299  
 304  300                  digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16);
 305  301                  pdtj = &(dt[0]);
 306  302  
 307  303                  for (j = 0; j < 32; j++, pdtj++) {
 308  304  
 309  305                          m2j = pdm2[j];

 310  306                          a = pdtj[0] + pdn_0 * digit;
 311  307                          b = pdtj[1] + pdm1_0 * pdm2[j + 1] + a * TwoToMinus16;
 312  308                          pdtj[1] = b;
 313  309  
 314  310                          pdtj[2] += pdm1[1] *m2j + pdn[1] * digit;
 315  311                          pdtj[4] += pdm1[2] *m2j + pdn[2] * digit;
 316  312                          pdtj[6] += pdm1[3] *m2j + pdn[3] * digit;
 317  313                          pdtj[8] += pdm1[4] *m2j + pdn[4] * digit;
 318  314                          pdtj[10] += pdm1[5] *m2j + pdn[5] * digit;

↓ open down ↓

9 lines elided

↑ open up ↑

 319  315                          pdtj[12] += pdm1[6] *m2j + pdn[6] * digit;
 320  316                          pdtj[14] += pdm1[7] *m2j + pdn[7] * digit;
 321  317                          pdtj[16] += pdm1[8] *m2j + pdn[8] * digit;
 322  318                          pdtj[18] += pdm1[9] *m2j + pdn[9] * digit;
 323  319                          pdtj[20] += pdm1[10] *m2j + pdn[10] * digit;
 324  320                          pdtj[22] += pdm1[11] *m2j + pdn[11] * digit;
 325  321                          pdtj[24] += pdm1[12] *m2j + pdn[12] * digit;
 326  322                          pdtj[26] += pdm1[13] *m2j + pdn[13] * digit;
 327  323                          pdtj[28] += pdm1[14] *m2j + pdn[14] * digit;
 328  324                          pdtj[30] += pdm1[15] *m2j + pdn[15] * digit;
 329      -                        /* no need for cleenup, cannot overflow */
      325 +                        /* no need for cleanup, cannot overflow */
 330  326                          digit = mod(lower32(b, Zero) * dn0,
 331      -                                    TwoToMinus16, TwoTo16);
      327 +                            TwoToMinus16, TwoTo16);
 332  328                  }
 333  329          }
 334  330  
 335  331          conv_d16_to_i32(result, dt + 2 * nlen, (int64_t *)dt, nlen + 1);
 336  332          adjust_montf_result(result, nint, nlen);
 337  333  }

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX