Print this page
6799218 RSA using Solaris Kernel Crypto framework lagging behind OpenSSL
5016936 bignumimpl:big_mul: potential memory leak
6810280 panic from bignum module: vmem_xalloc(): size == 0
   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License, Version 1.0 only
   6  * (the "License").  You may not use this file except in compliance
   7  * with the License.
   8  *
   9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10  * or http://www.opensolaris.org/os/licensing.
  11  * See the License for the specific language governing permissions
  12  * and limitations under the License.
  13  *
  14  * When distributing Covered Code, include this CDDL HEADER in each
  15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16  * If applicable, add the following below this CDDL HEADER, with the
  17  * fields enclosed by brackets "[]" replaced with your own identifying
  18  * information: Portions Copyright [yyyy] [name of copyright owner]
  19  *
  20  * CDDL HEADER END
  21  */
  22 /*
  23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #pragma ident   "%Z%%M% %I%     %E% SMI"
  28 
  29 #include <sys/asm_linkage.h>
  30 
  31 #if defined(lint) || defined(__lint)
  32 
  33 #include <sys/types.h>
  34 /* Lint-only stub for the assembly routine that computes r = a * digit. */
  35 /* ARGSUSED */
  36 uint64_t
  37 big_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
  38 { return (0); }
  39 /* Lint-only stub for the assembly routine that computes r += a * digit. */
  40 /* ARGSUSED */
  41 uint64_t
  42 big_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
  43 { return (0); }
  44 /* Lint-only stub for the assembly squaring routine; it returns nothing. */
  45 /* ARGSUSED */
  46 void
  47 big_sqr_vec64(uint64_t *r, uint64_t *a, int len)
  48 {}
  49 
  50 #else   /* lint */
  51 
  52 / ------------------------------------------------------------------------
  53 /
  54 /  Implementation of big_mul_set_vec which exploits
  55 /  the 64X64->128 bit  unsigned multiply instruction.
  56 /
  57 /  As defined in Sun's bignum library for pkcs11, bignums are
  58 /  composed of an array of 32-bit "digits" along with descriptive
  59 /  information.  The arrays of digits are only required to be
  60 /  aligned on 32-bit boundary.  This implementation works only
  61 /  when the two factors and the result happen to be 64 bit aligned
  62 /  and have an even number of digits.
  63 /
  64 / ------------------------------------------------------------------------
  65 
  66 / r = a * digit, r and a are vectors of length len
  67 / returns the carry digit
  68 / r and a are 64 bit aligned.
  69 /
  70 / uint64_t
  71 / big_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
  72 /
  73         ENTRY(big_mul_set_vec64)
  74         / In: %rdi = r, %rsi = a, %rdx = len, %rcx = digit.  Out: %rax = carry.
  75         xorq    %r9, %r9                / cy = 0; must precede the len == 0 exit,
  76         testq   %rdx, %rdx              / because .L17 returns %r9 (cy), not %rax
  77         jz      .L17                    / if (len == 0) return (0)
  78         / NOTE(review): len is prototyped as int but tested as 64-bit %rdx; assumes clean upper 32 bits - confirm
  79         movq    %rdx, %r8               / Use r8 for len; %rdx is used by mul
  80 
  81 .L15:
  82         cmpq    $8, %r8                 / flags = len - 8
  83         jb      .L16                    / fewer than 8 digits left: go to .L16
  84         movq    0(%rsi), %rax           / rax = a[0]
  85         movq    8(%rsi), %r11           / prefetch a[1]
  86         mulq    %rcx                    / p = a[0] * digit
  87         addq    %r9, %rax
  88         adcq    $0, %rdx                / p += cy
  89         movq    %rax, 0(%rdi)           / r[0] = lo(p)
  90         movq    %rdx, %r9               / cy = hi(p)
  91 
  92         movq    %r11, %rax
  93         movq    16(%rsi), %r11          / prefetch a[2]


 202         addq    %r9, %rax
 203         adcq    $0, %rdx                / p += cy
 204         movq    %rax, 40(%rdi)          / r[5] = lo(p)
 205         movq    %rdx, %r9               / cy = hi(p)
 206         decq    %r8
 207         jz      .L17
 208 
 209         movq    48(%rsi), %rax
 210         mulq    %rcx                    / p = a[6] * digit
 211         addq    %r9, %rax
 212         adcq    $0, %rdx                / p += cy
 213         movq    %rax, 48(%rdi)          / r[6] = lo(p)
 214         movq    %rdx, %r9               / cy = hi(p)
 215         decq    %r8
 216         jz      .L17
 217 
 218 
 219 .L17:
 220         movq    %r9, %rax
 221         ret
 222         SET_SIZE(big_mul_set_vec64)
 223 

 224 / ------------------------------------------------------------------------
 225 /
 226 /  Implementation of big_mul_add_vec which exploits
 227 /  the 64X64->128 bit  unsigned multiply instruction.
 228 /
 229 /  As defined in Sun's bignum library for pkcs11, bignums are
 230 /  composed of an array of 32-bit "digits" along with descriptive
 231 /  information.  The arrays of digits are only required to be
 232 /  aligned on 32-bit boundary.  This implementation works only
 233 /  when the two factors and the result happen to be 64 bit aligned
 234 /  and have an even number of digits.
 235 /
 236 / ------------------------------------------------------------------------
 237 
 238 / r += a * digit, r and a are vectors of length len
 239 / returns the carry digit
 240 / r and a are 64 bit aligned.
 241 /
 242 / uint64_t
 243 / big_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
 244 /
 245         ENTRY(big_mul_add_vec64)
 246         / In: %rdi = r, %rsi = a, %rdx = len, %rcx = digit.  Out: %rax = carry.
 247         xorq    %r9, %r9                / cy = 0; must precede the len == 0 exit,
 248         testq   %rdx, %rdx              / because .L27 returns %r9 (cy), not %rax
 249         jz      .L27                    / if (len == 0) return (0)
 250         / NOTE(review): len is prototyped as int but tested as 64-bit %rdx; assumes clean upper 32 bits - confirm
 251         movq    %rdx, %r8               / Use r8 for len; %rdx is used by mul
 252 
 253 .L25:
 254         cmpq    $8, %r8                 / flags = len - 8
 255         jb      .L26                    / fewer than 8 digits left: go to .L26
 256         movq    0(%rsi), %rax           / rax = a[0]
 257         movq    0(%rdi), %r10           / r10 = r[0]
 258         movq    8(%rsi), %r11           / prefetch a[1]
 259         mulq    %rcx                    / p = a[0] * digit
 260         addq    %r10, %rax
 261         adcq    $0, %rdx                / p += r[0]
 262         movq    8(%rdi), %r10           / prefetch r[1]
 263         addq    %r9, %rax
 264         adcq    $0, %rdx                / p += cy
 265         movq    %rax, 0(%rdi)           / r[0] = lo(p)


 419         movq    %rdx, %r9               / cy = hi(p)
 420         decq    %r8
 421         jz      .L27
 422 
 423         movq    48(%rsi), %rax
 424         movq    48(%rdi), %r10
 425         mulq    %rcx                    / p = a[6] * digit
 426         addq    %r10, %rax
 427         adcq    $0, %rdx                / p += r[6]
 428         addq    %r9, %rax
 429         adcq    $0, %rdx                / p += cy
 430         movq    %rax, 48(%rdi)          / r[6] = lo(p)
 431         movq    %rdx, %r9               / cy = hi(p)
 432         decq    %r8
 433         jz      .L27
 434 
 435 
 436 .L27:
 437         movq    %r9, %rax
 438         ret
 439         SET_SIZE(big_mul_add_vec64)
 440 
 441 
 442 / void
 443 / big_sqr_vec64(uint64_t *r, uint64_t *a, int len)
 444 
 445         ENTRY(big_sqr_vec64)
 446         pushq   %rbx                    / save callee-saved registers
 447         pushq   %rbp
 448         pushq   %r12
 449         pushq   %r13
 450         pushq   %r14
 451         pushq   %r15
 452         pushq   %rdx                    / save arg3, len
 453         pushq   %rsi                    / save arg2, a
 454         pushq   %rdi                    / save arg1, r
 455         / Row 0: tr[0 .. tlen-1] = a[1 ..] * a[0]; the returned carry goes to tr[tlen].
 456         leaq    8(%rdi), %r13           / tr = r + 1
 457         movq    %rsi, %r14              / ta = a
 458         movq    %rdx, %r15              / tlen = len
 459         decq    %r15                    / tlen = len - 1
 460         movq    %r13, %rdi              / arg1 = tr
 461         leaq    8(%r14), %rsi           / arg2 = ta + 1
 462         movq    %r15, %rdx              / arg3 = tlen
 463         movq    0(%r14), %rcx           / arg4 = ta[0]
 464         call    big_mul_set_vec64
 465         movq    %rax, 0(%r13, %r15, 8)  / tr[tlen] = cy
 466 .L31:
 467         decq    %r15                    / --tlen
 468         jz      .L32                    / while (--tlen != 0)
 469         / NOTE(review): the loop ends only when tlen reaches exactly 0, so len < 2 would wrap tlen; presumably callers pass len >= 2 - confirm
 470         addq    $16, %r13               / tr += 2
 471         addq    $8, %r14                / ++ta
 472         movq    %r13, %rdi              / arg1 = tr
 473         leaq    8(%r14), %rsi           / arg2 = ta + 1
 474         movq    %r15, %rdx              / arg3 = tlen
 475         movq    0(%r14), %rcx           / arg4 = ta[0]
 476         call    big_mul_add_vec64
 477         movq    %rax, 0(%r13, %r15, 8)  / tr[tlen] = cy
 478         jmp     .L31
 479 
 480 .L32:
 481 
 482 / No more function calls after this.
 483 / Restore arguments to registers.
 484 / However, don't use %rdx for arg3, len, because it is heavily
 485 / used by the hardware MUL instruction.  Use %r8, instead.
 486         movq    0(%rsp), %rdi           / %rdi == arg1 == r
 487         movq    8(%rsp), %rsi           / %rsi == arg2 == a
 488         movq    16(%rsp), %r8           / %r8  == arg3 == len
 489         / r[0] = lo64(a[0]**2); the high half becomes cy, and r[1] is doubled below.
 490         movq    0(%rsi), %rax           / %rax = a[0];
 491         mulq    %rax                    / s = %rdx:%rax = a[0]**2
 492         movq    %rax, 0(%rdi)           / r[0] = lo64(s)
 493         movq    %rdx, %r9               / cy = hi64(s)
 494         xorq    %rdx, %rdx              / clear the high half of p
 495         movq    8(%rdi), %rax           / p = %rdx:%rax = r[1]
 496         addq    %rax, %rax              / %rax = 2 * r[1]; the bit shifted out is in CF


 535         movq    %rax, 8(%rdi, %r12, 8)  / r[col+1] = lo64(p)
 536         movq    %rdx, %r9               / cy = hi64(p)
 537 
 538         incq    %r11                    / ++row
 539         addq    $2, %r12                / col += 2
 540         jmp     .L33
 541 
 542 .L34:
 543         movq    %r9, 8(%rdi, %r12, 8)   / r[col+1] = lo64(cy)
 544 
 545         addq    $24, %rsp               / skip %rdi, %rsi, %rdx
 546         popq    %r15
 547         popq    %r14
 548         popq    %r13
 549         popq    %r12
 550         popq    %rbp
 551         popq    %rbx
 552 
 553         ret
 554 
 555         SET_SIZE(big_sqr_vec64)
 556 
 557 #endif  /* lint */
   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.

   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 


  26 #include <sys/asm_linkage.h>
  27 
  28 #if defined(lint) || defined(__lint)
  29 
  30 #include <sys/types.h>
  31 /* Lint-only stub for the assembly routine that computes r = a * digit. */
  32 /* ARGSUSED */
  33 uint64_t
  34 big_mul_set_vec(uint64_t *r, uint64_t *a, int len, uint64_t digit)
  35 { return (0); }
  36 /* Lint-only stub for the assembly routine that computes r += a * digit. */
  37 /* ARGSUSED */
  38 uint64_t
  39 big_mul_add_vec(uint64_t *r, uint64_t *a, int len, uint64_t digit)
  40 { return (0); }
  41 /* Lint-only stub for the assembly squaring routine; it returns nothing. */
  42 /* ARGSUSED */
  43 void
  44 big_sqr_vec(uint64_t *r, uint64_t *a, int len)
  45 {}
  46 
  47 #else   /* lint */
  48 
  49 / ------------------------------------------------------------------------
  50 /
  51 /  Implementation of big_mul_set_vec which exploits
  52 /  the 64X64->128 bit  unsigned multiply instruction.
  53 /
  54 /  As defined in Sun's bignum library for pkcs11, bignums are
  55 /  composed of an array of 64-bit "digits" or "chunks" along with
  56 /  descriptive information.



  57 /
  58 / ------------------------------------------------------------------------
  59 
  60 / r = a * digit, r and a are vectors of length len
  61 / returns the carry digit
  62 / r and a are 64 bit aligned.
  63 /
  64 / uint64_t
  65 / big_mul_set_vec(uint64_t *r, uint64_t *a, int len, uint64_t digit)
  66 /
  67         ENTRY(big_mul_set_vec)
  68         / In: %rdi = r, %rsi = a, %rdx = len, %rcx = digit.  Out: %rax = carry.
  69         xorq    %r9, %r9                / cy = 0; must precede the len == 0 exit,
  70         testq   %rdx, %rdx              / because .L17 returns %r9 (cy), not %rax
  71         jz      .L17                    / if (len == 0) return (0)
  72         / NOTE(review): len is prototyped as int but tested as 64-bit %rdx; assumes clean upper 32 bits - confirm
  73         movq    %rdx, %r8               / Use r8 for len; %rdx is used by mul
  74 
  75 .L15:
  76         cmpq    $8, %r8                 / flags = len - 8
  77         jb      .L16                    / fewer than 8 digits left: go to .L16
  78         movq    0(%rsi), %rax           / rax = a[0]
  79         movq    8(%rsi), %r11           / prefetch a[1]
  80         mulq    %rcx                    / p = a[0] * digit
  81         addq    %r9, %rax
  82         adcq    $0, %rdx                / p += cy
  83         movq    %rax, 0(%rdi)           / r[0] = lo(p)
  84         movq    %rdx, %r9               / cy = hi(p)
  85 
  86         movq    %r11, %rax
  87         movq    16(%rsi), %r11          / prefetch a[2]


 196         addq    %r9, %rax
 197         adcq    $0, %rdx                / p += cy
 198         movq    %rax, 40(%rdi)          / r[5] = lo(p)
 199         movq    %rdx, %r9               / cy = hi(p)
 200         decq    %r8
 201         jz      .L17
 202 
 203         movq    48(%rsi), %rax
 204         mulq    %rcx                    / p = a[6] * digit
 205         addq    %r9, %rax
 206         adcq    $0, %rdx                / p += cy
 207         movq    %rax, 48(%rdi)          / r[6] = lo(p)
 208         movq    %rdx, %r9               / cy = hi(p)
 209         decq    %r8
 210         jz      .L17
 211 
 212 
 213 .L17:
 214         movq    %r9, %rax
 215         ret
 216         SET_SIZE(big_mul_set_vec)
 217 
 218 
 219 / ------------------------------------------------------------------------
 220 /
 221 /  Implementation of big_mul_add_vec which exploits
 222 /  the 64X64->128 bit  unsigned multiply instruction.
 223 /
 224 /  As defined in Sun's bignum library for pkcs11, bignums are
 225 /  composed of an array of 64-bit "digits" or "chunks" along with
 226 /  descriptive information.



 227 /
 228 / ------------------------------------------------------------------------
 229 
 230 / r += a * digit, r and a are vectors of length len
 231 / returns the carry digit
 232 / r and a are 64 bit aligned.
 233 /
 234 / uint64_t
 235 / big_mul_add_vec(uint64_t *r, uint64_t *a, int len, uint64_t digit)
 236 /
 237         ENTRY(big_mul_add_vec)
 238         / In: %rdi = r, %rsi = a, %rdx = len, %rcx = digit.  Out: %rax = carry.
 239         xorq    %r9, %r9                / cy = 0; must precede the len == 0 exit,
 240         testq   %rdx, %rdx              / because .L27 returns %r9 (cy), not %rax
 241         jz      .L27                    / if (len == 0) return (0)
 242         / NOTE(review): len is prototyped as int but tested as 64-bit %rdx; assumes clean upper 32 bits - confirm
 243         movq    %rdx, %r8               / Use r8 for len; %rdx is used by mul
 244 
 245 .L25:
 246         cmpq    $8, %r8                 / flags = len - 8
 247         jb      .L26                    / fewer than 8 digits left: go to .L26
 248         movq    0(%rsi), %rax           / rax = a[0]
 249         movq    0(%rdi), %r10           / r10 = r[0]
 250         movq    8(%rsi), %r11           / prefetch a[1]
 251         mulq    %rcx                    / p = a[0] * digit
 252         addq    %r10, %rax
 253         adcq    $0, %rdx                / p += r[0]
 254         movq    8(%rdi), %r10           / prefetch r[1]
 255         addq    %r9, %rax
 256         adcq    $0, %rdx                / p += cy
 257         movq    %rax, 0(%rdi)           / r[0] = lo(p)


 411         movq    %rdx, %r9               / cy = hi(p)
 412         decq    %r8
 413         jz      .L27
 414 
 415         movq    48(%rsi), %rax
 416         movq    48(%rdi), %r10
 417         mulq    %rcx                    / p = a[6] * digit
 418         addq    %r10, %rax
 419         adcq    $0, %rdx                / p += r[6]
 420         addq    %r9, %rax
 421         adcq    $0, %rdx                / p += cy
 422         movq    %rax, 48(%rdi)          / r[6] = lo(p)
 423         movq    %rdx, %r9               / cy = hi(p)
 424         decq    %r8
 425         jz      .L27
 426 
 427 
 428 .L27:
 429         movq    %r9, %rax
 430         ret
 431         SET_SIZE(big_mul_add_vec)
 432 
 433 
 434 / void
 435 / big_sqr_vec(uint64_t *r, uint64_t *a, int len)
 436 
 437         ENTRY(big_sqr_vec)
 438         pushq   %rbx                    / save callee-saved registers
 439         pushq   %rbp
 440         pushq   %r12
 441         pushq   %r13
 442         pushq   %r14
 443         pushq   %r15
 444         pushq   %rdx                    / save arg3, len
 445         pushq   %rsi                    / save arg2, a
 446         pushq   %rdi                    / save arg1, r
 447         / Row 0: tr[0 .. tlen-1] = a[1 ..] * a[0]; the returned carry goes to tr[tlen].
 448         leaq    8(%rdi), %r13           / tr = r + 1
 449         movq    %rsi, %r14              / ta = a
 450         movq    %rdx, %r15              / tlen = len
 451         decq    %r15                    / tlen = len - 1
 452         movq    %r13, %rdi              / arg1 = tr
 453         leaq    8(%r14), %rsi           / arg2 = ta + 1
 454         movq    %r15, %rdx              / arg3 = tlen
 455         movq    0(%r14), %rcx           / arg4 = ta[0]
 456         call    big_mul_set_vec
 457         movq    %rax, 0(%r13, %r15, 8)  / tr[tlen] = cy
 458 .L31:
 459         decq    %r15                    / --tlen
 460         jz      .L32                    / while (--tlen != 0)
 461         / NOTE(review): the loop ends only when tlen reaches exactly 0, so len < 2 would wrap tlen; presumably callers pass len >= 2 - confirm
 462         addq    $16, %r13               / tr += 2
 463         addq    $8, %r14                / ++ta
 464         movq    %r13, %rdi              / arg1 = tr
 465         leaq    8(%r14), %rsi           / arg2 = ta + 1
 466         movq    %r15, %rdx              / arg3 = tlen
 467         movq    0(%r14), %rcx           / arg4 = ta[0]
 468         call    big_mul_add_vec
 469         movq    %rax, 0(%r13, %r15, 8)  / tr[tlen] = cy
 470         jmp     .L31
 471 
 472 .L32:
 473 
 474 / No more function calls after this.
 475 / Restore arguments to registers.
 476 / However, don't use %rdx for arg3, len, because it is heavily
 477 / used by the hardware MUL instruction.  Use %r8, instead.
 478         movq    0(%rsp), %rdi           / %rdi == arg1 == r
 479         movq    8(%rsp), %rsi           / %rsi == arg2 == a
 480         movq    16(%rsp), %r8           / %r8  == arg3 == len
 481         / r[0] = lo64(a[0]**2); the high half becomes cy, and r[1] is doubled below.
 482         movq    0(%rsi), %rax           / %rax = a[0];
 483         mulq    %rax                    / s = %rdx:%rax = a[0]**2
 484         movq    %rax, 0(%rdi)           / r[0] = lo64(s)
 485         movq    %rdx, %r9               / cy = hi64(s)
 486         xorq    %rdx, %rdx              / clear the high half of p
 487         movq    8(%rdi), %rax           / p = %rdx:%rax = r[1]
 488         addq    %rax, %rax              / %rax = 2 * r[1]; the bit shifted out is in CF


 527         movq    %rax, 8(%rdi, %r12, 8)  / r[col+1] = lo64(p)
 528         movq    %rdx, %r9               / cy = hi64(p)
 529 
 530         incq    %r11                    / ++row
 531         addq    $2, %r12                / col += 2
 532         jmp     .L33
 533 
 534 .L34:
 535         movq    %r9, 8(%rdi, %r12, 8)   / r[col+1] = lo64(cy)
 536 
 537         addq    $24, %rsp               / skip %rdi, %rsi, %rdx
 538         popq    %r15
 539         popq    %r14
 540         popq    %r13
 541         popq    %r12
 542         popq    %rbp
 543         popq    %rbx
 544 
 545         ret
 546 
 547         SET_SIZE(big_sqr_vec)
 548 
 549 #endif  /* lint */