The unified diff between revisions [0dc5b2d6..] and [b14c94b9..] is displayed below. It can also be downloaded as a raw diff.

#
#
# add_dir "tombc"
#
# add_file "TODO"
#  content [dcb353ac6dcd7b12849ba072ba980e49ff2a2883]
#
# add_file "bn_mp_reduce_2k_l.c"
#  content [18cbea4d1de3dcd4ae121ef87f750fd71b936d65]
#
# add_file "bn_mp_reduce_2k_setup_l.c"
#  content [2f7604831262ef985d2d84fff4079f660e9d4355]
#
# add_file "bn_mp_reduce_is_2k_l.c"
#  content [f8a3ff10225afdfb3aa25c9de44066343247722a]
#
# add_file "bn_mp_to_signed_bin_n.c"
#  content [c3aea4a9e1164e03d1d58338de680a6f55b5ad8f]
#
# add_file "bn_mp_to_unsigned_bin_n.c"
#  content [90b468fd50ea8c8151e0f40e6aa8ac302ec2fe46]
#
# add_file "logs/expt_2kl.log"
#  content [6302bb8eaffcc9d3dc8fd60dbe24c2c68c873984]
#
# add_file "tombc/grammar.txt"
#  content [0595ea070a28f54a97570563ad5431b3084d7e7c]
#
# patch "bn.pdf"
#  from [7f4a5f11d6752ff679a3d3f7fc153687d6d6abba]
#    to [8fed3f365c2a3324c4a4c81b82c046b116f1f0c2]
#
# patch "bn.tex"
#  from [36834c9317131d36ca59ec5bcbdd2cfa7109fd4d]
#    to [20cadcede6073a9027d85eb340269f5f15737f36]
#
# patch "bn_fast_mp_invmod.c"
#  from [08f10544b85d8060d7f7afe57fed583c889e3c9c]
#    to [4d3634c512613a6a57a72c5664058c4a47e9a960]
#
# patch "bn_fast_mp_montgomery_reduce.c"
#  from [325a7d4683d34160114e1c5c4bdee24f78d53e34]
#    to [78d8d5998865b41471663c55715fb1dd6752cee7]
#
# patch "bn_fast_s_mp_mul_digs.c"
#  from [0672a145d0569d227d950c2555234afe2ab394d6]
#    to [9a92ee7590332a1db78b9d37171680e6b87e1670]
#
# patch "bn_fast_s_mp_mul_high_digs.c"
#  from [0d7785aa91e231bdc765d67ff4074ecc29d556a2]
#    to [ceff3a71492dd8079556c72a83364f4930e9e733]
#
# patch "bn_fast_s_mp_sqr.c"
#  from [5765234e01ae11780dcaade97742404013b1da42]
#    to [eda475fe947ffc90906c7167eec15450c2209362]
#
# patch "bn_mp_div.c"
#  from [d993f550cc6ff9caa00c918bbfba773949fc65ca]
#    to [0f214a2ee7c1cfd6152ebd473d2f04db67f2f86c]
#
# patch "bn_mp_dr_reduce.c"
#  from [1736b0957a7bbeabb32ba902730f8dd031ab8a97]
#    to [e66ad3324fbe632c9168616227cab4bafada767a]
#
# patch "bn_mp_exptmod.c"
#  from [9a6256e60d747e910cc0f089a7cc51f3fae51f39]
#    to [5834ec7efb5ce20b64f84e8015eaa59bfd93f090]
#
# patch "bn_mp_exptmod_fast.c"
#  from [ec6cb8ea6c2dde8b87493bd481a51c9f8f946ede]
#    to [0c597499c614a4fc38dbf06574749138133f69b2]
#
# patch "bn_mp_exteuclid.c"
#  from [3c13b9da30303b4bb43aa8a362d695f214c67a94]
#    to [84910598afbed46c88cd0802b8c2607c8ea9e1d0]
#
# patch "bn_mp_gcd.c"
#  from [75332794a8c6790c417d7cdcc51c92be8d5bdc91]
#    to [a5ec727015fd307450f7718c481285dd1599d5d0]
#
# patch "bn_mp_invmod_slow.c"
#  from [c0acf30269eaf91f32b682b1b2d4150ee566f1a5]
#    to [0e68e06289ca853df2e27007a793f251eeb6db76]
#
# patch "bn_mp_jacobi.c"
#  from [5f0894cc606a9979ce7008fc29d56c24cf53e9d6]
#    to [9c41eb9191d5875db6dd10ef168fedbe621a3b2a]
#
# patch "bn_mp_lcm.c"
#  from [525b0299d60ea98ecbe42622d6f44d6e64ea32a7]
#    to [ae6e40952e57e849e762a25ce87385e02fb4bdcd]
#
# patch "bn_mp_mod_2d.c"
#  from [6860d01c3277c6edd0a7ca03dd22248983306b8e]
#    to [e0e5f0ab899aae9477bf97785f85320ad0b78865]
#
# patch "bn_mp_montgomery_calc_normalization.c"
#  from [fa4dcf6f68dfbab19a37ab68781ef3763b2e904d]
#    to [41769e061872600b8d99328e343fe770b7b3aec4]
#
# patch "bn_mp_mul_d.c"
#  from [fe7fe17b0dc44d3f3c6ab78998691e412280255e]
#    to [684e50bde0381306076b4b64d2ecbab99b7af195]
#
# patch "bn_mp_n_root.c"
#  from [339cf93663939605c6304419a020f33d4184b3dd]
#    to [ceda8bdd58d2fb45707be024bc9ad46ceb44a337]
#
# patch "bn_mp_neg.c"
#  from [afc6d203cc58d956e118f218c440bca91f94adce]
#    to [02cb8735d235feb2f3016b581f23d6da582f8765]
#
# patch "bn_mp_prime_fermat.c"
#  from [7a08d1ab74369ae14b39a95258a6b955e0d7be22]
#    to [bed25e3d7aef4e9ce0bd9642626e082b7bc02e73]
#
# patch "bn_mp_prime_is_divisible.c"
#  from [7536731238b627bc2d90c623d38fd2952bf0dba8]
#    to [ac6f9ad19e8ee7c27df8dab72b160090de3f88e5]
#
# patch "bn_mp_prime_is_prime.c"
#  from [2c13d4e47af42472ed769696f340f3272cfae3c6]
#    to [616c045068c12c0d7cd8a73080ef55da4e501493]
#
# patch "bn_mp_prime_miller_rabin.c"
#  from [aef8e16b5107c8424ed96e143d41ac1f0c419a09]
#    to [e893c60f539dce9136d4afe96650b4e5a649876b]
#
# patch "bn_mp_prime_next_prime.c"
#  from [23111d40777e3c227c814ceb8726f187328b1db9]
#    to [cc04aebd4d5b5e203b330e0f4a8180c48be6af13]
#
# patch "bn_mp_prime_random_ex.c"
#  from [31f8a27d870e541417b0ca1a911480e1e060169f]
#    to [4cba143b51b505f3c881caf21f5879695d0dcefa]
#
# patch "bn_mp_radix_size.c"
#  from [891ae52347addbb788c35c3c11e4912c6f6d7c35]
#    to [cea57d1d767181e6a51d13d3dd79684a4e62482a]
#
# patch "bn_mp_rand.c"
#  from [139744910e08eeab5d37af0510cea07399d318ac]
#    to [21e113ee13fd97a003bb05376e3bf1c706469d43]
#
# patch "bn_mp_read_radix.c"
#  from [a29dabc5cc4dfe65e231782ea78675ca067302b7]
#    to [a761d92fa0571ee25a4d8e25029b65ed715f5c41]
#
# patch "bn_mp_reduce.c"
#  from [6e809ceeb191e65004ef4d040f5f8b14a2253074]
#    to [22c1ae63de48d5cc0e48db4bcadef234f025e060]
#
# patch "bn_mp_reduce_2k.c"
#  from [9aa2bd43d0e23a59c71b2ee1461823cad97cbc1b]
#    to [3a75893da1c3a1c868056cb31e118a3683200bd5]
#
# patch "bn_mp_reduce_2k_setup.c"
#  from [6989724dbee168ff723d4732c1f5bf29fb05dd33]
#    to [94f2c64d767763bc4c38322b408d768ca2aa4ecc]
#
# patch "bn_mp_reduce_is_2k.c"
#  from [daa704193df12dfc34059c8a7eb5223470b9464f]
#    to [4d4b366051978336c6ab81f3e987b0aa19c16812]
#
# patch "bn_mp_to_signed_bin.c"
#  from [7c004817480da3880393125065bb1c86994e49aa]
#    to [37639284a790d77339bcd7f53e66a18017563dd6]
#
# patch "bn_mp_to_unsigned_bin.c"
#  from [b63653084377cf62c63b7ad3f5fbe8b3bc1daa7e]
#    to [efb1eee1129615146646f646378a25742c9a6492]
#
# patch "bn_mp_toom_mul.c"
#  from [ef10988525e274d5e954a11fc04edb96bc4590a3]
#    to [8a45c9922034269d563cbdbd1b55d772ccc06056]
#
# patch "bn_mp_unsigned_bin_size.c"
#  from [30a163d04f2e09c8df11ab2f8e9f781adb500ec4]
#    to [8b1963305618f9f8effe51212c9794d59e8d6c24]
#
# patch "bn_mp_xor.c"
#  from [fe21cdda0a62908ce461de7a806cfdd743c4db1b]
#    to [6a3de9432fe1ad939de000694be657772cd913e7]
#
# patch "bn_mp_zero.c"
#  from [130f819e9e81d1206739e40b52df5b86a82d584c]
#    to [4247063811e504255b015c2cc19acb28d2a304e4]
#
# patch "bn_prime_tab.c"
#  from [55558f0b3a503f1604933fb2d05e900af8ea9c40]
#    to [fec54b5cd76b20913620ffe6922c6a37ee96e1b7]
#
# patch "bn_s_mp_exptmod.c"
#  from [9af99058a9d79e6032e4c8302c568dc89b9a8d4d]
#    to [7ac1accce85a140678573008fcdd67f2834737ed]
#
# patch "bn_s_mp_mul_digs.c"
#  from [132dd48c3ff5effbb5ea45cdcd744ee77d7253da]
#    to [d312dd371b770bf210c2d96f267247163ab15e0f]
#
# patch "bn_s_mp_sqr.c"
#  from [73569fd80f5b6535e45cd018a71661f52e4eb9b1]
#    to [eba4101c1f1ff9dc11fadf942075e3e0fff40737]
#
# patch "bncore.c"
#  from [9168a11431458dface0fe16fb1cb4e5c483ff314]
#    to [2a706b0ca7b0d41a6ef37bc33c26d2d2a4338b7e]
#
# patch "callgraph.txt"
#  from [1c575d5b40fd8adc98fcbae3be9b0b992d9fd7ac]
#    to [bc126b493294098a3277e79cc94bcb17871a085c]
#
# patch "changes.txt"
#  from [07d05c9531404b01bcd359e42821c4d039e3f851]
#    to [686b8da0e27120cb775f6960c80f1efc7b80940d]
#
# patch "demo/demo.c"
#  from [58eeef30dee744087fb774f617ff7033b18d77e0]
#    to [ad8e1506fbbd176371eece0e73edc6696b4ab79d]
#
# patch "demo/timing.c"
#  from [98827dad0973588bcd4b325718442b1ab0efb719]
#    to [495dc5a5ca04a74a7ac7146d3c40c1c0cde360a2]
#
# patch "dep.pl"
#  from [b154a4d839947d3a5b4218fb3b76284469091256]
#    to [092d03d71c545ed12af9341beeee45457515c9d1]
#
# patch "etc/mersenne.c"
#  from [4324b77f9ff20f45dba21ada611c786665030b3d]
#    to [0b7491b74ebf52193361a1f1011866a56cf277bd]
#
# patch "etc/pprime.c"
#  from [216818739f847e9b2e1dbb302d206d6c8cffccec]
#    to [5b67dfe9a0cda06431fcee2cce3c23636671ae02]
#
# patch "etc/tune.c"
#  from [935b41e6df657dd2ceede4e1311e6848a432893a]
#    to [9cc60da474172eb1273756067d1676b9c1beac36]
#
# patch "logs/add.log"
#  from [a1a423131bd0068100058228aad36c31c53ab2c7]
#    to [528863a29a3d5176b6fb34eef261663081b88c7f]
#
# patch "logs/expt.log"
#  from [da39a3ee5e6b4b0d3255bfef95601890afd80709]
#    to [fdc7d0684f006f7cd60c501e68213974c165e332]
#
# patch "logs/expt_2k.log"
#  from [da39a3ee5e6b4b0d3255bfef95601890afd80709]
#    to [e3590c62739763a41c1a34365e595568d1ad6444]
#
# patch "logs/expt_dr.log"
#  from [da39a3ee5e6b4b0d3255bfef95601890afd80709]
#    to [0c04f650971da4f1d1ecc94caf4598c61ecf8d7e]
#
# patch "logs/mult.log"
#  from [b9d3dd04155318a5a87ee0b8d76c9174ccddad17]
#    to [5f0669e360af91568d5b9bb16a4e920c2c265e7f]
#
# patch "logs/mult_kara.log"
#  from [55b8d281c45ca7689e48d924801bea8ee1cf371e]
#    to [5ef11b188fc3c11588d5d31292e5b7462482a209]
#
# patch "logs/sqr.log"
#  from [27daa1a1360faa0c921fb069471a14f6a64e256a]
#    to [2817d882b889dbb7827e1ede5e4e0be59c88f4a7]
#
# patch "logs/sqr_kara.log"
#  from [a5458d81446e82a302429ae75e0135576c82c346]
#    to [a155843ee310698ab5ed583bc4e346f48d84937b]
#
# patch "logs/sub.log"
#  from [730a1558fa8a5f8370ed0e296ad0fc2a6d1ae264]
#    to [54c88133ac76a3a3e6ef5a782e41abb90a50ec2d]
#
# patch "makefile"
#  from [b959d8ba1f022caeb787e2f9a397e23049c43af9]
#    to [0db3fd66a1f1ef47d182d20b71279c91070272da]
#
# patch "makefile.bcc"
#  from [6ff4455fd30f15d67594b85c18ed6fbdfa2326d5]
#    to [6dc71811ee3aeb409b7ced958e62abd55bd3eeda]
#
# patch "makefile.cygwin_dll"
#  from [2ac775af392c3690497d9fae518083585045fc97]
#    to [488c52dd666e16a9c89e0016414a22b8efd687eb]
#
# patch "makefile.icc"
#  from [4155389d03c96022b3469acdca2ab3452475a0d3]
#    to [eb7a66a15792f0e46e27620b4351e367995953c7]
#
# patch "makefile.msvc"
#  from [d2c181e3340ba3cc1adb26c65c7e18b9e0d39ff1]
#    to [37604182da728a3357ca462f509928d8ff10ed7c]
#
# patch "makefile.shared"
#  from [c948f0e25a808a63df3e661392ae815aa1515734]
#    to [ea1c88425c89e89689fd2e5ea95ae0c7888cb2bf]
#
# patch "mtest/mtest.c"
#  from [c149902fa871e967295b7b33b01914088ee054ab]
#    to [924788f719398e30fb3ab846ced8aa3940264956]
#
# patch "poster.pdf"
#  from [57892edde6aa6ec6faa875a27df39e3bd054fcf6]
#    to [fbccdb53e134353586a0d798924d1ccdf891cb6b]
#
# patch "pre_gen/mpi.c"
#  from [b8de79c7fe9009ecf50a8a1d5267e9ce888c6b50]
#    to [e203f2d5fceec0ccc521ff44274ef4c3788e5c1e]
#
# patch "tommath.h"
#  from [d7e663fdcde877eb8c7d2656af2ffe5129a42d37]
#    to [e18fb765c96711cccdb5c6ff11f43dbe00bf7295]
#
# patch "tommath.pdf"
#  from [0e06f40b55ed705fe3a0e1e21812f9527e4f8237]
#    to [05c8ae7aa8f77d2f7e59c0c436ede5fc8fc25ad4]
#
# patch "tommath.src"
#  from [665b9cf81a130c911f7c8baee277aa7a565d652e]
#    to [a40b150bd8b162b53e0175cbf1e8b36db5358cb8]
#
# patch "tommath.tex"
#  from [af59d0cd6ddc7dfed2554343a319f578eccdbd11]
#    to [e721fdf759d52b5923a7a7ec76538533174dc57b]
#
# patch "tommath_class.h"
#  from [d2d0596bd40615d49d7d9bdf8036e01e3d2e21a4]
#    to [a91c35ab0e5d115c16d6c6c9f6a107d22ee31d55]
#
============================================================
--- TODO	dcb353ac6dcd7b12849ba072ba980e49ff2a2883
+++ TODO	dcb353ac6dcd7b12849ba072ba980e49ff2a2883
@@ -0,0 +1,16 @@
+things for book in order of importance...
+
+- Fix up pseudo-code [only] for combas that are not consistent with source
+- Start in chapter 3 [basics] and work up...
+   - re-write to prose [less abrupt]
+   - clean up pseudo code [spacing]
+   - more examples where appropriate and figures
+
+Goal:
+   - Get sync done by mid January [roughly 8-12 hours work]
+   - Finish ch3-6 by end of January [roughly 12-16 hours of work]
+   - Finish ch7-end by mid Feb [roughly 20-24 hours of work].
+
+Goal isn't "first edition" but merely cleaner to read.
+
+
============================================================
--- bn_mp_reduce_2k_l.c	18cbea4d1de3dcd4ae121ef87f750fd71b936d65
+++ bn_mp_reduce_2k_l.c	18cbea4d1de3dcd4ae121ef87f750fd71b936d65
@@ -0,0 +1,58 @@
+#include <tommath.h>
+#ifdef BN_MP_REDUCE_2K_L_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* reduces a modulo n where n is of the form 2**p - d
+   This differs from reduce_2k since "d" can be larger
+   than a single digit.
+*/
+int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d)
+{
+   mp_int q;
+   int    p, res;
+
+   if ((res = mp_init(&q)) != MP_OKAY) {
+      return res;
+   }
+
+   p = mp_count_bits(n);
+top:
+   /* q = a/2**p, a = a mod 2**p */
+   if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
+      goto ERR;
+   }
+
+   /* q = q * d */
+   if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
+      goto ERR;
+   }
+
+   /* a = a + q */
+   if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
+      goto ERR;
+   }
+
+   if (mp_cmp_mag(a, n) != MP_LT) {
+      s_mp_sub(a, n, a);
+      goto top;
+   }
+
+ERR:
+   mp_clear(&q);
+   return res;
+}
+
+#endif
============================================================
--- bn_mp_reduce_2k_setup_l.c	2f7604831262ef985d2d84fff4079f660e9d4355
+++ bn_mp_reduce_2k_setup_l.c	2f7604831262ef985d2d84fff4079f660e9d4355
@@ -0,0 +1,40 @@
+#include <tommath.h>
+#ifdef BN_MP_REDUCE_2K_SETUP_L_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* determines the setup value */
+int mp_reduce_2k_setup_l(mp_int *a, mp_int *d)
+{
+   int    res;
+   mp_int tmp;
+
+   if ((res = mp_init(&tmp)) != MP_OKAY) {
+      return res;
+   }
+
+   if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
+      goto ERR;
+   }
+
+   if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
+      goto ERR;
+   }
+
+ERR:
+   mp_clear(&tmp);
+   return res;
+}
+#endif
============================================================
--- bn_mp_reduce_is_2k_l.c	f8a3ff10225afdfb3aa25c9de44066343247722a
+++ bn_mp_reduce_is_2k_l.c	f8a3ff10225afdfb3aa25c9de44066343247722a
@@ -0,0 +1,40 @@
+#include <tommath.h>
+#ifdef BN_MP_REDUCE_IS_2K_L_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* determines if reduce_2k_l can be used */
+int mp_reduce_is_2k_l(mp_int *a)
+{
+   int ix, iy;
+
+   if (a->used == 0) {
+      return MP_NO;
+   } else if (a->used == 1) {
+      return MP_YES;
+   } else if (a->used > 1) {
+      /* if more than half of the digits are -1 we're sold */
+      for (iy = ix = 0; ix < a->used; ix++) {
+          if (a->dp[ix] == MP_MASK) {
+              ++iy;
+          }
+      }
+      return (iy >= (a->used/2)) ? MP_YES : MP_NO;
+
+   }
+   return MP_NO;
+}
+
+#endif
============================================================
--- bn_mp_to_signed_bin_n.c	c3aea4a9e1164e03d1d58338de680a6f55b5ad8f
+++ bn_mp_to_signed_bin_n.c	c3aea4a9e1164e03d1d58338de680a6f55b5ad8f
@@ -0,0 +1,27 @@
+#include <tommath.h>
+#ifdef BN_MP_TO_SIGNED_BIN_N_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* store in signed [big endian] format */
+int mp_to_signed_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen)
+{
+   if (*outlen < (unsigned long)mp_signed_bin_size(a)) {
+      return MP_VAL;
+   }
+   *outlen = mp_signed_bin_size(a);
+   return mp_to_signed_bin(a, b);
+}
+#endif
============================================================
--- bn_mp_to_unsigned_bin_n.c	90b468fd50ea8c8151e0f40e6aa8ac302ec2fe46
+++ bn_mp_to_unsigned_bin_n.c	90b468fd50ea8c8151e0f40e6aa8ac302ec2fe46
@@ -0,0 +1,27 @@
+#include <tommath.h>
+#ifdef BN_MP_TO_UNSIGNED_BIN_N_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+ *
+ * LibTomMath is a library that provides multiple-precision
+ * integer arithmetic as well as number theoretic functionality.
+ *
+ * The library was designed directly after the MPI library by
+ * Michael Fromberger but has been written from scratch with
+ * additional optimizations in place.
+ *
+ * The library is free for all purposes without any express
+ * guarantee it works.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
+ */
+
+/* store in unsigned [big endian] format */
+int mp_to_unsigned_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen)
+{
+   if (*outlen < (unsigned long)mp_unsigned_bin_size(a)) {
+      return MP_VAL;
+   }
+   *outlen = mp_unsigned_bin_size(a);
+   return mp_to_unsigned_bin(a, b);
+}
+#endif
============================================================
--- logs/expt_2kl.log	6302bb8eaffcc9d3dc8fd60dbe24c2c68c873984
+++ logs/expt_2kl.log	6302bb8eaffcc9d3dc8fd60dbe24c2c68c873984
@@ -0,0 +1,4 @@
+1024   6954080
+2048  35993987
+4096 176068521
+521   1683720
============================================================
--- tombc/grammar.txt	0595ea070a28f54a97570563ad5431b3084d7e7c
+++ tombc/grammar.txt	0595ea070a28f54a97570563ad5431b3084d7e7c
@@ -0,0 +1,35 @@
+program       := program statement | statement | empty
+statement     := { statement }                                                                              |
+                 identifier = numexpression;                                                                |
+                 identifier[numexpression] = numexpression;                                                 |
+                 function(expressionlist);                                                                  |
+                 for (identifer = numexpression; numexpression; identifier = numexpression) { statement }   |
+                 while (numexpression) { statement }                                                        |
+                 if (numexpresion) { statement } elif                                                       |
+                 break;                                                                                     |
+                 continue;
+
+elif          := else statement | empty
+function      := abs | countbits | exptmod | jacobi | print | isprime | nextprime | issquare | readinteger | exit
+expressionlist := expressionlist, expression | expression
+
+// LR(1) !!!?
+expression    := string | numexpression
+numexpression := cmpexpr && cmpexpr | cmpexpr \|\| cmpexpr | cmpexpr
+cmpexpr       := boolexpr  < boolexpr | boolexpr  > boolexpr | boolexpr == boolexpr |
+                 boolexpr <= boolexpr | boolexpr >= boolexpr | boolexpr
+boolexpr      := shiftexpr & shiftexpr | shiftexpr ^ shiftexpr | shiftexpr \| shiftexpr | shiftexpr
+shiftexpr     := addsubexpr << addsubexpr | addsubexpr >> addsubexpr | addsubexpr
+addsubexpr    := mulexpr + mulexpr | mulexpr - mulexpr | mulexpr
+mulexpr       := expr * expr       | expr / expr | expr % expr | expr
+expr          := -nexpr | nexpr
+nexpr         := integer | identifier | ( numexpression ) | identifier[numexpression]
+
+identifier    := identifer digits | identifier alpha | alpha
+alpha         := a ... z | A ... Z
+integer       := hexnumber | digits
+hexnumber     := 0xhexdigits
+hexdigits     := hexdigits hexdigit | hexdigit
+hexdigit      := 0 ... 9 | a ... f | A ... F
+digits        := digits digit | digit
+digit         := 0 ... 9
============================================================
# bn.pdf is binary
============================================================
--- bn.tex	36834c9317131d36ca59ec5bcbdd2cfa7109fd4d
+++ bn.tex	20cadcede6073a9027d85eb340269f5f15737f36
@@ -49,7 +49,7 @@
 \begin{document}
 \frontmatter
 \pagestyle{empty}
-\title{LibTomMath User Manual \\ v0.32}
+\title{LibTomMath User Manual \\ v0.35}
 \author{Tom St Denis \\ tomstdenis@iahu.ca}
 \maketitle
 This text, the library and the accompanying textbook are all hereby placed in the public domain.  This book has been
@@ -263,12 +263,12 @@ \section{Purpose of LibTomMath}
 \begin{center}
 \begin{tabular}{|l|c|c|l|}
 \hline \textbf{Criteria} & \textbf{Pro} & \textbf{Con} & \textbf{Notes} \\
-\hline Few lines of code per file & X & & GnuPG $ = 300.9$, LibTomMath  $ = 76.04$ \\
+\hline Few lines of code per file & X & & GnuPG $ = 300.9$, LibTomMath  $ = 71.97$ \\
 \hline Commented function prototypes & X && GnuPG function names are cryptic. \\
 \hline Speed && X & LibTomMath is slower.  \\
 \hline Totally free & X & & GPL has unfavourable restrictions.\\
 \hline Large function base & X & & GnuPG is barebones. \\
-\hline Four modular reduction algorithms & X & & Faster modular exponentiation. \\
+\hline Five modular reduction algorithms & X & & Faster modular exponentiation for a variety of moduli. \\
 \hline Portable & X & & GnuPG requires configuration to build. \\
 \hline
 \end{tabular}
@@ -284,9 +284,12 @@ \section{Purpose of LibTomMath}
 So it may feel tempting to just rip the math code out of GnuPG (or GnuMP where it was taken from originally) in your
 own application but I think there are reasons not to.  While LibTomMath is slower than libraries such as GnuMP it is
 not normally significantly slower.  On x86 machines the difference is normally a factor of two when performing modular
-exponentiations.
+exponentiations.  It depends largely on the processor, compiler and the moduli being used.

-Essentially the only time you wouldn't use LibTomMath is when blazing speed is the primary concern.
+Essentially the only time you wouldn't use LibTomMath is when blazing speed is the primary concern.  However,
+on the other side of the coin LibTomMath offers you a totally free (public domain) well structured math library
+that is very flexible, complete and performs well in resource contrained environments.  Fast RSA for example can
+be performed with as little as 8KB of ram for data (again depending on build options).

 \chapter{Getting Started with LibTomMath}
 \section{Building Programs}
@@ -809,7 +812,7 @@ \subsection{Unsigned comparison}

 \index{mp\_cmp\_mag}
 \begin{alltt}
-int mp_cmp(mp_int * a, mp_int * b);
+int mp_cmp_mag(mp_int * a, mp_int * b);
 \end{alltt}
 This will compare $a$ to $b$ placing $a$ to the left of $b$.  This function cannot fail and will return one of the
 three compare codes listed in figure \ref{fig:CMP}.
@@ -1220,12 +1223,13 @@ \section{Squaring}
 \end{alltt}

 Will square $a$ and store it in $b$.  Like the case of multiplication there are four different squaring
-algorithms all which can be called from mp\_sqr().  It is ideal to use mp\_sqr over mp\_mul when squaring terms.
+algorithms all which can be called from mp\_sqr().  It is ideal to use mp\_sqr over mp\_mul when squaring terms because
+of the speed difference.

 \section{Tuning Polynomial Basis Routines}

 Both of the Toom-Cook and Karatsuba multiplication algorithms are faster than the traditional $O(n^2)$ approach that
-the Comba and baseline algorithms use.  At $O(n^{1.464973})$ and $O(n^{1.584962})$ running times respectfully they require
+the Comba and baseline algorithms use.  At $O(n^{1.464973})$ and $O(n^{1.584962})$ running times respectively they require
 considerably less work.  For example, a 10000-digit multiplication would take roughly 724,000 single precision
 multiplications with Toom-Cook or 100,000,000 single precision multiplications with the standard Comba (a factor
 of 138).
@@ -1297,14 +1301,14 @@ \section{Barrett Reduction}
 \section{Barrett Reduction}

 Barrett reduction is a generic optimized reduction algorithm that requires pre--computation to achieve
-a decent speedup over straight division.  First a $mu$ value must be precomputed with the following function.
+a decent speedup over straight division.  First a $\mu$ value must be precomputed with the following function.

 \index{mp\_reduce\_setup}
 \begin{alltt}
 int mp_reduce_setup(mp_int *a, mp_int *b);
 \end{alltt}

-Given a modulus in $b$ this produces the required $mu$ value in $a$.  For any given modulus this only has to
+Given a modulus in $b$ this produces the required $\mu$ value in $a$.  For any given modulus this only has to
 be computed once.  Modular reduction can now be performed with the following.

 \index{mp\_reduce}
@@ -1312,7 +1316,7 @@ \section{Barrett Reduction}
 int mp_reduce(mp_int *a, mp_int *b, mp_int *c);
 \end{alltt}

-This will reduce $a$ in place modulo $b$ with the precomputed $mu$ value in $c$.  $a$ must be in the range
+This will reduce $a$ in place modulo $b$ with the precomputed $\mu$ value in $c$.  $a$ must be in the range
 $0 \le a < b^2$.

 \begin{alltt}
@@ -1578,7 +1582,8 @@ \section{Root Finding}
 This algorithm uses the ``Newton Approximation'' method and will converge on the correct root fairly quickly.  Since
 the algorithm requires raising $a$ to the power of $b$ it is not ideal to attempt to find roots for large
 values of $b$.  If particularly large roots are required then a factor method could be used instead.  For example,
-$a^{1/16}$ is equivalent to $\left (a^{1/4} \right)^{1/4}$.
+$a^{1/16}$ is equivalent to $\left (a^{1/4} \right)^{1/4}$ or simply
+$\left ( \left ( \left ( a^{1/2} \right )^{1/2} \right )^{1/2} \right )^{1/2}$

 \chapter{Prime Numbers}
 \section{Trial Division}
============================================================
--- bn_fast_mp_invmod.c	08f10544b85d8060d7f7afe57fed583c889e3c9c
+++ bn_fast_mp_invmod.c	4d3634c512613a6a57a72c5664058c4a47e9a960
@@ -21,8 +21,7 @@
  * Based on slow invmod except this is optimized for the case where b is
  * odd as per HAC Note 14.64 on pp. 610
  */
-int
-fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
+int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
 {
   mp_int  x, y, u, v, B, D;
   int     res, neg;
@@ -39,20 +38,20 @@ fast_mp_invmod (mp_int * a, mp_int * b,

   /* x == modulus, y == value to invert */
   if ((res = mp_copy (b, &x)) != MP_OKAY) {
-    goto __ERR;
+    goto LBL_ERR;
   }

   /* we need y = |a| */
-  if ((res = mp_abs (a, &y)) != MP_OKAY) {
-    goto __ERR;
+  if ((res = mp_mod (a, b, &y)) != MP_OKAY) {
+    goto LBL_ERR;
   }

   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
   if ((res = mp_copy (&x, &u)) != MP_OKAY) {
-    goto __ERR;
+    goto LBL_ERR;
   }
   if ((res = mp_copy (&y, &v)) != MP_OKAY) {
-    goto __ERR;
+    goto LBL_ERR;
   }
   mp_set (&D, 1);

@@ -61,17 +60,17 @@ top:
   while (mp_iseven (&u) == 1) {
     /* 4.1 u = u/2 */
     if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
-      goto __ERR;
+      goto LBL_ERR;
     }
     /* 4.2 if B is odd then */
     if (mp_isodd (&B) == 1) {
       if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
-        goto __ERR;
+        goto LBL_ERR;
       }
     }
     /* B = B/2 */
     if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
-      goto __ERR;
+      goto LBL_ERR;
     }
   }

@@ -79,18 +78,18 @@ top:
   while (mp_iseven (&v) == 1) {
     /* 5.1 v = v/2 */
     if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
-      goto __ERR;
+      goto LBL_ERR;
     }
     /* 5.2 if D is odd then */
     if (mp_isodd (&D) == 1) {
       /* D = (D-x)/2 */
       if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
-        goto __ERR;
+        goto LBL_ERR;
       }
     }
     /* D = D/2 */
     if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
-      goto __ERR;
+      goto LBL_ERR;
     }
   }

@@ -98,20 +97,20 @@ top:
   if (mp_cmp (&u, &v) != MP_LT) {
     /* u = u - v, B = B - D */
     if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
-      goto __ERR;
+      goto LBL_ERR;
     }

     if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
-      goto __ERR;
+      goto LBL_ERR;
     }
   } else {
     /* v - v - u, D = D - B */
     if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
-      goto __ERR;
+      goto LBL_ERR;
     }

     if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
-      goto __ERR;
+      goto LBL_ERR;
     }
   }

@@ -125,21 +124,21 @@ top:
   /* if v != 1 then there is no inverse */
   if (mp_cmp_d (&v, 1) != MP_EQ) {
     res = MP_VAL;
-    goto __ERR;
+    goto LBL_ERR;
   }

   /* b is now the inverse */
   neg = a->sign;
   while (D.sign == MP_NEG) {
     if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
-      goto __ERR;
+      goto LBL_ERR;
     }
   }
   mp_exch (&D, c);
   c->sign = neg;
   res = MP_OKAY;

-__ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL);
+LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL);
   return res;
 }
 #endif
============================================================
--- bn_fast_mp_montgomery_reduce.c	325a7d4683d34160114e1c5c4bdee24f78d53e34
+++ bn_fast_mp_montgomery_reduce.c	78d8d5998865b41471663c55715fb1dd6752cee7
@@ -23,8 +23,7 @@
  *
  * Based on Algorithm 14.32 on pp.601 of HAC.
 */
-int
-fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
+int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
 {
   int     ix, res, olduse;
   mp_word W[MP_WARRAY];
============================================================
--- bn_fast_s_mp_mul_digs.c	0672a145d0569d227d950c2555234afe2ab394d6
+++ bn_fast_s_mp_mul_digs.c	9a92ee7590332a1db78b9d37171680e6b87e1670
@@ -31,8 +31,7 @@
  * Based on Algorithm 14.12 on pp.595 of HAC.
  *
  */
-int
-fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
+int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
 {
   int     olduse, res, pa, ix, iz;
   mp_digit W[MP_WARRAY];
@@ -50,7 +49,7 @@ fast_s_mp_mul_digs (mp_int * a, mp_int *

   /* clear the carry */
   _W = 0;
-  for (ix = 0; ix <= pa; ix++) {
+  for (ix = 0; ix < pa; ix++) {
       int      tx, ty;
       int      iy;
       mp_digit *tmpx, *tmpy;
@@ -63,7 +62,7 @@ fast_s_mp_mul_digs (mp_int * a, mp_int *
       tmpx = a->dp + tx;
       tmpy = b->dp + ty;

-      /* this is the number of times the loop will iterrate, essentially its
+      /* this is the number of times the loop will iterrate, essentially
          while (tx++ < a->used && ty-- >= 0) { ... }
        */
       iy = MIN(a->used-tx, ty+1);
@@ -80,14 +79,17 @@ fast_s_mp_mul_digs (mp_int * a, mp_int *
       _W = _W >> ((mp_word)DIGIT_BIT);
   }

+  /* store final carry */
+  W[ix] = (mp_digit)(_W & MP_MASK);
+
   /* setup dest */
   olduse  = c->used;
-  c->used = digs;
+  c->used = pa;

   {
     register mp_digit *tmpc;
     tmpc = c->dp;
-    for (ix = 0; ix < digs; ix++) {
+    for (ix = 0; ix < pa+1; ix++) {
       /* now extract the previous digit [below the carry] */
       *tmpc++ = W[ix];
     }
============================================================
--- bn_fast_s_mp_mul_high_digs.c	0d7785aa91e231bdc765d67ff4074ecc29d556a2
+++ bn_fast_s_mp_mul_high_digs.c	ceff3a71492dd8079556c72a83364f4930e9e733
@@ -24,8 +24,7 @@
  *
  * Based on Algorithm 14.12 on pp.595 of HAC.
  */
-int
-fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
+int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
 {
   int     olduse, res, pa, ix, iz;
   mp_digit W[MP_WARRAY];
@@ -42,7 +41,7 @@ fast_s_mp_mul_high_digs (mp_int * a, mp_
   /* number of output digits to produce */
   pa = a->used + b->used;
   _W = 0;
-  for (ix = digs; ix <= pa; ix++) {
+  for (ix = digs; ix < pa; ix++) {
       int      tx, ty, iy;
       mp_digit *tmpx, *tmpy;

@@ -70,6 +69,9 @@ fast_s_mp_mul_high_digs (mp_int * a, mp_
       /* make next carry */
       _W = _W >> ((mp_word)DIGIT_BIT);
   }
+
+  /* store final carry */
+  W[ix] = (mp_digit)(_W & MP_MASK);

   /* setup dest */
   olduse  = c->used;
============================================================
--- bn_fast_s_mp_sqr.c	5765234e01ae11780dcaade97742404013b1da42
+++ bn_fast_s_mp_sqr.c	eda475fe947ffc90906c7167eec15450c2209362
@@ -15,33 +15,14 @@
  * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
  */

-/* fast squaring
- *
- * This is the comba method where the columns of the product
- * are computed first then the carries are computed.  This
- * has the effect of making a very simple inner loop that
- * is executed the most
- *
- * W2 represents the outer products and W the inner.
- *
- * A further optimizations is made because the inner
- * products are of the form "A * B * 2".  The *2 part does
- * not need to be computed until the end which is good
- * because 64-bit shifts are slow!
- *
- * Based on Algorithm 14.16 on pp.597 of HAC.
- *
- */
 /* the jist of squaring...
+ * you do like mult except the offset of the tmpx [one that
+ * starts closer to zero] can't equal the offset of tmpy.
+ * So basically you set up iy like before then you min it with
+ * (ty-tx) so that it never happens.  You double all those
+ * you add in the inner loop

-you do like mult except the offset of the tmpx [one that starts closer to zero]
-can't equal the offset of tmpy.  So basically you set up iy like before then you min it with
-(ty-tx) so that it never happens.  You double all those you add in the inner loop
-
 After that loop you do the squares and add them in.
-
-Remove W2 and don't memset W
-
 */

 int fast_s_mp_sqr (mp_int * a, mp_int * b)
@@ -60,7 +41,7 @@ int fast_s_mp_sqr (mp_int * a, mp_int *

   /* number of output digits to produce */
   W1 = 0;
-  for (ix = 0; ix <= pa; ix++) {
+  for (ix = 0; ix < pa; ix++) {
       int      tx, ty, iy;
       mp_word  _W;
       mp_digit *tmpy;
@@ -76,7 +57,7 @@ int fast_s_mp_sqr (mp_int * a, mp_int *
       tmpx = a->dp + tx;
       tmpy = a->dp + ty;

-      /* this is the number of times the loop will iterrate, essentially its
+      /* this is the number of times the loop will iterrate, essentially
          while (tx++ < a->used && ty-- >= 0) { ... }
        */
       iy = MIN(a->used-tx, ty+1);
@@ -101,7 +82,7 @@ int fast_s_mp_sqr (mp_int * a, mp_int *
       }

       /* store it */
-      W[ix] = _W;
+      W[ix] = (mp_digit)(_W & MP_MASK);

       /* make next carry */
       W1 = _W >> ((mp_word)DIGIT_BIT);
============================================================
--- bn_mp_div.c	d993f550cc6ff9caa00c918bbfba773949fc65ca
+++ bn_mp_div.c	0f214a2ee7c1cfd6152ebd473d2f04db67f2f86c
@@ -49,23 +49,23 @@ int mp_div(mp_int * a, mp_int * b, mp_in

   mp_set(&tq, 1);
   n = mp_count_bits(a) - mp_count_bits(b);
-  if (((res = mp_copy(a, &ta)) != MP_OKAY) ||
-      ((res = mp_copy(b, &tb)) != MP_OKAY) ||
+  if (((res = mp_abs(a, &ta)) != MP_OKAY) ||
+      ((res = mp_abs(b, &tb)) != MP_OKAY) ||
       ((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) ||
       ((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
-      goto __ERR;
+      goto LBL_ERR;
   }

   while (n-- >= 0) {
      if (mp_cmp(&tb, &ta) != MP_GT) {
         if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) ||
             ((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
-           goto __ERR;
+           goto LBL_ERR;
         }
      }
      if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) ||
          ((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
-           goto __ERR;
+           goto LBL_ERR;
      }
   }

@@ -74,13 +74,13 @@ int mp_div(mp_int * a, mp_int * b, mp_in
   n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
   if (c != NULL) {
      mp_exch(c, &q);
-     c->sign  = n2;
+     c->sign  = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
   }
   if (d != NULL) {
      mp_exch(d, &ta);
-     d->sign = n;
+     d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
   }
-__ERR:
+LBL_ERR:
    mp_clear_multi(&ta, &tb, &tq, &q, NULL);
    return res;
 }
@@ -129,19 +129,19 @@ int mp_div (mp_int * a, mp_int * b, mp_i
   q.used = a->used + 2;

   if ((res = mp_init (&t1)) != MP_OKAY) {
-    goto __Q;
+    goto LBL_Q;
   }

   if ((res = mp_init (&t2)) != MP_OKAY) {
-    goto __T1;
+    goto LBL_T1;
   }

   if ((res = mp_init_copy (&x, a)) != MP_OKAY) {
-    goto __T2;
+    goto LBL_T2;
   }

   if ((res = mp_init_copy (&y, b)) != MP_OKAY) {
-    goto __X;
+    goto LBL_X;
   }

   /* fix the sign */
@@ -153,10 +153,10 @@ int mp_div (mp_int * a, mp_int * b, mp_i
   if (norm < (int)(DIGIT_BIT-1)) {
      norm = (DIGIT_BIT-1) - norm;
      if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) {
-       goto __Y;
+       goto LBL_Y;
      }
      if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) {
-       goto __Y;
+       goto LBL_Y;
      }
   } else {
      norm = 0;
@@ -168,13 +168,13 @@ int mp_div (mp_int * a, mp_int * b, mp_i

   /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
   if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */
-    goto __Y;
+    goto LBL_Y;
   }

   while (mp_cmp (&x, &y) != MP_LT) {
     ++(q.dp[n - t]);
     if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) {
-      goto __Y;
+      goto LBL_Y;
     }
   }

@@ -216,7 +216,7 @@ int mp_div (mp_int * a, mp_int * b, mp_i
       t1.dp[1] = y.dp[t];
       t1.used = 2;
       if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) {
-        goto __Y;
+        goto LBL_Y;
       }

       /* find right hand */
@@ -228,27 +228,27 @@ int mp_div (mp_int * a, mp_int * b, mp_i

     /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
     if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
-      goto __Y;
+      goto LBL_Y;
     }

     if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
-      goto __Y;
+      goto LBL_Y;
     }

     if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) {
-      goto __Y;
+      goto LBL_Y;
     }

     /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
     if (x.sign == MP_NEG) {
       if ((res = mp_copy (&y, &t1)) != MP_OKAY) {
-        goto __Y;
+        goto LBL_Y;
       }
       if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
-        goto __Y;
+        goto LBL_Y;
       }
       if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) {
-        goto __Y;
+        goto LBL_Y;
       }

       q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK;
@@ -275,11 +275,11 @@ int mp_div (mp_int * a, mp_int * b, mp_i

   res = MP_OKAY;

-__Y:mp_clear (&y);
-__X:mp_clear (&x);
-__T2:mp_clear (&t2);
-__T1:mp_clear (&t1);
-__Q:mp_clear (&q);
+LBL_Y:mp_clear (&y);
+LBL_X:mp_clear (&x);
+LBL_T2:mp_clear (&t2);
+LBL_T1:mp_clear (&t1);
+LBL_Q:mp_clear (&q);
   return res;
 }

============================================================
--- bn_mp_dr_reduce.c	1736b0957a7bbeabb32ba902730f8dd031ab8a97
+++ bn_mp_dr_reduce.c	e66ad3324fbe632c9168616227cab4bafada767a
@@ -20,7 +20,7 @@
  * Based on algorithm from the paper
  *
  * "Generating Efficient Primes for Discrete Log Cryptosystems"
- *                 Chae Hoon Lim, Pil Loong Lee,
+ *                 Chae Hoon Lim, Pil Joong Lee,
  *          POSTECH Information Research Laboratories
  *
  * The modulus must be of a special format [see manual]
============================================================
--- bn_mp_exptmod.c	9a6256e60d747e910cc0f089a7cc51f3fae51f39
+++ bn_mp_exptmod.c	5834ec7efb5ce20b64f84e8015eaa59bfd93f090
@@ -61,25 +61,33 @@ int mp_exptmod (mp_int * G, mp_int * X,
      return err;
 #else
      /* no invmod */
-     return MP_VAL
+     return MP_VAL;
 #endif
   }

+/* modified diminished radix reduction */
+#if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C)
+  if (mp_reduce_is_2k_l(P) == MP_YES) {
+     return s_mp_exptmod(G, X, P, Y, 1);
+  }
+#endif
+
 #ifdef BN_MP_DR_IS_MODULUS_C
   /* is it a DR modulus? */
   dr = mp_dr_is_modulus(P);
 #else
+  /* default to no */
   dr = 0;
 #endif

 #ifdef BN_MP_REDUCE_IS_2K_C
-  /* if not, is it a uDR modulus? */
+  /* if not, is it a unrestricted DR modulus? */
   if (dr == 0) {
      dr = mp_reduce_is_2k(P) << 1;
   }
 #endif

-  /* if the modulus is odd or dr != 0 use the fast method */
+  /* if the modulus is odd or dr != 0 use the montgomery method */
 #ifdef BN_MP_EXPTMOD_FAST_C
   if (mp_isodd (P) == 1 || dr !=  0) {
     return mp_exptmod_fast (G, X, P, Y, dr);
@@ -87,7 +95,7 @@ int mp_exptmod (mp_int * G, mp_int * X,
 #endif
 #ifdef BN_S_MP_EXPTMOD_C
     /* otherwise use the generic Barrett reduction technique */
-    return s_mp_exptmod (G, X, P, Y);
+    return s_mp_exptmod (G, X, P, Y, 0);
 #else
     /* no exptmod for evens */
     return MP_VAL;
============================================================
--- bn_mp_exptmod_fast.c	ec6cb8ea6c2dde8b87493bd481a51c9f8f946ede
+++ bn_mp_exptmod_fast.c	0c597499c614a4fc38dbf06574749138133f69b2
@@ -29,8 +29,7 @@
    #define TAB_SIZE 256
 #endif

-int
-mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
+int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
 {
   mp_int  M[TAB_SIZE], res;
   mp_digit buf, mp;
@@ -88,11 +87,11 @@ mp_exptmod_fast (mp_int * G, mp_int * X,
 #ifdef BN_MP_MONTGOMERY_SETUP_C
      /* now setup montgomery  */
      if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
-        goto __M;
+        goto LBL_M;
      }
 #else
      err = MP_VAL;
-     goto __M;
+     goto LBL_M;
 #endif

      /* automatically pick the comba one if available (saves quite a few calls/ifs) */
@@ -108,7 +107,7 @@ mp_exptmod_fast (mp_int * G, mp_int * X,
         redux = mp_montgomery_reduce;
 #else
         err = MP_VAL;
-        goto __M;
+        goto LBL_M;
 #endif
      }
   } else if (redmode == 1) {
@@ -118,24 +117,24 @@ mp_exptmod_fast (mp_int * G, mp_int * X,
      redux = mp_dr_reduce;
 #else
      err = MP_VAL;
-     goto __M;
+     goto LBL_M;
 #endif
   } else {
 #if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
      /* setup DR reduction for moduli of the form 2**k - b */
      if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
-        goto __M;
+        goto LBL_M;
      }
      redux = mp_reduce_2k;
 #else
      err = MP_VAL;
-     goto __M;
+     goto LBL_M;
 #endif
   }

   /* setup result */
   if ((err = mp_init (&res)) != MP_OKAY) {
-    goto __M;
+    goto LBL_M;
   }

   /* create M table
@@ -149,45 +148,45 @@ mp_exptmod_fast (mp_int * G, mp_int * X,
 #ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
      /* now we need R mod m */
      if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
-       goto __RES;
+       goto LBL_RES;
      }
 #else
      err = MP_VAL;
-     goto __RES;
+     goto LBL_RES;
 #endif

      /* now set M[1] to G * R mod m */
      if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
-       goto __RES;
+       goto LBL_RES;
      }
   } else {
      mp_set(&res, 1);
      if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
-        goto __RES;
+        goto LBL_RES;
      }
   }

   /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
   if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
-    goto __RES;
+    goto LBL_RES;
   }

   for (x = 0; x < (winsize - 1); x++) {
     if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
-      goto __RES;
+      goto LBL_RES;
     }
     if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
-      goto __RES;
+      goto LBL_RES;
     }
   }

   /* create upper table */
   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
     if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
-      goto __RES;
+      goto LBL_RES;
     }
     if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
-      goto __RES;
+      goto LBL_RES;
     }
   }

@@ -227,10 +226,10 @@ mp_exptmod_fast (mp_int * G, mp_int * X,
     /* if the bit is zero and mode == 1 then we square */
     if (mode == 1 && y == 0) {
       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
-        goto __RES;
+        goto LBL_RES;
       }
       if ((err = redux (&res, P, mp)) != MP_OKAY) {
-        goto __RES;
+        goto LBL_RES;
       }
       continue;
     }
@@ -244,19 +243,19 @@ mp_exptmod_fast (mp_int * G, mp_int * X,
       /* square first */
       for (x = 0; x < winsize; x++) {
         if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
-          goto __RES;
+          goto LBL_RES;
         }
         if ((err = redux (&res, P, mp)) != MP_OKAY) {
-          goto __RES;
+          goto LBL_RES;
         }
       }

       /* then multiply */
       if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
-        goto __RES;
+        goto LBL_RES;
       }
       if ((err = redux (&res, P, mp)) != MP_OKAY) {
-        goto __RES;
+        goto LBL_RES;
       }

       /* empty window and reset */
@@ -271,10 +270,10 @@ mp_exptmod_fast (mp_int * G, mp_int * X,
     /* square then multiply if the bit is set */
     for (x = 0; x < bitcpy; x++) {
       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
-        goto __RES;
+        goto LBL_RES;
       }
       if ((err = redux (&res, P, mp)) != MP_OKAY) {
-        goto __RES;
+        goto LBL_RES;
       }

       /* get next bit of the window */
@@ -282,10 +281,10 @@ mp_exptmod_fast (mp_int * G, mp_int * X,
       if ((bitbuf & (1 << winsize)) != 0) {
         /* then multiply */
         if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
-          goto __RES;
+          goto LBL_RES;
         }
         if ((err = redux (&res, P, mp)) != MP_OKAY) {
-          goto __RES;
+          goto LBL_RES;
         }
       }
     }
@@ -299,15 +298,15 @@ mp_exptmod_fast (mp_int * G, mp_int * X,
       * of R.
       */
      if ((err = redux(&res, P, mp)) != MP_OKAY) {
-       goto __RES;
+       goto LBL_RES;
      }
   }

   /* swap res with Y */
   mp_exch (&res, Y);
   err = MP_OKAY;
-__RES:mp_clear (&res);
-__M:
+LBL_RES:mp_clear (&res);
+LBL_M:
   mp_clear(&M[1]);
   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
     mp_clear (&M[x]);
============================================================
--- bn_mp_exteuclid.c	3c13b9da30303b4bb43aa8a362d695f214c67a94
+++ bn_mp_exteuclid.c	84910598afbed46c88cd0802b8c2607c8ea9e1d0
@@ -59,6 +59,13 @@ int mp_exteuclid(mp_int *a, mp_int *b, m
        if ((err = mp_copy(&t3, &v3)) != MP_OKAY)                                  { goto _ERR; }
    }

+   /* make sure U3 >= 0 */
+   if (u3.sign == MP_NEG) {
+      mp_neg(&u1, &u1);
+      mp_neg(&u2, &u2);
+      mp_neg(&u3, &u3);
+   }
+
    /* copy result out */
    if (U1 != NULL) { mp_exch(U1, &u1); }
    if (U2 != NULL) { mp_exch(U2, &u2); }
============================================================
--- bn_mp_gcd.c	75332794a8c6790c417d7cdcc51c92be8d5bdc91
+++ bn_mp_gcd.c	a5ec727015fd307450f7718c481285dd1599d5d0
@@ -43,7 +43,7 @@ int mp_gcd (mp_int * a, mp_int * b, mp_i
   }

   if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
-    goto __U;
+    goto LBL_U;
   }

   /* must be positive for the remainder of the algorithm */
@@ -57,24 +57,24 @@ int mp_gcd (mp_int * a, mp_int * b, mp_i
   if (k > 0) {
      /* divide the power of two out */
      if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
-        goto __V;
+        goto LBL_V;
      }

      if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
-        goto __V;
+        goto LBL_V;
      }
   }

   /* divide any remaining factors of two out */
   if (u_lsb != k) {
      if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
-        goto __V;
+        goto LBL_V;
      }
   }

   if (v_lsb != k) {
      if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
-        goto __V;
+        goto LBL_V;
      }
   }

@@ -87,23 +87,23 @@ int mp_gcd (mp_int * a, mp_int * b, mp_i

      /* subtract smallest from largest */
      if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
-        goto __V;
+        goto LBL_V;
      }

      /* Divide out all factors of two */
      if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
-        goto __V;
+        goto LBL_V;
      }
   }

   /* multiply by 2**k which we divided out at the beginning */
   if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
-     goto __V;
+     goto LBL_V;
   }
   c->sign = MP_ZPOS;
   res = MP_OKAY;
-__V:mp_clear (&u);
-__U:mp_clear (&v);
+LBL_V:mp_clear (&u);
+LBL_U:mp_clear (&v);
   return res;
 }
 #endif
============================================================
--- bn_mp_invmod_slow.c	c0acf30269eaf91f32b682b1b2d4150ee566f1a5
+++ bn_mp_invmod_slow.c	0e68e06289ca853df2e27007a793f251eeb6db76
@@ -33,25 +33,25 @@ int mp_invmod_slow (mp_int * a, mp_int *
   }

   /* x = a, y = b */
-  if ((res = mp_copy (a, &x)) != MP_OKAY) {
-    goto __ERR;
+  if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
+      goto LBL_ERR;
   }
   if ((res = mp_copy (b, &y)) != MP_OKAY) {
-    goto __ERR;
+    goto LBL_ERR;
   }

   /* 2. [modified] if x,y are both even then return an error! */
   if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
     res = MP_VAL;
-    goto __ERR;
+    goto LBL_ERR;
   }

   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
   if ((res = mp_copy (&x, &u)) != MP_OKAY) {
-    goto __ERR;
+    goto LBL_ERR;
   }
   if ((res = mp_copy (&y, &v)) != MP_OKAY) {
-    goto __ERR;
+    goto LBL_ERR;
   }
   mp_set (&A, 1);
   mp_set (&D, 1);
@@ -61,24 +61,24 @@ top:
   while (mp_iseven (&u) == 1) {
     /* 4.1 u = u/2 */
     if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
-      goto __ERR;
+      goto LBL_ERR;
     }
     /* 4.2 if A or B is odd then */
     if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) {
       /* A = (A+y)/2, B = (B-x)/2 */
       if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
-         goto __ERR;
+         goto LBL_ERR;
       }
       if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
-         goto __ERR;
+         goto LBL_ERR;
       }
     }
     /* A = A/2, B = B/2 */
     if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
-      goto __ERR;
+      goto LBL_ERR;
     }
     if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
-      goto __ERR;
+      goto LBL_ERR;
     }
   }

@@ -86,24 +86,24 @@ top:
   while (mp_iseven (&v) == 1) {
     /* 5.1 v = v/2 */
     if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
-      goto __ERR;
+      goto LBL_ERR;
     }
     /* 5.2 if C or D is odd then */
     if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) {
       /* C = (C+y)/2, D = (D-x)/2 */
       if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
-         goto __ERR;
+         goto LBL_ERR;
       }
       if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
-         goto __ERR;
+         goto LBL_ERR;
       }
     }
     /* C = C/2, D = D/2 */
     if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
-      goto __ERR;
+      goto LBL_ERR;
     }
     if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
-      goto __ERR;
+      goto LBL_ERR;
     }
   }

@@ -111,28 +111,28 @@ top:
   if (mp_cmp (&u, &v) != MP_LT) {
     /* u = u - v, A = A - C, B = B - D */
     if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
-      goto __ERR;
+      goto LBL_ERR;
     }

     if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
-      goto __ERR;
+      goto LBL_ERR;
     }

     if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
-      goto __ERR;
+      goto LBL_ERR;
     }
   } else {
     /* v - v - u, C = C - A, D = D - B */
     if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
-      goto __ERR;
+      goto LBL_ERR;
     }

     if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
-      goto __ERR;
+      goto LBL_ERR;
     }

     if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
-      goto __ERR;
+      goto LBL_ERR;
     }
   }

@@ -145,27 +145,27 @@ top:
   /* if v != 1 then there is no inverse */
   if (mp_cmp_d (&v, 1) != MP_EQ) {
     res = MP_VAL;
-    goto __ERR;
+    goto LBL_ERR;
   }

   /* if its too low */
   while (mp_cmp_d(&C, 0) == MP_LT) {
       if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
-         goto __ERR;
+         goto LBL_ERR;
       }
   }

   /* too big */
   while (mp_cmp_mag(&C, b) != MP_LT) {
       if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
-         goto __ERR;
+         goto LBL_ERR;
       }
   }

   /* C is now the inverse */
   mp_exch (&C, c);
   res = MP_OKAY;
-__ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL);
+LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL);
   return res;
 }
 #endif
============================================================
--- bn_mp_jacobi.c	5f0894cc606a9979ce7008fc29d56c24cf53e9d6
+++ bn_mp_jacobi.c	9c41eb9191d5875db6dd10ef168fedbe621a3b2a
@@ -50,13 +50,13 @@ int mp_jacobi (mp_int * a, mp_int * p, i
   }

   if ((res = mp_init (&p1)) != MP_OKAY) {
-    goto __A1;
+    goto LBL_A1;
   }

   /* divide out larger power of two */
   k = mp_cnt_lsb(&a1);
   if ((res = mp_div_2d(&a1, k, &a1, NULL)) != MP_OKAY) {
-     goto __P1;
+     goto LBL_P1;
   }

   /* step 4.  if e is even set s=1 */
@@ -84,18 +84,18 @@ int mp_jacobi (mp_int * a, mp_int * p, i
   } else {
     /* n1 = n mod a1 */
     if ((res = mp_mod (p, &a1, &p1)) != MP_OKAY) {
-      goto __P1;
+      goto LBL_P1;
     }
     if ((res = mp_jacobi (&p1, &a1, &r)) != MP_OKAY) {
-      goto __P1;
+      goto LBL_P1;
     }
     *c = s * r;
   }

   /* done */
   res = MP_OKAY;
-__P1:mp_clear (&p1);
-__A1:mp_clear (&a1);
+LBL_P1:mp_clear (&p1);
+LBL_A1:mp_clear (&a1);
   return res;
 }
 #endif
============================================================
--- bn_mp_lcm.c	525b0299d60ea98ecbe42622d6f44d6e64ea32a7
+++ bn_mp_lcm.c	ae6e40952e57e849e762a25ce87385e02fb4bdcd
@@ -28,20 +28,20 @@ int mp_lcm (mp_int * a, mp_int * b, mp_i

   /* t1 = get the GCD of the two inputs */
   if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) {
-    goto __T;
+    goto LBL_T;
   }

   /* divide the smallest by the GCD */
   if (mp_cmp_mag(a, b) == MP_LT) {
      /* store quotient in t2 such that t2 * b is the LCM */
      if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
-        goto __T;
+        goto LBL_T;
      }
      res = mp_mul(b, &t2, c);
   } else {
      /* store quotient in t2 such that t2 * a is the LCM */
      if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
-        goto __T;
+        goto LBL_T;
      }
      res = mp_mul(a, &t2, c);
   }
@@ -49,7 +49,7 @@ int mp_lcm (mp_int * a, mp_int * b, mp_i
   /* fix the sign to positive */
   c->sign = MP_ZPOS;

-__T:
+LBL_T:
   mp_clear_multi (&t1, &t2, NULL);
   return res;
 }
============================================================
--- bn_mp_mod_2d.c	6860d01c3277c6edd0a7ca03dd22248983306b8e
+++ bn_mp_mod_2d.c	e0e5f0ab899aae9477bf97785f85320ad0b78865
@@ -28,7 +28,7 @@ mp_mod_2d (mp_int * a, int b, mp_int * c
   }

   /* if the modulus is larger than the value than return */
-  if (b > (int) (a->used * DIGIT_BIT)) {
+  if (b >= (int) (a->used * DIGIT_BIT)) {
     res = mp_copy (a, c);
     return res;
   }
============================================================
--- bn_mp_montgomery_calc_normalization.c	fa4dcf6f68dfbab19a37ab68781ef3763b2e904d
+++ bn_mp_montgomery_calc_normalization.c	41769e061872600b8d99328e343fe770b7b3aec4
@@ -28,7 +28,6 @@ int mp_montgomery_calc_normalization (mp
   /* how many bits of last digit does b use */
   bits = mp_count_bits (b) % DIGIT_BIT;

-
   if (b->used > 1) {
      if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
         return res;
============================================================
--- bn_mp_mul_d.c	fe7fe17b0dc44d3f3c6ab78998691e412280255e
+++ bn_mp_mul_d.c	684e50bde0381306076b4b64d2ecbab99b7af195
@@ -57,8 +57,9 @@ mp_mul_d (mp_int * a, mp_digit b, mp_int
     u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
   }

-  /* store final carry [if any] */
+  /* store final carry [if any] and increment ix offset  */
   *tmpc++ = u;
+  ++ix;

   /* now zero digits above the top */
   while (ix++ < olduse) {
============================================================
--- bn_mp_n_root.c	339cf93663939605c6304419a020f33d4184b3dd
+++ bn_mp_n_root.c	ceda8bdd58d2fb45707be024bc9ad46ceb44a337
@@ -40,11 +40,11 @@ int mp_n_root (mp_int * a, mp_digit b, m
   }

   if ((res = mp_init (&t2)) != MP_OKAY) {
-    goto __T1;
+    goto LBL_T1;
   }

   if ((res = mp_init (&t3)) != MP_OKAY) {
-    goto __T2;
+    goto LBL_T2;
   }

   /* if a is negative fudge the sign but keep track */
@@ -57,52 +57,52 @@ int mp_n_root (mp_int * a, mp_digit b, m
   do {
     /* t1 = t2 */
     if ((res = mp_copy (&t2, &t1)) != MP_OKAY) {
-      goto __T3;
+      goto LBL_T3;
     }

     /* t2 = t1 - ((t1**b - a) / (b * t1**(b-1))) */

     /* t3 = t1**(b-1) */
     if ((res = mp_expt_d (&t1, b - 1, &t3)) != MP_OKAY) {
-      goto __T3;
+      goto LBL_T3;
     }

     /* numerator */
     /* t2 = t1**b */
     if ((res = mp_mul (&t3, &t1, &t2)) != MP_OKAY) {
-      goto __T3;
+      goto LBL_T3;
     }

     /* t2 = t1**b - a */
     if ((res = mp_sub (&t2, a, &t2)) != MP_OKAY) {
-      goto __T3;
+      goto LBL_T3;
     }

     /* denominator */
     /* t3 = t1**(b-1) * b  */
     if ((res = mp_mul_d (&t3, b, &t3)) != MP_OKAY) {
-      goto __T3;
+      goto LBL_T3;
     }

     /* t3 = (t1**b - a)/(b * t1**(b-1)) */
     if ((res = mp_div (&t2, &t3, &t3, NULL)) != MP_OKAY) {
-      goto __T3;
+      goto LBL_T3;
     }

     if ((res = mp_sub (&t1, &t3, &t2)) != MP_OKAY) {
-      goto __T3;
+      goto LBL_T3;
     }
   }  while (mp_cmp (&t1, &t2) != MP_EQ);

   /* result can be off by a few so check */
   for (;;) {
     if ((res = mp_expt_d (&t1, b, &t2)) != MP_OKAY) {
-      goto __T3;
+      goto LBL_T3;
     }

     if (mp_cmp (&t2, a) == MP_GT) {
       if ((res = mp_sub_d (&t1, 1, &t1)) != MP_OKAY) {
-         goto __T3;
+         goto LBL_T3;
       }
     } else {
       break;
@@ -120,9 +120,9 @@ int mp_n_root (mp_int * a, mp_digit b, m

   res = MP_OKAY;

-__T3:mp_clear (&t3);
-__T2:mp_clear (&t2);
-__T1:mp_clear (&t1);
+LBL_T3:mp_clear (&t3);
+LBL_T2:mp_clear (&t2);
+LBL_T1:mp_clear (&t1);
   return res;
 }
 #endif
============================================================
--- bn_mp_neg.c	afc6d203cc58d956e118f218c440bca91f94adce
+++ bn_mp_neg.c	02cb8735d235feb2f3016b581f23d6da582f8765
@@ -19,12 +19,18 @@ int mp_neg (mp_int * a, mp_int * b)
 int mp_neg (mp_int * a, mp_int * b)
 {
   int     res;
-  if ((res = mp_copy (a, b)) != MP_OKAY) {
-    return res;
+  if (a != b) {
+     if ((res = mp_copy (a, b)) != MP_OKAY) {
+        return res;
+     }
   }
+
   if (mp_iszero(b) != MP_YES) {
      b->sign = (a->sign == MP_ZPOS) ? MP_NEG : MP_ZPOS;
+  } else {
+     b->sign = MP_ZPOS;
   }
+
   return MP_OKAY;
 }
 #endif
============================================================
--- bn_mp_prime_fermat.c	7a08d1ab74369ae14b39a95258a6b955e0d7be22
+++ bn_mp_prime_fermat.c	bed25e3d7aef4e9ce0bd9642626e082b7bc02e73
@@ -43,7 +43,7 @@ int mp_prime_fermat (mp_int * a, mp_int

   /* compute t = b**a mod a */
   if ((err = mp_exptmod (b, a, a, &t)) != MP_OKAY) {
-    goto __T;
+    goto LBL_T;
   }

   /* is it equal to b? */
@@ -52,7 +52,7 @@ int mp_prime_fermat (mp_int * a, mp_int
   }

   err = MP_OKAY;
-__T:mp_clear (&t);
+LBL_T:mp_clear (&t);
   return err;
 }
 #endif
============================================================
--- bn_mp_prime_is_divisible.c	7536731238b627bc2d90c623d38fd2952bf0dba8
+++ bn_mp_prime_is_divisible.c	ac6f9ad19e8ee7c27df8dab72b160090de3f88e5
@@ -29,8 +29,8 @@ int mp_prime_is_divisible (mp_int * a, i
   *result = MP_NO;

   for (ix = 0; ix < PRIME_SIZE; ix++) {
-    /* what is a mod __prime_tab[ix] */
-    if ((err = mp_mod_d (a, __prime_tab[ix], &res)) != MP_OKAY) {
+    /* what is a mod LBL_prime_tab[ix] */
+    if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) {
       return err;
     }

============================================================
--- bn_mp_prime_is_prime.c	2c13d4e47af42472ed769696f340f3272cfae3c6
+++ bn_mp_prime_is_prime.c	616c045068c12c0d7cd8a73080ef55da4e501493
@@ -37,7 +37,7 @@ int mp_prime_is_prime (mp_int * a, int t

   /* is the input equal to one of the primes in the table? */
   for (ix = 0; ix < PRIME_SIZE; ix++) {
-      if (mp_cmp_d(a, __prime_tab[ix]) == MP_EQ) {
+      if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
          *result = 1;
          return MP_OKAY;
       }
@@ -60,20 +60,20 @@ int mp_prime_is_prime (mp_int * a, int t

   for (ix = 0; ix < t; ix++) {
     /* set the prime */
-    mp_set (&b, __prime_tab[ix]);
+    mp_set (&b, ltm_prime_tab[ix]);

     if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
-      goto __B;
+      goto LBL_B;
     }

     if (res == MP_NO) {
-      goto __B;
+      goto LBL_B;
     }
   }

   /* passed the test */
   *result = MP_YES;
-__B:mp_clear (&b);
+LBL_B:mp_clear (&b);
   return err;
 }
 #endif
============================================================
--- bn_mp_prime_miller_rabin.c	aef8e16b5107c8424ed96e143d41ac1f0c419a09
+++ bn_mp_prime_miller_rabin.c	e893c60f539dce9136d4afe96650b4e5a649876b
@@ -40,12 +40,12 @@ int mp_prime_miller_rabin (mp_int * a, m
     return err;
   }
   if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
-    goto __N1;
+    goto LBL_N1;
   }

   /* set 2**s * r = n1 */
   if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
-    goto __N1;
+    goto LBL_N1;
   }

   /* count the number of least significant bits
@@ -55,15 +55,15 @@ int mp_prime_miller_rabin (mp_int * a, m

   /* now divide n - 1 by 2**s */
   if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
-    goto __R;
+    goto LBL_R;
   }

   /* compute y = b**r mod a */
   if ((err = mp_init (&y)) != MP_OKAY) {
-    goto __R;
+    goto LBL_R;
   }
   if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
-    goto __Y;
+    goto LBL_Y;
   }

   /* if y != 1 and y != n1 do */
@@ -72,12 +72,12 @@ int mp_prime_miller_rabin (mp_int * a, m
     /* while j <= s-1 and y != n1 */
     while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
       if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
-         goto __Y;
+         goto LBL_Y;
       }

       /* if y == 1 then composite */
       if (mp_cmp_d (&y, 1) == MP_EQ) {
-         goto __Y;
+         goto LBL_Y;
       }

       ++j;
@@ -85,15 +85,15 @@ int mp_prime_miller_rabin (mp_int * a, m

     /* if y != n1 then composite */
     if (mp_cmp (&y, &n1) != MP_EQ) {
-      goto __Y;
+      goto LBL_Y;
     }
   }

   /* probably prime now */
   *result = MP_YES;
-__Y:mp_clear (&y);
-__R:mp_clear (&r);
-__N1:mp_clear (&n1);
+LBL_Y:mp_clear (&y);
+LBL_R:mp_clear (&r);
+LBL_N1:mp_clear (&n1);
   return err;
 }
 #endif
============================================================
--- bn_mp_prime_next_prime.c	23111d40777e3c227c814ceb8726f187328b1db9
+++ bn_mp_prime_next_prime.c	cc04aebd4d5b5e203b330e0f4a8180c48be6af13
@@ -35,10 +35,10 @@ int mp_prime_next_prime(mp_int *a, int t
    a->sign = MP_ZPOS;

    /* simple algo if a is less than the largest prime in the table */
-   if (mp_cmp_d(a, __prime_tab[PRIME_SIZE-1]) == MP_LT) {
+   if (mp_cmp_d(a, ltm_prime_tab[PRIME_SIZE-1]) == MP_LT) {
       /* find which prime it is bigger than */
       for (x = PRIME_SIZE - 2; x >= 0; x--) {
-          if (mp_cmp_d(a, __prime_tab[x]) != MP_LT) {
+          if (mp_cmp_d(a, ltm_prime_tab[x]) != MP_LT) {
              if (bbs_style == 1) {
                 /* ok we found a prime smaller or
                  * equal [so the next is larger]
@@ -46,17 +46,17 @@ int mp_prime_next_prime(mp_int *a, int t
                  * however, the prime must be
                  * congruent to 3 mod 4
                  */
-                if ((__prime_tab[x + 1] & 3) != 3) {
+                if ((ltm_prime_tab[x + 1] & 3) != 3) {
                    /* scan upwards for a prime congruent to 3 mod 4 */
                    for (y = x + 1; y < PRIME_SIZE; y++) {
-                       if ((__prime_tab[y] & 3) == 3) {
-                          mp_set(a, __prime_tab[y]);
+                       if ((ltm_prime_tab[y] & 3) == 3) {
+                          mp_set(a, ltm_prime_tab[y]);
                           return MP_OKAY;
                        }
                    }
                 }
              } else {
-                mp_set(a, __prime_tab[x + 1]);
+                mp_set(a, ltm_prime_tab[x + 1]);
                 return MP_OKAY;
              }
           }
@@ -94,7 +94,7 @@ int mp_prime_next_prime(mp_int *a, int t

    /* generate the restable */
    for (x = 1; x < PRIME_SIZE; x++) {
-      if ((err = mp_mod_d(a, __prime_tab[x], res_tab + x)) != MP_OKAY) {
+      if ((err = mp_mod_d(a, ltm_prime_tab[x], res_tab + x)) != MP_OKAY) {
          return err;
       }
    }
@@ -120,8 +120,8 @@ int mp_prime_next_prime(mp_int *a, int t
              res_tab[x] += kstep;

              /* subtract the modulus [instead of using division] */
-             if (res_tab[x] >= __prime_tab[x]) {
-                res_tab[x]  -= __prime_tab[x];
+             if (res_tab[x] >= ltm_prime_tab[x]) {
+                res_tab[x]  -= ltm_prime_tab[x];
              }

              /* set flag if zero */
@@ -133,7 +133,7 @@ int mp_prime_next_prime(mp_int *a, int t

       /* add the step */
       if ((err = mp_add_d(a, step, a)) != MP_OKAY) {
-         goto __ERR;
+         goto LBL_ERR;
       }

       /* if didn't pass sieve and step == MAX then skip test */
@@ -143,9 +143,9 @@ int mp_prime_next_prime(mp_int *a, int t

       /* is this prime? */
       for (x = 0; x < t; x++) {
-          mp_set(&b, __prime_tab[t]);
+          mp_set(&b, ltm_prime_tab[t]);
           if ((err = mp_prime_miller_rabin(a, &b, &res)) != MP_OKAY) {
-             goto __ERR;
+             goto LBL_ERR;
           }
           if (res == MP_NO) {
              break;
@@ -158,7 +158,7 @@ int mp_prime_next_prime(mp_int *a, int t
    }

    err = MP_OKAY;
-__ERR:
+LBL_ERR:
    mp_clear(&b);
    return err;
 }
============================================================
--- bn_mp_prime_random_ex.c	31f8a27d870e541417b0ca1a911480e1e060169f
+++ bn_mp_prime_random_ex.c	4cba143b51b505f3c881caf21f5879695d0dcefa
@@ -47,7 +47,7 @@ int mp_prime_random_ex(mp_int *a, int t,
    }

    /* calc the byte size */
-   bsize = (size>>3)+(size&7?1:0);
+   bsize = (size>>3) + ((size&7)?1:0);

    /* we need a buffer of bsize bytes */
    tmp = OPT_CAST(unsigned char) XMALLOC(bsize);
@@ -56,19 +56,19 @@ int mp_prime_random_ex(mp_int *a, int t,
    }

    /* calc the maskAND value for the MSbyte*/
-   maskAND = 0xFF >> (8 - (size & 7));
+   maskAND = ((size&7) == 0) ? 0xFF : (0xFF >> (8 - (size & 7)));

    /* calc the maskOR_msb */
    maskOR_msb        = 0;
-   maskOR_msb_offset = (size - 2) >> 3;
+   maskOR_msb_offset = ((size & 7) == 1) ? 1 : 0;
    if (flags & LTM_PRIME_2MSB_ON) {
       maskOR_msb     |= 1 << ((size - 2) & 7);
    } else if (flags & LTM_PRIME_2MSB_OFF) {
       maskAND        &= ~(1 << ((size - 2) & 7));
-   }
+   }

    /* get the maskOR_lsb */
-   maskOR_lsb         = 0;
+   maskOR_lsb         = 1;
    if (flags & LTM_PRIME_BBS) {
       maskOR_lsb     |= 3;
    }
============================================================
--- bn_mp_radix_size.c	891ae52347addbb788c35c3c11e4912c6f6d7c35
+++ bn_mp_radix_size.c	cea57d1d767181e6a51d13d3dd79684a4e62482a
@@ -35,22 +35,29 @@ int mp_radix_size (mp_int * a, int radix
     return MP_VAL;
   }

-  /* init a copy of the input */
-  if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
-    return res;
+  if (mp_iszero(a) == MP_YES) {
+     *size = 2;
+    return MP_OKAY;
   }

   /* digs is the digit count */
   digs = 0;

   /* if it's negative add one for the sign */
-  if (t.sign == MP_NEG) {
+  if (a->sign == MP_NEG) {
     ++digs;
-    t.sign = MP_ZPOS;
   }

+  /* init a copy of the input */
+  if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
+    return res;
+  }
+
+  /* force temp to positive */
+  t.sign = MP_ZPOS;
+
   /* fetch out all of the digits */
-  while (mp_iszero (&t) == 0) {
+  while (mp_iszero (&t) == MP_NO) {
     if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
       mp_clear (&t);
       return res;
============================================================
--- bn_mp_rand.c	139744910e08eeab5d37af0510cea07399d318ac
+++ bn_mp_rand.c	21e113ee13fd97a003bb05376e3bf1c706469d43
@@ -29,14 +29,14 @@ mp_rand (mp_int * a, int digits)

   /* first place a random non-zero digit */
   do {
-    d = ((mp_digit) abs (rand ()));
+    d = ((mp_digit) abs (rand ())) & MP_MASK;
   } while (d == 0);

   if ((res = mp_add_d (a, d, a)) != MP_OKAY) {
     return res;
   }

-  while (digits-- > 0) {
+  while (--digits > 0) {
     if ((res = mp_lshd (a, 1)) != MP_OKAY) {
       return res;
     }
============================================================
--- bn_mp_read_radix.c	a29dabc5cc4dfe65e231782ea78675ca067302b7
+++ bn_mp_read_radix.c	a761d92fa0571ee25a4d8e25029b65ed715f5c41
@@ -16,7 +16,7 @@
  */

 /* read a string [ASCII] in a given radix */
-int mp_read_radix (mp_int * a, char *str, int radix)
+int mp_read_radix (mp_int * a, const char *str, int radix)
 {
   int     y, res, neg;
   char    ch;
============================================================
--- bn_mp_reduce.c	6e809ceeb191e65004ef4d040f5f8b14a2253074
+++ bn_mp_reduce.c	22c1ae63de48d5cc0e48db4bcadef234f025e060
@@ -19,8 +19,7 @@
  * precomputed via mp_reduce_setup.
  * From HAC pp.604 Algorithm 14.42
  */
-int
-mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
+int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
 {
   mp_int  q;
   int     res, um = m->used;
@@ -40,11 +39,11 @@ mp_reduce (mp_int * x, mp_int * m, mp_in
     }
   } else {
 #ifdef BN_S_MP_MUL_HIGH_DIGS_C
-    if ((res = s_mp_mul_high_digs (&q, mu, &q, um - 1)) != MP_OKAY) {
+    if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
       goto CLEANUP;
     }
 #elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
-    if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um - 1)) != MP_OKAY) {
+    if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
       goto CLEANUP;
     }
 #else
============================================================
--- bn_mp_reduce_2k.c	9aa2bd43d0e23a59c71b2ee1461823cad97cbc1b
+++ bn_mp_reduce_2k.c	3a75893da1c3a1c868056cb31e118a3683200bd5
@@ -16,8 +16,7 @@
  */

 /* reduces a modulo n where n is of the form 2**p - d */
-int
-mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d)
+int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d)
 {
    mp_int q;
    int    p, res;
============================================================
--- bn_mp_reduce_2k_setup.c	6989724dbee168ff723d4732c1f5bf29fb05dd33
+++ bn_mp_reduce_2k_setup.c	94f2c64d767763bc4c38322b408d768ca2aa4ecc
@@ -16,8 +16,7 @@
  */

 /* determines the setup value */
-int
-mp_reduce_2k_setup(mp_int *a, mp_digit *d)
+int mp_reduce_2k_setup(mp_int *a, mp_digit *d)
 {
    int res, p;
    mp_int tmp;
============================================================
--- bn_mp_reduce_is_2k.c	daa704193df12dfc34059c8a7eb5223470b9464f
+++ bn_mp_reduce_is_2k.c	4d4b366051978336c6ab81f3e987b0aa19c16812
@@ -22,9 +22,9 @@ int mp_reduce_is_2k(mp_int *a)
    mp_digit iz;

    if (a->used == 0) {
-      return 0;
+      return MP_NO;
    } else if (a->used == 1) {
-      return 1;
+      return MP_YES;
    } else if (a->used > 1) {
       iy = mp_count_bits(a);
       iz = 1;
@@ -33,7 +33,7 @@ int mp_reduce_is_2k(mp_int *a)
       /* Test every bit from the second digit up, must be 1 */
       for (ix = DIGIT_BIT; ix < iy; ix++) {
           if ((a->dp[iw] & iz) == 0) {
-             return 0;
+             return MP_NO;
           }
           iz <<= 1;
           if (iz > (mp_digit)MP_MASK) {
@@ -42,7 +42,7 @@ int mp_reduce_is_2k(mp_int *a)
           }
       }
    }
-   return 1;
+   return MP_YES;
 }

 #endif
============================================================
--- bn_mp_to_signed_bin.c	7c004817480da3880393125065bb1c86994e49aa
+++ bn_mp_to_signed_bin.c	37639284a790d77339bcd7f53e66a18017563dd6
@@ -16,8 +16,7 @@
  */

 /* store in signed [big endian] format */
-int
-mp_to_signed_bin (mp_int * a, unsigned char *b)
+int mp_to_signed_bin (mp_int * a, unsigned char *b)
 {
   int     res;

============================================================
--- bn_mp_to_unsigned_bin.c	b63653084377cf62c63b7ad3f5fbe8b3bc1daa7e
+++ bn_mp_to_unsigned_bin.c	efb1eee1129615146646f646378a25742c9a6492
@@ -16,8 +16,7 @@
  */

 /* store in unsigned [big endian] format */
-int
-mp_to_unsigned_bin (mp_int * a, unsigned char *b)
+int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
 {
   int     x, res;
   mp_int  t;
============================================================
--- bn_mp_toom_mul.c	ef10988525e274d5e954a11fc04edb96bc4590a3
+++ bn_mp_toom_mul.c	8a45c9922034269d563cbdbd1b55d772ccc06056
@@ -17,9 +17,10 @@

 /* multiplication using the Toom-Cook 3-way algorithm
  *
- * Much more complicated than Karatsuba but has a lower asymptotic running time of
- * O(N**1.464).  This algorithm is only particularly useful on VERY large
- * inputs (we're talking 1000s of digits here...).
+ * Much more complicated than Karatsuba but has a lower
+ * asymptotic running time of O(N**1.464).  This algorithm is
+ * only particularly useful on VERY large inputs
+ * (we're talking 1000s of digits here...).
 */
 int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c)
 {
============================================================
--- bn_mp_unsigned_bin_size.c	30a163d04f2e09c8df11ab2f8e9f781adb500ec4
+++ bn_mp_unsigned_bin_size.c	8b1963305618f9f8effe51212c9794d59e8d6c24
@@ -16,8 +16,7 @@
  */

 /* get the size for an unsigned equivalent */
-int
-mp_unsigned_bin_size (mp_int * a)
+int mp_unsigned_bin_size (mp_int * a)
 {
   int     size = mp_count_bits (a);
   return (size / 8 + ((size & 7) != 0 ? 1 : 0));
============================================================
--- bn_mp_xor.c	fe21cdda0a62908ce461de7a806cfdd743c4db1b
+++ bn_mp_xor.c	6a3de9432fe1ad939de000694be657772cd913e7
@@ -37,7 +37,7 @@ mp_xor (mp_int * a, mp_int * b, mp_int *
   }

   for (ix = 0; ix < px; ix++) {
-
+     t.dp[ix] ^= x->dp[ix];
   }
   mp_clamp (&t);
   mp_exch (c, &t);
============================================================
--- bn_mp_zero.c	130f819e9e81d1206739e40b52df5b86a82d584c
+++ bn_mp_zero.c	4247063811e504255b015c2cc19acb28d2a304e4
@@ -16,11 +16,17 @@
  */

 /* set to zero */
-void
-mp_zero (mp_int * a)
+void mp_zero (mp_int * a)
 {
+  int       n;
+  mp_digit *tmp;
+
   a->sign = MP_ZPOS;
   a->used = 0;
-  memset (a->dp, 0, sizeof (mp_digit) * a->alloc);
+
+  tmp = a->dp;
+  for (n = 0; n < a->alloc; n++) {
+     *tmp++ = 0;
+  }
 }
 #endif
============================================================
--- bn_prime_tab.c	55558f0b3a503f1604933fb2d05e900af8ea9c40
+++ bn_prime_tab.c	fec54b5cd76b20913620ffe6922c6a37ee96e1b7
@@ -14,7 +14,7 @@
  *
  * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org
  */
-const mp_digit __prime_tab[] = {
+const mp_digit ltm_prime_tab[] = {
   0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
   0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
   0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
============================================================
--- bn_s_mp_exptmod.c	9af99058a9d79e6032e4c8302c568dc89b9a8d4d
+++ bn_s_mp_exptmod.c	7ac1accce85a140678573008fcdd67f2834737ed
@@ -21,11 +21,12 @@
    #define TAB_SIZE 256
 #endif

-int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
+int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
 {
   mp_int  M[TAB_SIZE], res, mu;
   mp_digit buf;
   int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
+  int (*redux)(mp_int*,mp_int*,mp_int*);

   /* find window size */
   x = mp_count_bits (X);
@@ -70,11 +71,20 @@ int s_mp_exptmod (mp_int * G, mp_int * X

   /* create mu, used for Barrett reduction */
   if ((err = mp_init (&mu)) != MP_OKAY) {
-    goto __M;
+    goto LBL_M;
   }
-  if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
-    goto __MU;
-  }
+
+  if (redmode == 0) {
+     if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
+        goto LBL_MU;
+     }
+     redux = mp_reduce;
+  } else {
+     if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
+        goto LBL_MU;
+     }
+     redux = mp_reduce_2k_l;
+  }

   /* create M table
    *
@@ -85,23 +95,26 @@ int s_mp_exptmod (mp_int * G, mp_int * X
    * computed though accept for M[0] and M[1]
    */
   if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
-    goto __MU;
+    goto LBL_MU;
   }

   /* compute the value at M[1<<(winsize-1)] by squaring
    * M[1] (winsize-1) times
    */
   if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
-    goto __MU;
+    goto LBL_MU;
   }

   for (x = 0; x < (winsize - 1); x++) {
+    /* square it */
     if ((err = mp_sqr (&M[1 << (winsize - 1)],
                        &M[1 << (winsize - 1)])) != MP_OKAY) {
-      goto __MU;
+      goto LBL_MU;
     }
-    if ((err = mp_reduce (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
-      goto __MU;
+
+    /* reduce modulo P */
+    if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
+      goto LBL_MU;
     }
   }

@@ -110,16 +123,16 @@ int s_mp_exptmod (mp_int * G, mp_int * X
    */
   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
     if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
-      goto __MU;
+      goto LBL_MU;
     }
-    if ((err = mp_reduce (&M[x], P, &mu)) != MP_OKAY) {
-      goto __MU;
+    if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
+      goto LBL_MU;
     }
   }

   /* setup result */
   if ((err = mp_init (&res)) != MP_OKAY) {
-    goto __MU;
+    goto LBL_MU;
   }
   mp_set (&res, 1);

@@ -159,10 +172,10 @@ int s_mp_exptmod (mp_int * G, mp_int * X
     /* if the bit is zero and mode == 1 then we square */
     if (mode == 1 && y == 0) {
       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
-        goto __RES;
+        goto LBL_RES;
       }
-      if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
-        goto __RES;
+      if ((err = redux (&res, P, &mu)) != MP_OKAY) {
+        goto LBL_RES;
       }
       continue;
     }
@@ -176,19 +189,19 @@ int s_mp_exptmod (mp_int * G, mp_int * X
       /* square first */
       for (x = 0; x < winsize; x++) {
         if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
-          goto __RES;
+          goto LBL_RES;
         }
-        if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
-          goto __RES;
+        if ((err = redux (&res, P, &mu)) != MP_OKAY) {
+          goto LBL_RES;
         }
       }

       /* then multiply */
       if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
-        goto __RES;
+        goto LBL_RES;
       }
-      if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
-        goto __RES;
+      if ((err = redux (&res, P, &mu)) != MP_OKAY) {
+        goto LBL_RES;
       }

       /* empty window and reset */
@@ -203,20 +216,20 @@ int s_mp_exptmod (mp_int * G, mp_int * X
     /* square then multiply if the bit is set */
     for (x = 0; x < bitcpy; x++) {
       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
-        goto __RES;
+        goto LBL_RES;
       }
-      if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
-        goto __RES;
+      if ((err = redux (&res, P, &mu)) != MP_OKAY) {
+        goto LBL_RES;
       }

       bitbuf <<= 1;
       if ((bitbuf & (1 << winsize)) != 0) {
         /* then multiply */
         if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
-          goto __RES;
+          goto LBL_RES;
         }
-        if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
-          goto __RES;
+        if ((err = redux (&res, P, &mu)) != MP_OKAY) {
+          goto LBL_RES;
         }
       }
     }
@@ -224,9 +237,9 @@ int s_mp_exptmod (mp_int * G, mp_int * X

   mp_exch (&res, Y);
   err = MP_OKAY;
-__RES:mp_clear (&res);
-__MU:mp_clear (&mu);
-__M:
+LBL_RES:mp_clear (&res);
+LBL_MU:mp_clear (&mu);
+LBL_M:
   mp_clear(&M[1]);
   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
     mp_clear (&M[x]);
============================================================
--- bn_s_mp_mul_digs.c	132dd48c3ff5effbb5ea45cdcd744ee77d7253da
+++ bn_s_mp_mul_digs.c	d312dd371b770bf210c2d96f267247163ab15e0f
@@ -19,8 +19,7 @@
  * HAC pp. 595, Algorithm 14.12  Modified so you can control how
  * many digits of output are created.
  */
-int
-s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
+int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
 {
   mp_int  t;
   int     res, pa, pb, ix, iy;
============================================================
--- bn_s_mp_sqr.c	73569fd80f5b6535e45cd018a71661f52e4eb9b1
+++ bn_s_mp_sqr.c	eba4101c1f1ff9dc11fadf942075e3e0fff40737
@@ -16,8 +16,7 @@
  */

 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
-int
-s_mp_sqr (mp_int * a, mp_int * b)
+int s_mp_sqr (mp_int * a, mp_int * b)
 {
   mp_int  t;
   int     res, ix, iy, pa;
============================================================
--- bncore.c	9168a11431458dface0fe16fb1cb4e5c483ff314
+++ bncore.c	2a706b0ca7b0d41a6ef37bc33c26d2d2a4338b7e
@@ -20,11 +20,12 @@
  CPU                    /Compiler     /MUL CUTOFF/SQR CUTOFF
 -------------------------------------------------------------
  Intel P4 Northwood     /GCC v3.4.1   /        88/       128/LTM 0.32 ;-)
+ AMD Athlon64           /GCC v3.4.4   /        74/       124/LTM 0.34

 */

-int     KARATSUBA_MUL_CUTOFF = 88,      /* Min. number of digits before Karatsuba multiplication is used. */
-        KARATSUBA_SQR_CUTOFF = 128,     /* Min. number of digits before Karatsuba squaring is used. */
+int     KARATSUBA_MUL_CUTOFF = 74,      /* Min. number of digits before Karatsuba multiplication is used. */
+        KARATSUBA_SQR_CUTOFF = 124,     /* Min. number of digits before Karatsuba squaring is used. */

         TOOM_MUL_CUTOFF      = 350,      /* no optimal values of these are known yet so set em high */
         TOOM_SQR_CUTOFF      = 400;
============================================================
--- callgraph.txt	1c575d5b40fd8adc98fcbae3be9b0b992d9fd7ac
+++ callgraph.txt	bc126b493294098a3277e79cc94bcb17871a085c
@@ -245,6 +245,7 @@
 |   |   +--->BN_MP_INIT_MULTI_C
 |   |   |   +--->BN_MP_CLEAR_C
 |   |   +--->BN_MP_COUNT_BITS_C
+|   |   +--->BN_MP_ABS_C
 |   |   +--->BN_MP_MUL_2D_C
 |   |   |   +--->BN_MP_GROW_C
 |   |   |   +--->BN_MP_LSHD_C
@@ -298,6 +299,7 @@
 |   |   +--->BN_MP_CLEAR_C
 |   +--->BN_MP_SET_C
 |   +--->BN_MP_COUNT_BITS_C
+|   +--->BN_MP_ABS_C
 |   +--->BN_MP_MUL_2D_C
 |   |   +--->BN_MP_GROW_C
 |   |   +--->BN_MP_LSHD_C
@@ -404,6 +406,7 @@
 |   |   |   +--->BN_MP_CLEAR_C
 |   |   +--->BN_MP_SET_C
 |   |   +--->BN_MP_COUNT_BITS_C
+|   |   +--->BN_MP_ABS_C
 |   |   +--->BN_MP_MUL_2D_C
 |   |   |   +--->BN_MP_GROW_C
 |   |   |   +--->BN_MP_LSHD_C
@@ -700,6 +703,7 @@
 |   |   |   +--->BN_MP_INIT_MULTI_C
 |   |   |   |   +--->BN_MP_CLEAR_C
 |   |   |   +--->BN_MP_COUNT_BITS_C
+|   |   |   +--->BN_MP_ABS_C
 |   |   |   +--->BN_MP_MUL_2D_C
 |   |   |   |   +--->BN_MP_GROW_C
 |   |   |   |   +--->BN_MP_LSHD_C
@@ -753,6 +757,7 @@
 |   |   |   +--->BN_MP_CLEAR_C
 |   |   +--->BN_MP_SET_C
 |   |   +--->BN_MP_COUNT_BITS_C
+|   |   +--->BN_MP_ABS_C
 |   |   +--->BN_MP_MUL_2D_C
 |   |   |   +--->BN_MP_GROW_C
 |   |   |   +--->BN_MP_LSHD_C
@@ -902,7 +907,64 @@
 |   |   |   +--->BN_MP_CLEAR_C
 |   |   +--->BN_MP_COPY_C
 |   |   |   +--->BN_MP_GROW_C
-|   |   +--->BN_MP_ABS_C
+|   |   +--->BN_MP_MOD_C
+|   |   |   +--->BN_MP_DIV_C
+|   |   |   |   +--->BN_MP_CMP_MAG_C
+|   |   |   |   +--->BN_MP_ZERO_C
+|   |   |   |   +--->BN_MP_SET_C
+|   |   |   |   +--->BN_MP_COUNT_BITS_C
+|   |   |   |   +--->BN_MP_ABS_C
+|   |   |   |   +--->BN_MP_MUL_2D_C
+|   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   +--->BN_MP_LSHD_C
+|   |   |   |   |   |   +--->BN_MP_RSHD_C
+|   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_CMP_C
+|   |   |   |   +--->BN_MP_SUB_C
+|   |   |   |   |   +--->BN_S_MP_ADD_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   +--->BN_S_MP_SUB_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_ADD_C
+|   |   |   |   |   +--->BN_S_MP_ADD_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   +--->BN_S_MP_SUB_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_DIV_2D_C
+|   |   |   |   |   +--->BN_MP_MOD_2D_C
+|   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   +--->BN_MP_CLEAR_C
+|   |   |   |   |   +--->BN_MP_RSHD_C
+|   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   +--->BN_MP_EXCH_C
+|   |   |   |   +--->BN_MP_EXCH_C
+|   |   |   |   +--->BN_MP_CLEAR_MULTI_C
+|   |   |   |   |   +--->BN_MP_CLEAR_C
+|   |   |   |   +--->BN_MP_INIT_SIZE_C
+|   |   |   |   +--->BN_MP_INIT_COPY_C
+|   |   |   |   +--->BN_MP_LSHD_C
+|   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   +--->BN_MP_RSHD_C
+|   |   |   |   +--->BN_MP_RSHD_C
+|   |   |   |   +--->BN_MP_MUL_D_C
+|   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_CLEAR_C
+|   |   |   +--->BN_MP_CLEAR_C
+|   |   |   +--->BN_MP_ADD_C
+|   |   |   |   +--->BN_S_MP_ADD_C
+|   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_CMP_MAG_C
+|   |   |   |   +--->BN_S_MP_SUB_C
+|   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   +--->BN_MP_EXCH_C
 |   |   +--->BN_MP_SET_C
 |   |   |   +--->BN_MP_ZERO_C
 |   |   +--->BN_MP_DIV_2_C
@@ -933,6 +995,66 @@
 |   +--->BN_MP_INVMOD_SLOW_C
 |   |   +--->BN_MP_INIT_MULTI_C
 |   |   |   +--->BN_MP_CLEAR_C
+|   |   +--->BN_MP_MOD_C
+|   |   |   +--->BN_MP_DIV_C
+|   |   |   |   +--->BN_MP_CMP_MAG_C
+|   |   |   |   +--->BN_MP_COPY_C
+|   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   +--->BN_MP_ZERO_C
+|   |   |   |   +--->BN_MP_SET_C
+|   |   |   |   +--->BN_MP_COUNT_BITS_C
+|   |   |   |   +--->BN_MP_ABS_C
+|   |   |   |   +--->BN_MP_MUL_2D_C
+|   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   +--->BN_MP_LSHD_C
+|   |   |   |   |   |   +--->BN_MP_RSHD_C
+|   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_CMP_C
+|   |   |   |   +--->BN_MP_SUB_C
+|   |   |   |   |   +--->BN_S_MP_ADD_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   +--->BN_S_MP_SUB_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_ADD_C
+|   |   |   |   |   +--->BN_S_MP_ADD_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   +--->BN_S_MP_SUB_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_DIV_2D_C
+|   |   |   |   |   +--->BN_MP_MOD_2D_C
+|   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   +--->BN_MP_CLEAR_C
+|   |   |   |   |   +--->BN_MP_RSHD_C
+|   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   +--->BN_MP_EXCH_C
+|   |   |   |   +--->BN_MP_EXCH_C
+|   |   |   |   +--->BN_MP_CLEAR_MULTI_C
+|   |   |   |   |   +--->BN_MP_CLEAR_C
+|   |   |   |   +--->BN_MP_INIT_SIZE_C
+|   |   |   |   +--->BN_MP_INIT_COPY_C
+|   |   |   |   +--->BN_MP_LSHD_C
+|   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   +--->BN_MP_RSHD_C
+|   |   |   |   +--->BN_MP_RSHD_C
+|   |   |   |   +--->BN_MP_MUL_D_C
+|   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_CLEAR_C
+|   |   |   +--->BN_MP_CLEAR_C
+|   |   |   +--->BN_MP_ADD_C
+|   |   |   |   +--->BN_S_MP_ADD_C
+|   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_CMP_MAG_C
+|   |   |   |   +--->BN_S_MP_SUB_C
+|   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   +--->BN_MP_EXCH_C
 |   |   +--->BN_MP_COPY_C
 |   |   |   +--->BN_MP_GROW_C
 |   |   +--->BN_MP_SET_C
@@ -968,93 +1090,63 @@
 |   +--->BN_MP_COPY_C
 |   |   +--->BN_MP_GROW_C
 +--->BN_MP_CLEAR_MULTI_C
-+--->BN_MP_DR_IS_MODULUS_C
-+--->BN_MP_REDUCE_IS_2K_C
-|   +--->BN_MP_REDUCE_2K_C
-|   |   +--->BN_MP_COUNT_BITS_C
-|   |   +--->BN_MP_DIV_2D_C
-|   |   |   +--->BN_MP_COPY_C
-|   |   |   |   +--->BN_MP_GROW_C
-|   |   |   +--->BN_MP_ZERO_C
-|   |   |   +--->BN_MP_MOD_2D_C
-|   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   +--->BN_MP_RSHD_C
-|   |   |   +--->BN_MP_CLAMP_C
-|   |   |   +--->BN_MP_EXCH_C
-|   |   +--->BN_MP_MUL_D_C
-|   |   |   +--->BN_MP_GROW_C
-|   |   |   +--->BN_MP_CLAMP_C
-|   |   +--->BN_S_MP_ADD_C
-|   |   |   +--->BN_MP_GROW_C
-|   |   |   +--->BN_MP_CLAMP_C
-|   |   +--->BN_MP_CMP_MAG_C
-|   |   +--->BN_S_MP_SUB_C
-|   |   |   +--->BN_MP_GROW_C
-|   |   |   +--->BN_MP_CLAMP_C
++--->BN_MP_REDUCE_IS_2K_L_C
++--->BN_S_MP_EXPTMOD_C
 |   +--->BN_MP_COUNT_BITS_C
-+--->BN_MP_EXPTMOD_FAST_C
-|   +--->BN_MP_COUNT_BITS_C
-|   +--->BN_MP_MONTGOMERY_SETUP_C
-|   +--->BN_FAST_MP_MONTGOMERY_REDUCE_C
-|   |   +--->BN_MP_GROW_C
-|   |   +--->BN_MP_RSHD_C
-|   |   |   +--->BN_MP_ZERO_C
-|   |   +--->BN_MP_CLAMP_C
-|   |   +--->BN_MP_CMP_MAG_C
-|   |   +--->BN_S_MP_SUB_C
-|   +--->BN_MP_MONTGOMERY_REDUCE_C
-|   |   +--->BN_MP_GROW_C
-|   |   +--->BN_MP_CLAMP_C
-|   |   +--->BN_MP_RSHD_C
-|   |   |   +--->BN_MP_ZERO_C
-|   |   +--->BN_MP_CMP_MAG_C
-|   |   +--->BN_S_MP_SUB_C
-|   +--->BN_MP_DR_SETUP_C
-|   +--->BN_MP_DR_REDUCE_C
-|   |   +--->BN_MP_GROW_C
-|   |   +--->BN_MP_CLAMP_C
-|   |   +--->BN_MP_CMP_MAG_C
-|   |   +--->BN_S_MP_SUB_C
-|   +--->BN_MP_REDUCE_2K_SETUP_C
+|   +--->BN_MP_REDUCE_SETUP_C
 |   |   +--->BN_MP_2EXPT_C
 |   |   |   +--->BN_MP_ZERO_C
 |   |   |   +--->BN_MP_GROW_C
-|   |   +--->BN_S_MP_SUB_C
-|   |   |   +--->BN_MP_GROW_C
-|   |   |   +--->BN_MP_CLAMP_C
-|   +--->BN_MP_REDUCE_2K_C
-|   |   +--->BN_MP_DIV_2D_C
+|   |   +--->BN_MP_DIV_C
+|   |   |   +--->BN_MP_CMP_MAG_C
 |   |   |   +--->BN_MP_COPY_C
 |   |   |   |   +--->BN_MP_GROW_C
 |   |   |   +--->BN_MP_ZERO_C
-|   |   |   +--->BN_MP_MOD_2D_C
+|   |   |   +--->BN_MP_INIT_MULTI_C
+|   |   |   +--->BN_MP_SET_C
+|   |   |   +--->BN_MP_MUL_2D_C
+|   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   +--->BN_MP_LSHD_C
+|   |   |   |   |   +--->BN_MP_RSHD_C
 |   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   +--->BN_MP_RSHD_C
-|   |   |   +--->BN_MP_CLAMP_C
+|   |   |   +--->BN_MP_CMP_C
+|   |   |   +--->BN_MP_SUB_C
+|   |   |   |   +--->BN_S_MP_ADD_C
+|   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_S_MP_SUB_C
+|   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   +--->BN_MP_ADD_C
+|   |   |   |   +--->BN_S_MP_ADD_C
+|   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_S_MP_SUB_C
+|   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   +--->BN_MP_DIV_2D_C
+|   |   |   |   +--->BN_MP_MOD_2D_C
+|   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_RSHD_C
+|   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_EXCH_C
 |   |   |   +--->BN_MP_EXCH_C
-|   |   +--->BN_MP_MUL_D_C
-|   |   |   +--->BN_MP_GROW_C
+|   |   |   +--->BN_MP_INIT_SIZE_C
+|   |   |   +--->BN_MP_INIT_COPY_C
+|   |   |   +--->BN_MP_LSHD_C
+|   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   +--->BN_MP_RSHD_C
+|   |   |   +--->BN_MP_RSHD_C
+|   |   |   +--->BN_MP_MUL_D_C
+|   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   +--->BN_MP_CLAMP_C
 |   |   |   +--->BN_MP_CLAMP_C
-|   |   +--->BN_S_MP_ADD_C
-|   |   |   +--->BN_MP_GROW_C
-|   |   |   +--->BN_MP_CLAMP_C
-|   |   +--->BN_MP_CMP_MAG_C
-|   |   +--->BN_S_MP_SUB_C
-|   |   |   +--->BN_MP_GROW_C
-|   |   |   +--->BN_MP_CLAMP_C
-|   +--->BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
-|   |   +--->BN_MP_2EXPT_C
+|   +--->BN_MP_REDUCE_C
+|   |   +--->BN_MP_INIT_COPY_C
+|   |   |   +--->BN_MP_COPY_C
+|   |   |   |   +--->BN_MP_GROW_C
+|   |   +--->BN_MP_RSHD_C
 |   |   |   +--->BN_MP_ZERO_C
-|   |   |   +--->BN_MP_GROW_C
-|   |   +--->BN_MP_SET_C
-|   |   |   +--->BN_MP_ZERO_C
-|   |   +--->BN_MP_MUL_2_C
-|   |   |   +--->BN_MP_GROW_C
-|   |   +--->BN_MP_CMP_MAG_C
-|   |   +--->BN_S_MP_SUB_C
-|   |   |   +--->BN_MP_GROW_C
-|   |   |   +--->BN_MP_CLAMP_C
-|   +--->BN_MP_MULMOD_C
 |   |   +--->BN_MP_MUL_C
 |   |   |   +--->BN_MP_TOOM_MUL_C
 |   |   |   |   +--->BN_MP_INIT_MULTI_C
@@ -1065,8 +1157,6 @@
 |   |   |   |   |   +--->BN_MP_CLAMP_C
 |   |   |   |   +--->BN_MP_COPY_C
 |   |   |   |   |   +--->BN_MP_GROW_C
-|   |   |   |   +--->BN_MP_RSHD_C
-|   |   |   |   |   +--->BN_MP_ZERO_C
 |   |   |   |   +--->BN_MP_MUL_2_C
 |   |   |   |   |   +--->BN_MP_GROW_C
 |   |   |   |   +--->BN_MP_ADD_C
@@ -1118,8 +1208,6 @@
 |   |   |   |   |   |   +--->BN_MP_GROW_C
 |   |   |   |   +--->BN_MP_LSHD_C
 |   |   |   |   |   +--->BN_MP_GROW_C
-|   |   |   |   |   +--->BN_MP_RSHD_C
-|   |   |   |   |   |   +--->BN_MP_ZERO_C
 |   |   |   +--->BN_FAST_S_MP_MUL_DIGS_C
 |   |   |   |   +--->BN_MP_GROW_C
 |   |   |   |   +--->BN_MP_CLAMP_C
@@ -1127,62 +1215,150 @@
 |   |   |   |   +--->BN_MP_INIT_SIZE_C
 |   |   |   |   +--->BN_MP_CLAMP_C
 |   |   |   |   +--->BN_MP_EXCH_C
-|   |   +--->BN_MP_MOD_C
-|   |   |   +--->BN_MP_DIV_C
-|   |   |   |   +--->BN_MP_CMP_MAG_C
+|   |   +--->BN_S_MP_MUL_HIGH_DIGS_C
+|   |   |   +--->BN_FAST_S_MP_MUL_HIGH_DIGS_C
+|   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   +--->BN_MP_INIT_SIZE_C
+|   |   |   +--->BN_MP_CLAMP_C
+|   |   |   +--->BN_MP_EXCH_C
+|   |   +--->BN_FAST_S_MP_MUL_HIGH_DIGS_C
+|   |   |   +--->BN_MP_GROW_C
+|   |   |   +--->BN_MP_CLAMP_C
+|   |   +--->BN_MP_MOD_2D_C
+|   |   |   +--->BN_MP_ZERO_C
+|   |   |   +--->BN_MP_COPY_C
+|   |   |   |   +--->BN_MP_GROW_C
+|   |   |   +--->BN_MP_CLAMP_C
+|   |   +--->BN_S_MP_MUL_DIGS_C
+|   |   |   +--->BN_FAST_S_MP_MUL_DIGS_C
+|   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   +--->BN_MP_INIT_SIZE_C
+|   |   |   +--->BN_MP_CLAMP_C
+|   |   |   +--->BN_MP_EXCH_C
+|   |   +--->BN_MP_SUB_C
+|   |   |   +--->BN_S_MP_ADD_C
+|   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   +--->BN_MP_CMP_MAG_C
+|   |   |   +--->BN_S_MP_SUB_C
+|   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   +--->BN_MP_CLAMP_C
+|   |   +--->BN_MP_CMP_D_C
+|   |   +--->BN_MP_SET_C
+|   |   |   +--->BN_MP_ZERO_C
+|   |   +--->BN_MP_LSHD_C
+|   |   |   +--->BN_MP_GROW_C
+|   |   +--->BN_MP_ADD_C
+|   |   |   +--->BN_S_MP_ADD_C
+|   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   +--->BN_MP_CMP_MAG_C
+|   |   |   +--->BN_S_MP_SUB_C
+|   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   +--->BN_MP_CLAMP_C
+|   |   +--->BN_MP_CMP_C
+|   |   |   +--->BN_MP_CMP_MAG_C
+|   |   +--->BN_S_MP_SUB_C
+|   |   |   +--->BN_MP_GROW_C
+|   |   |   +--->BN_MP_CLAMP_C
+|   +--->BN_MP_REDUCE_2K_SETUP_L_C
+|   |   +--->BN_MP_2EXPT_C
+|   |   |   +--->BN_MP_ZERO_C
+|   |   |   +--->BN_MP_GROW_C
+|   |   +--->BN_S_MP_SUB_C
+|   |   |   +--->BN_MP_GROW_C
+|   |   |   +--->BN_MP_CLAMP_C
+|   +--->BN_MP_REDUCE_2K_L_C
+|   |   +--->BN_MP_DIV_2D_C
+|   |   |   +--->BN_MP_COPY_C
+|   |   |   |   +--->BN_MP_GROW_C
+|   |   |   +--->BN_MP_ZERO_C
+|   |   |   +--->BN_MP_MOD_2D_C
+|   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   +--->BN_MP_RSHD_C
+|   |   |   +--->BN_MP_CLAMP_C
+|   |   |   +--->BN_MP_EXCH_C
+|   |   +--->BN_MP_MUL_C
+|   |   |   +--->BN_MP_TOOM_MUL_C
+|   |   |   |   +--->BN_MP_INIT_MULTI_C
+|   |   |   |   +--->BN_MP_MOD_2D_C
+|   |   |   |   |   +--->BN_MP_ZERO_C
+|   |   |   |   |   +--->BN_MP_COPY_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   +--->BN_MP_CLAMP_C
 |   |   |   |   +--->BN_MP_COPY_C
 |   |   |   |   |   +--->BN_MP_GROW_C
-|   |   |   |   +--->BN_MP_ZERO_C
-|   |   |   |   +--->BN_MP_INIT_MULTI_C
-|   |   |   |   +--->BN_MP_SET_C
-|   |   |   |   +--->BN_MP_MUL_2D_C
+|   |   |   |   +--->BN_MP_RSHD_C
+|   |   |   |   |   +--->BN_MP_ZERO_C
+|   |   |   |   +--->BN_MP_MUL_2_C
 |   |   |   |   |   +--->BN_MP_GROW_C
-|   |   |   |   |   +--->BN_MP_LSHD_C
-|   |   |   |   |   |   +--->BN_MP_RSHD_C
-|   |   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   |   +--->BN_MP_CMP_C
-|   |   |   |   +--->BN_MP_SUB_C
+|   |   |   |   +--->BN_MP_ADD_C
 |   |   |   |   |   +--->BN_S_MP_ADD_C
 |   |   |   |   |   |   +--->BN_MP_GROW_C
 |   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   +--->BN_MP_CMP_MAG_C
 |   |   |   |   |   +--->BN_S_MP_SUB_C
 |   |   |   |   |   |   +--->BN_MP_GROW_C
 |   |   |   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   |   +--->BN_MP_ADD_C
+|   |   |   |   +--->BN_MP_SUB_C
 |   |   |   |   |   +--->BN_S_MP_ADD_C
 |   |   |   |   |   |   +--->BN_MP_GROW_C
 |   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   +--->BN_MP_CMP_MAG_C
 |   |   |   |   |   +--->BN_S_MP_SUB_C
 |   |   |   |   |   |   +--->BN_MP_GROW_C
 |   |   |   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   |   +--->BN_MP_DIV_2D_C
-|   |   |   |   |   +--->BN_MP_MOD_2D_C
-|   |   |   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   |   |   +--->BN_MP_RSHD_C
+|   |   |   |   +--->BN_MP_DIV_2_C
+|   |   |   |   |   +--->BN_MP_GROW_C
 |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_MUL_2D_C
+|   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   +--->BN_MP_LSHD_C
+|   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_MUL_D_C
+|   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_DIV_3_C
+|   |   |   |   |   +--->BN_MP_INIT_SIZE_C
+|   |   |   |   |   +--->BN_MP_CLAMP_C
 |   |   |   |   |   +--->BN_MP_EXCH_C
-|   |   |   |   +--->BN_MP_EXCH_C
+|   |   |   |   +--->BN_MP_LSHD_C
+|   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   +--->BN_MP_KARATSUBA_MUL_C
 |   |   |   |   +--->BN_MP_INIT_SIZE_C
-|   |   |   |   +--->BN_MP_INIT_COPY_C
+|   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_SUB_C
+|   |   |   |   |   +--->BN_S_MP_ADD_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   +--->BN_MP_CMP_MAG_C
+|   |   |   |   |   +--->BN_S_MP_SUB_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   +--->BN_MP_ADD_C
+|   |   |   |   |   +--->BN_S_MP_ADD_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   +--->BN_MP_CMP_MAG_C
+|   |   |   |   |   +--->BN_S_MP_SUB_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
 |   |   |   |   +--->BN_MP_LSHD_C
 |   |   |   |   |   +--->BN_MP_GROW_C
 |   |   |   |   |   +--->BN_MP_RSHD_C
-|   |   |   |   +--->BN_MP_RSHD_C
-|   |   |   |   +--->BN_MP_MUL_D_C
-|   |   |   |   |   +--->BN_MP_GROW_C
-|   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   |   +--->BN_MP_ZERO_C
+|   |   |   +--->BN_FAST_S_MP_MUL_DIGS_C
+|   |   |   |   +--->BN_MP_GROW_C
 |   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   +--->BN_MP_ADD_C
-|   |   |   |   +--->BN_S_MP_ADD_C
-|   |   |   |   |   +--->BN_MP_GROW_C
-|   |   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   |   +--->BN_MP_CMP_MAG_C
-|   |   |   |   +--->BN_S_MP_SUB_C
-|   |   |   |   |   +--->BN_MP_GROW_C
-|   |   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   +--->BN_MP_EXCH_C
-|   +--->BN_MP_SET_C
-|   |   +--->BN_MP_ZERO_C
+|   |   |   +--->BN_S_MP_MUL_DIGS_C
+|   |   |   |   +--->BN_MP_INIT_SIZE_C
+|   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_EXCH_C
+|   |   +--->BN_S_MP_ADD_C
+|   |   |   +--->BN_MP_GROW_C
+|   |   |   +--->BN_MP_CLAMP_C
+|   |   +--->BN_MP_CMP_MAG_C
+|   |   +--->BN_S_MP_SUB_C
+|   |   |   +--->BN_MP_GROW_C
+|   |   |   +--->BN_MP_CLAMP_C
 |   +--->BN_MP_MOD_C
 |   |   +--->BN_MP_DIV_C
 |   |   |   +--->BN_MP_CMP_MAG_C
@@ -1190,6 +1366,7 @@
 |   |   |   |   +--->BN_MP_GROW_C
 |   |   |   +--->BN_MP_ZERO_C
 |   |   |   +--->BN_MP_INIT_MULTI_C
+|   |   |   +--->BN_MP_SET_C
 |   |   |   +--->BN_MP_MUL_2D_C
 |   |   |   |   +--->BN_MP_GROW_C
 |   |   |   |   +--->BN_MP_LSHD_C
@@ -1374,57 +1551,224 @@
 |   |   |   +--->BN_MP_INIT_SIZE_C
 |   |   |   +--->BN_MP_CLAMP_C
 |   |   |   +--->BN_MP_EXCH_C
+|   +--->BN_MP_SET_C
+|   |   +--->BN_MP_ZERO_C
 |   +--->BN_MP_EXCH_C
-+--->BN_S_MP_EXPTMOD_C
++--->BN_MP_DR_IS_MODULUS_C
++--->BN_MP_REDUCE_IS_2K_C
+|   +--->BN_MP_REDUCE_2K_C
+|   |   +--->BN_MP_COUNT_BITS_C
+|   |   +--->BN_MP_DIV_2D_C
+|   |   |   +--->BN_MP_COPY_C
+|   |   |   |   +--->BN_MP_GROW_C
+|   |   |   +--->BN_MP_ZERO_C
+|   |   |   +--->BN_MP_MOD_2D_C
+|   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   +--->BN_MP_RSHD_C
+|   |   |   +--->BN_MP_CLAMP_C
+|   |   |   +--->BN_MP_EXCH_C
+|   |   +--->BN_MP_MUL_D_C
+|   |   |   +--->BN_MP_GROW_C
+|   |   |   +--->BN_MP_CLAMP_C
+|   |   +--->BN_S_MP_ADD_C
+|   |   |   +--->BN_MP_GROW_C
+|   |   |   +--->BN_MP_CLAMP_C
+|   |   +--->BN_MP_CMP_MAG_C
+|   |   +--->BN_S_MP_SUB_C
+|   |   |   +--->BN_MP_GROW_C
+|   |   |   +--->BN_MP_CLAMP_C
 |   +--->BN_MP_COUNT_BITS_C
-|   +--->BN_MP_REDUCE_SETUP_C
++--->BN_MP_EXPTMOD_FAST_C
+|   +--->BN_MP_COUNT_BITS_C
+|   +--->BN_MP_MONTGOMERY_SETUP_C
+|   +--->BN_FAST_MP_MONTGOMERY_REDUCE_C
+|   |   +--->BN_MP_GROW_C
+|   |   +--->BN_MP_RSHD_C
+|   |   |   +--->BN_MP_ZERO_C
+|   |   +--->BN_MP_CLAMP_C
+|   |   +--->BN_MP_CMP_MAG_C
+|   |   +--->BN_S_MP_SUB_C
+|   +--->BN_MP_MONTGOMERY_REDUCE_C
+|   |   +--->BN_MP_GROW_C
+|   |   +--->BN_MP_CLAMP_C
+|   |   +--->BN_MP_RSHD_C
+|   |   |   +--->BN_MP_ZERO_C
+|   |   +--->BN_MP_CMP_MAG_C
+|   |   +--->BN_S_MP_SUB_C
+|   +--->BN_MP_DR_SETUP_C
+|   +--->BN_MP_DR_REDUCE_C
+|   |   +--->BN_MP_GROW_C
+|   |   +--->BN_MP_CLAMP_C
+|   |   +--->BN_MP_CMP_MAG_C
+|   |   +--->BN_S_MP_SUB_C
+|   +--->BN_MP_REDUCE_2K_SETUP_C
 |   |   +--->BN_MP_2EXPT_C
 |   |   |   +--->BN_MP_ZERO_C
 |   |   |   +--->BN_MP_GROW_C
-|   |   +--->BN_MP_DIV_C
-|   |   |   +--->BN_MP_CMP_MAG_C
+|   |   +--->BN_S_MP_SUB_C
+|   |   |   +--->BN_MP_GROW_C
+|   |   |   +--->BN_MP_CLAMP_C
+|   +--->BN_MP_REDUCE_2K_C
+|   |   +--->BN_MP_DIV_2D_C
 |   |   |   +--->BN_MP_COPY_C
 |   |   |   |   +--->BN_MP_GROW_C
 |   |   |   +--->BN_MP_ZERO_C
-|   |   |   +--->BN_MP_INIT_MULTI_C
-|   |   |   +--->BN_MP_SET_C
-|   |   |   +--->BN_MP_MUL_2D_C
-|   |   |   |   +--->BN_MP_GROW_C
+|   |   |   +--->BN_MP_MOD_2D_C
+|   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   +--->BN_MP_RSHD_C
+|   |   |   +--->BN_MP_CLAMP_C
+|   |   |   +--->BN_MP_EXCH_C
+|   |   +--->BN_MP_MUL_D_C
+|   |   |   +--->BN_MP_GROW_C
+|   |   |   +--->BN_MP_CLAMP_C
+|   |   +--->BN_S_MP_ADD_C
+|   |   |   +--->BN_MP_GROW_C
+|   |   |   +--->BN_MP_CLAMP_C
+|   |   +--->BN_MP_CMP_MAG_C
+|   |   +--->BN_S_MP_SUB_C
+|   |   |   +--->BN_MP_GROW_C
+|   |   |   +--->BN_MP_CLAMP_C
+|   +--->BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
+|   |   +--->BN_MP_2EXPT_C
+|   |   |   +--->BN_MP_ZERO_C
+|   |   |   +--->BN_MP_GROW_C
+|   |   +--->BN_MP_SET_C
+|   |   |   +--->BN_MP_ZERO_C
+|   |   +--->BN_MP_MUL_2_C
+|   |   |   +--->BN_MP_GROW_C
+|   |   +--->BN_MP_CMP_MAG_C
+|   |   +--->BN_S_MP_SUB_C
+|   |   |   +--->BN_MP_GROW_C
+|   |   |   +--->BN_MP_CLAMP_C
+|   +--->BN_MP_MULMOD_C
+|   |   +--->BN_MP_MUL_C
+|   |   |   +--->BN_MP_TOOM_MUL_C
+|   |   |   |   +--->BN_MP_INIT_MULTI_C
+|   |   |   |   +--->BN_MP_MOD_2D_C
+|   |   |   |   |   +--->BN_MP_ZERO_C
+|   |   |   |   |   +--->BN_MP_COPY_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_COPY_C
+|   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   +--->BN_MP_RSHD_C
+|   |   |   |   |   +--->BN_MP_ZERO_C
+|   |   |   |   +--->BN_MP_MUL_2_C
+|   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   +--->BN_MP_ADD_C
+|   |   |   |   |   +--->BN_S_MP_ADD_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   +--->BN_MP_CMP_MAG_C
+|   |   |   |   |   +--->BN_S_MP_SUB_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_SUB_C
+|   |   |   |   |   +--->BN_S_MP_ADD_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   +--->BN_MP_CMP_MAG_C
+|   |   |   |   |   +--->BN_S_MP_SUB_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_DIV_2_C
+|   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_MUL_2D_C
+|   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   +--->BN_MP_LSHD_C
+|   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_MUL_D_C
+|   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_DIV_3_C
+|   |   |   |   |   +--->BN_MP_INIT_SIZE_C
+|   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   +--->BN_MP_EXCH_C
 |   |   |   |   +--->BN_MP_LSHD_C
+|   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   +--->BN_MP_KARATSUBA_MUL_C
+|   |   |   |   +--->BN_MP_INIT_SIZE_C
+|   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_SUB_C
+|   |   |   |   |   +--->BN_S_MP_ADD_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   +--->BN_MP_CMP_MAG_C
+|   |   |   |   |   +--->BN_S_MP_SUB_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   +--->BN_MP_ADD_C
+|   |   |   |   |   +--->BN_S_MP_ADD_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   +--->BN_MP_CMP_MAG_C
+|   |   |   |   |   +--->BN_S_MP_SUB_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   +--->BN_MP_LSHD_C
+|   |   |   |   |   +--->BN_MP_GROW_C
 |   |   |   |   |   +--->BN_MP_RSHD_C
+|   |   |   |   |   |   +--->BN_MP_ZERO_C
+|   |   |   +--->BN_FAST_S_MP_MUL_DIGS_C
+|   |   |   |   +--->BN_MP_GROW_C
 |   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   +--->BN_MP_CMP_C
-|   |   |   +--->BN_MP_SUB_C
-|   |   |   |   +--->BN_S_MP_ADD_C
+|   |   |   +--->BN_S_MP_MUL_DIGS_C
+|   |   |   |   +--->BN_MP_INIT_SIZE_C
+|   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_EXCH_C
+|   |   +--->BN_MP_MOD_C
+|   |   |   +--->BN_MP_DIV_C
+|   |   |   |   +--->BN_MP_CMP_MAG_C
+|   |   |   |   +--->BN_MP_COPY_C
 |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   +--->BN_MP_ZERO_C
+|   |   |   |   +--->BN_MP_INIT_MULTI_C
+|   |   |   |   +--->BN_MP_SET_C
+|   |   |   |   +--->BN_MP_MUL_2D_C
+|   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   +--->BN_MP_LSHD_C
+|   |   |   |   |   |   +--->BN_MP_RSHD_C
 |   |   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   |   +--->BN_S_MP_SUB_C
+|   |   |   |   +--->BN_MP_CMP_C
+|   |   |   |   +--->BN_MP_SUB_C
+|   |   |   |   |   +--->BN_S_MP_ADD_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   +--->BN_S_MP_SUB_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_ADD_C
+|   |   |   |   |   +--->BN_S_MP_ADD_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   +--->BN_S_MP_SUB_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_DIV_2D_C
+|   |   |   |   |   +--->BN_MP_MOD_2D_C
+|   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   +--->BN_MP_RSHD_C
+|   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   +--->BN_MP_EXCH_C
+|   |   |   |   +--->BN_MP_EXCH_C
+|   |   |   |   +--->BN_MP_INIT_SIZE_C
+|   |   |   |   +--->BN_MP_INIT_COPY_C
+|   |   |   |   +--->BN_MP_LSHD_C
 |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   +--->BN_MP_RSHD_C
+|   |   |   |   +--->BN_MP_RSHD_C
+|   |   |   |   +--->BN_MP_MUL_D_C
+|   |   |   |   |   +--->BN_MP_GROW_C
 |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_CLAMP_C
 |   |   |   +--->BN_MP_ADD_C
 |   |   |   |   +--->BN_S_MP_ADD_C
 |   |   |   |   |   +--->BN_MP_GROW_C
 |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_CMP_MAG_C
 |   |   |   |   +--->BN_S_MP_SUB_C
 |   |   |   |   |   +--->BN_MP_GROW_C
 |   |   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   +--->BN_MP_DIV_2D_C
-|   |   |   |   +--->BN_MP_MOD_2D_C
-|   |   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   |   +--->BN_MP_RSHD_C
-|   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   |   +--->BN_MP_EXCH_C
 |   |   |   +--->BN_MP_EXCH_C
-|   |   |   +--->BN_MP_INIT_SIZE_C
-|   |   |   +--->BN_MP_INIT_COPY_C
-|   |   |   +--->BN_MP_LSHD_C
-|   |   |   |   +--->BN_MP_GROW_C
-|   |   |   |   +--->BN_MP_RSHD_C
-|   |   |   +--->BN_MP_RSHD_C
-|   |   |   +--->BN_MP_MUL_D_C
-|   |   |   |   +--->BN_MP_GROW_C
-|   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   +--->BN_MP_CLAMP_C
+|   +--->BN_MP_SET_C
+|   |   +--->BN_MP_ZERO_C
 |   +--->BN_MP_MOD_C
 |   |   +--->BN_MP_DIV_C
 |   |   |   +--->BN_MP_CMP_MAG_C
@@ -1432,7 +1776,6 @@
 |   |   |   |   +--->BN_MP_GROW_C
 |   |   |   +--->BN_MP_ZERO_C
 |   |   |   +--->BN_MP_INIT_MULTI_C
-|   |   |   +--->BN_MP_SET_C
 |   |   |   +--->BN_MP_MUL_2D_C
 |   |   |   |   +--->BN_MP_GROW_C
 |   |   |   |   +--->BN_MP_LSHD_C
@@ -1549,120 +1892,6 @@
 |   |   |   +--->BN_MP_INIT_SIZE_C
 |   |   |   +--->BN_MP_CLAMP_C
 |   |   |   +--->BN_MP_EXCH_C
-|   +--->BN_MP_REDUCE_C
-|   |   +--->BN_MP_INIT_COPY_C
-|   |   +--->BN_MP_RSHD_C
-|   |   |   +--->BN_MP_ZERO_C
-|   |   +--->BN_MP_MUL_C
-|   |   |   +--->BN_MP_TOOM_MUL_C
-|   |   |   |   +--->BN_MP_INIT_MULTI_C
-|   |   |   |   +--->BN_MP_MOD_2D_C
-|   |   |   |   |   +--->BN_MP_ZERO_C
-|   |   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   |   +--->BN_MP_MUL_2_C
-|   |   |   |   |   +--->BN_MP_GROW_C
-|   |   |   |   +--->BN_MP_ADD_C
-|   |   |   |   |   +--->BN_S_MP_ADD_C
-|   |   |   |   |   |   +--->BN_MP_GROW_C
-|   |   |   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   |   |   +--->BN_MP_CMP_MAG_C
-|   |   |   |   |   +--->BN_S_MP_SUB_C
-|   |   |   |   |   |   +--->BN_MP_GROW_C
-|   |   |   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   |   +--->BN_MP_SUB_C
-|   |   |   |   |   +--->BN_S_MP_ADD_C
-|   |   |   |   |   |   +--->BN_MP_GROW_C
-|   |   |   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   |   |   +--->BN_MP_CMP_MAG_C
-|   |   |   |   |   +--->BN_S_MP_SUB_C
-|   |   |   |   |   |   +--->BN_MP_GROW_C
-|   |   |   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   |   +--->BN_MP_DIV_2_C
-|   |   |   |   |   +--->BN_MP_GROW_C
-|   |   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   |   +--->BN_MP_MUL_2D_C
-|   |   |   |   |   +--->BN_MP_GROW_C
-|   |   |   |   |   +--->BN_MP_LSHD_C
-|   |   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   |   +--->BN_MP_MUL_D_C
-|   |   |   |   |   +--->BN_MP_GROW_C
-|   |   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   |   +--->BN_MP_DIV_3_C
-|   |   |   |   |   +--->BN_MP_INIT_SIZE_C
-|   |   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   |   |   +--->BN_MP_EXCH_C
-|   |   |   |   +--->BN_MP_LSHD_C
-|   |   |   |   |   +--->BN_MP_GROW_C
-|   |   |   +--->BN_MP_KARATSUBA_MUL_C
-|   |   |   |   +--->BN_MP_INIT_SIZE_C
-|   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   |   +--->BN_MP_SUB_C
-|   |   |   |   |   +--->BN_S_MP_ADD_C
-|   |   |   |   |   |   +--->BN_MP_GROW_C
-|   |   |   |   |   +--->BN_MP_CMP_MAG_C
-|   |   |   |   |   +--->BN_S_MP_SUB_C
-|   |   |   |   |   |   +--->BN_MP_GROW_C
-|   |   |   |   +--->BN_MP_ADD_C
-|   |   |   |   |   +--->BN_S_MP_ADD_C
-|   |   |   |   |   |   +--->BN_MP_GROW_C
-|   |   |   |   |   +--->BN_MP_CMP_MAG_C
-|   |   |   |   |   +--->BN_S_MP_SUB_C
-|   |   |   |   |   |   +--->BN_MP_GROW_C
-|   |   |   |   +--->BN_MP_LSHD_C
-|   |   |   |   |   +--->BN_MP_GROW_C
-|   |   |   +--->BN_FAST_S_MP_MUL_DIGS_C
-|   |   |   |   +--->BN_MP_GROW_C
-|   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   +--->BN_S_MP_MUL_DIGS_C
-|   |   |   |   +--->BN_MP_INIT_SIZE_C
-|   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   |   +--->BN_MP_EXCH_C
-|   |   +--->BN_S_MP_MUL_HIGH_DIGS_C
-|   |   |   +--->BN_FAST_S_MP_MUL_HIGH_DIGS_C
-|   |   |   |   +--->BN_MP_GROW_C
-|   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   +--->BN_MP_INIT_SIZE_C
-|   |   |   +--->BN_MP_CLAMP_C
-|   |   |   +--->BN_MP_EXCH_C
-|   |   +--->BN_FAST_S_MP_MUL_HIGH_DIGS_C
-|   |   |   +--->BN_MP_GROW_C
-|   |   |   +--->BN_MP_CLAMP_C
-|   |   +--->BN_MP_MOD_2D_C
-|   |   |   +--->BN_MP_ZERO_C
-|   |   |   +--->BN_MP_CLAMP_C
-|   |   +--->BN_S_MP_MUL_DIGS_C
-|   |   |   +--->BN_FAST_S_MP_MUL_DIGS_C
-|   |   |   |   +--->BN_MP_GROW_C
-|   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   +--->BN_MP_INIT_SIZE_C
-|   |   |   +--->BN_MP_CLAMP_C
-|   |   |   +--->BN_MP_EXCH_C
-|   |   +--->BN_MP_SUB_C
-|   |   |   +--->BN_S_MP_ADD_C
-|   |   |   |   +--->BN_MP_GROW_C
-|   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   +--->BN_MP_CMP_MAG_C
-|   |   |   +--->BN_S_MP_SUB_C
-|   |   |   |   +--->BN_MP_GROW_C
-|   |   |   |   +--->BN_MP_CLAMP_C
-|   |   +--->BN_MP_CMP_D_C
-|   |   +--->BN_MP_SET_C
-|   |   |   +--->BN_MP_ZERO_C
-|   |   +--->BN_MP_LSHD_C
-|   |   |   +--->BN_MP_GROW_C
-|   |   +--->BN_MP_ADD_C
-|   |   |   +--->BN_S_MP_ADD_C
-|   |   |   |   +--->BN_MP_GROW_C
-|   |   |   |   +--->BN_MP_CLAMP_C
-|   |   |   +--->BN_MP_CMP_MAG_C
-|   |   |   +--->BN_S_MP_SUB_C
-|   |   |   |   +--->BN_MP_GROW_C
-|   |   |   |   +--->BN_MP_CLAMP_C
-|   |   +--->BN_MP_CMP_C
-|   |   |   +--->BN_MP_CMP_MAG_C
-|   |   +--->BN_S_MP_SUB_C
-|   |   |   +--->BN_MP_GROW_C
-|   |   |   +--->BN_MP_CLAMP_C
 |   +--->BN_MP_MUL_C
 |   |   +--->BN_MP_TOOM_MUL_C
 |   |   |   +--->BN_MP_INIT_MULTI_C
@@ -1731,8 +1960,6 @@
 |   |   |   +--->BN_MP_INIT_SIZE_C
 |   |   |   +--->BN_MP_CLAMP_C
 |   |   |   +--->BN_MP_EXCH_C
-|   +--->BN_MP_SET_C
-|   |   +--->BN_MP_ZERO_C
 |   +--->BN_MP_EXCH_C


@@ -1764,7 +1991,64 @@
 |   |   |   |   +--->BN_MP_CLEAR_C
 |   |   |   +--->BN_MP_COPY_C
 |   |   |   |   +--->BN_MP_GROW_C
-|   |   |   +--->BN_MP_ABS_C
+|   |   |   +--->BN_MP_MOD_C
+|   |   |   |   +--->BN_MP_DIV_C
+|   |   |   |   |   +--->BN_MP_CMP_MAG_C
+|   |   |   |   |   +--->BN_MP_ZERO_C
+|   |   |   |   |   +--->BN_MP_SET_C
+|   |   |   |   |   +--->BN_MP_COUNT_BITS_C
+|   |   |   |   |   +--->BN_MP_ABS_C
+|   |   |   |   |   +--->BN_MP_MUL_2D_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   +--->BN_MP_LSHD_C
+|   |   |   |   |   |   |   +--->BN_MP_RSHD_C
+|   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   +--->BN_MP_CMP_C
+|   |   |   |   |   +--->BN_MP_SUB_C
+|   |   |   |   |   |   +--->BN_S_MP_ADD_C
+|   |   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   |   +--->BN_S_MP_SUB_C
+|   |   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   +--->BN_MP_ADD_C
+|   |   |   |   |   |   +--->BN_S_MP_ADD_C
+|   |   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   |   +--->BN_S_MP_SUB_C
+|   |   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   +--->BN_MP_DIV_2D_C
+|   |   |   |   |   |   +--->BN_MP_MOD_2D_C
+|   |   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   |   +--->BN_MP_CLEAR_C
+|   |   |   |   |   |   +--->BN_MP_RSHD_C
+|   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   |   +--->BN_MP_EXCH_C
+|   |   |   |   |   +--->BN_MP_EXCH_C
+|   |   |   |   |   +--->BN_MP_CLEAR_MULTI_C
+|   |   |   |   |   |   +--->BN_MP_CLEAR_C
+|   |   |   |   |   +--->BN_MP_INIT_SIZE_C
+|   |   |   |   |   +--->BN_MP_INIT_COPY_C
+|   |   |   |   |   +--->BN_MP_LSHD_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   +--->BN_MP_RSHD_C
+|   |   |   |   |   +--->BN_MP_RSHD_C
+|   |   |   |   |   +--->BN_MP_MUL_D_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   +--->BN_MP_CLEAR_C
+|   |   |   |   +--->BN_MP_CLEAR_C
+|   |   |   |   +--->BN_MP_ADD_C
+|   |   |   |   |   +--->BN_S_MP_ADD_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   +--->BN_MP_CMP_MAG_C
+|   |   |   |   |   +--->BN_S_MP_SUB_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_EXCH_C
 |   |   |   +--->BN_MP_SET_C
 |   |   |   |   +--->BN_MP_ZERO_C
 |   |   |   +--->BN_MP_DIV_2_C
@@ -1794,6 +2078,66 @@
 |   |   +--->BN_MP_INVMOD_SLOW_C
 |   |   |   +--->BN_MP_INIT_MULTI_C
 |   |   |   |   +--->BN_MP_CLEAR_C
+|   |   |   +--->BN_MP_MOD_C
+|   |   |   |   +--->BN_MP_DIV_C
+|   |   |   |   |   +--->BN_MP_CMP_MAG_C
+|   |   |   |   |   +--->BN_MP_COPY_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   +--->BN_MP_ZERO_C
+|   |   |   |   |   +--->BN_MP_SET_C
+|   |   |   |   |   +--->BN_MP_COUNT_BITS_C
+|   |   |   |   |   +--->BN_MP_ABS_C
+|   |   |   |   |   +--->BN_MP_MUL_2D_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   +--->BN_MP_LSHD_C
+|   |   |   |   |   |   |   +--->BN_MP_RSHD_C
+|   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   +--->BN_MP_CMP_C
+|   |   |   |   |   +--->BN_MP_SUB_C
+|   |   |   |   |   |   +--->BN_S_MP_ADD_C
+|   |   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   |   +--->BN_S_MP_SUB_C
+|   |   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   +--->BN_MP_ADD_C
+|   |   |   |   |   |   +--->BN_S_MP_ADD_C
+|   |   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   |   +--->BN_S_MP_SUB_C
+|   |   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   +--->BN_MP_DIV_2D_C
+|   |   |   |   |   |   +--->BN_MP_MOD_2D_C
+|   |   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   |   +--->BN_MP_CLEAR_C
+|   |   |   |   |   |   +--->BN_MP_RSHD_C
+|   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   |   +--->BN_MP_EXCH_C
+|   |   |   |   |   +--->BN_MP_EXCH_C
+|   |   |   |   |   +--->BN_MP_CLEAR_MULTI_C
+|   |   |   |   |   |   +--->BN_MP_CLEAR_C
+|   |   |   |   |   +--->BN_MP_INIT_SIZE_C
+|   |   |   |   |   +--->BN_MP_INIT_COPY_C
+|   |   |   |   |   +--->BN_MP_LSHD_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   +--->BN_MP_RSHD_C
+|   |   |   |   |   +--->BN_MP_RSHD_C
+|   |   |   |   |   +--->BN_MP_MUL_D_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   +--->BN_MP_CLEAR_C
+|   |   |   |   +--->BN_MP_CLEAR_C
+|   |   |   |   +--->BN_MP_ADD_C
+|   |   |   |   |   +--->BN_S_MP_ADD_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   |   +--->BN_MP_CMP_MAG_C
+|   |   |   |   |   +--->BN_S_MP_SUB_C
+|   |   |   |   |   |   +--->BN_MP_GROW_C
+|   |   |   |   |   |   +--->BN_MP_CLAMP_C
+|   |   |   |   +--->BN_MP_EXCH_C
 |   |   |   +--->BN_MP_COPY_C
 |   |   |   |   +--->BN_MP_GROW_C
 |   |   |   +--->BN_MP_SET_C
@@ -1828,93 +2172,63 @@
 |   |   +--->BN_MP_COPY_C
 |   |   |   +--->BN_MP_GROW_C
 |   +--->BN_MP_CLEAR_MULTI_C
-|   +--->BN_MP_DR_IS_MODULUS_C
-|   +--->BN_MP_REDUCE_IS_2K_C
-|   |   +--->BN_MP_REDUCE_2K_C
-|   |   |   +--->BN_MP_COUNT_BITS_C
-|   |   |   +--->BN_MP_DIV_2D_C
-|   |   |   |   +--->BN_MP_COPY_C